commit_id,repo,msg,filename,diff,label,partition
3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 411896058
Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator.cc,"@@ -2153,7 +2153,7 @@ OpInfo::TensorProperties OpLevelCostEstimator::DescribeTensor(
 }
 
 /* static */
-OpLevelCostEstimator::ConvolutionDimensions
+StatusOr<OpLevelCostEstimator::ConvolutionDimensions>
 OpLevelCostEstimator::OpDimensionsFromInputs(
     const TensorShapeProto& original_image_shape, const OpInfo& op_info,
     bool* found_unknown_shapes) {
@@ -2190,6 +2190,11 @@ OpLevelCostEstimator::OpDimensionsFromInputs(
   std::vector<int64_t> strides = GetStrides(op_info);
   int64_t sx = strides[x_index];
   int64_t sy = strides[y_index];
+  if (sx == 0 || sy == 0) {
+    return errors::InvalidArgument(
+        ""Stride must be > 0 for Height and Width, but got ("", sy, "", "", sx,
+        "")"");
+  }
   const auto padding = GetPadding(op_info);
 
   int64_t ox = GetOutputSize(ix, kx, sx, padding);
@@ -2206,8 +2211,9 @@ Status OpLevelCostEstimator::PredictMaxPool(const OpContext& op_context,
   bool found_unknown_shapes = false;
   const auto& op_info = op_context.op_info;
   // x: op_info.inputs(0)
-  ConvolutionDimensions dims = OpDimensionsFromInputs(
-      op_info.inputs(0).shape(), op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims,
+                      OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info,
+                                             &found_unknown_shapes));
   // kx * ky - 1 comparisons per output (kx * xy > 1)
   // or 1 copy per output (kx * k1 = 1).
   int per_output_ops = dims.kx * dims.ky == 1 ? 1 : dims.kx * dims.ky - 1;
@@ -2248,8 +2254,9 @@ Status OpLevelCostEstimator::PredictMaxPoolGrad(const OpContext& op_context,
                                    op_info.ShortDebugString());
   }
 
-  ConvolutionDimensions dims = OpDimensionsFromInputs(
-      op_info.inputs(0).shape(), op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims,
+                      OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info,
+                                             &found_unknown_shapes));
 
   int64_t ops = 0;
   if (dims.kx == 1 && dims.ky == 1) {
@@ -2324,8 +2331,9 @@ Status OpLevelCostEstimator::PredictAvgPool(const OpContext& op_context,
   bool found_unknown_shapes = false;
   const auto& op_info = op_context.op_info;
   // x: op_info.inputs(0)
-  ConvolutionDimensions dims = OpDimensionsFromInputs(
-      op_info.inputs(0).shape(), op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims,
+                      OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info,
+                                             &found_unknown_shapes));
 
   // kx * ky - 1 additions and 1 multiplication per output.
   int64_t ops = dims.batch * dims.ox * dims.oy * dims.oz * dims.kx * dims.ky;
@@ -2382,8 +2390,9 @@ Status OpLevelCostEstimator::PredictAvgPoolGrad(const OpContext& op_context,
     found_unknown_shapes = true;
   }
 
-  ConvolutionDimensions dims =
-      OpDimensionsFromInputs(x_shape, op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(
+      ConvolutionDimensions dims,
+      OpDimensionsFromInputs(x_shape, op_info, &found_unknown_shapes));
 
   int64_t ops = 0;
   if (dims.kx <= dims.sx && dims.ky <= dims.sy) {
@@ -2409,8 +2418,9 @@ Status OpLevelCostEstimator::PredictFusedBatchNorm(
   // offset: op_info.inputs(2)
   // mean: op_info.inputs(3)  --> only for inference
   // variance: op_info.inputs(4) --> only for inference
-  ConvolutionDimensions dims = OpDimensionsFromInputs(
-      op_info.inputs(0).shape(), op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims,
+                      OpDimensionsFromInputs(op_info.inputs(0).shape(), op_info,
+                                             &found_unknown_shapes));
   const bool is_training = IsTraining(op_info);
 
   int64_t ops = 0;
@@ -2459,8 +2469,9 @@ Status OpLevelCostEstimator::PredictFusedBatchNormGrad(
   // scale: op_info.inputs(2)
   // mean: op_info.inputs(3)
   // variance or inverse of variance: op_info.inputs(4)
-  ConvolutionDimensions dims = OpDimensionsFromInputs(
-      op_info.inputs(1).shape(), op_info, &found_unknown_shapes);
+  TF_ASSIGN_OR_RETURN(ConvolutionDimensions dims,
+                      OpDimensionsFromInputs(op_info.inputs(1).shape(), op_info,
+                                             &found_unknown_shapes));
 
   int64_t ops = 0;
   const auto rsqrt_cost = Eigen::internal::functor_traits<
",1,test
3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 411896058
Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator.h,"@@ -290,7 +290,7 @@ class OpLevelCostEstimator {
       bool* found_unknown_shapes);
 
   // For Pooling, FusedBatchNorm, and their grad ops.
-  static ConvolutionDimensions OpDimensionsFromInputs(
+  static StatusOr<ConvolutionDimensions> OpDimensionsFromInputs(
       const TensorShapeProto& original_image_shape, const OpInfo& op_info,
       bool* found_unknown_shapes);
 
",1,test
3218043d6d3a019756607643cf65574fbfef5d7a,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 411896058
Change-Id: Ia031058247e3cf382957a6662d3f9e1cbb481ca2",op_level_cost_estimator_test.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/tensor_shape.pb.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/platform/status_matchers.h""
 #include ""tensorflow/core/platform/test.h""
 #include ""tensorflow/core/protobuf/device_properties.pb.h""
 
@@ -558,9 +559,10 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
     }
 
     bool found_unknown_shapes;
-    auto dims = OpLevelCostEstimator::OpDimensionsFromInputs(
-        op_context.op_info.inputs(0).shape(), op_context.op_info,
-        &found_unknown_shapes);
+    TF_ASSERT_OK_AND_ASSIGN(
+        auto dims, OpLevelCostEstimator::OpDimensionsFromInputs(
+                       op_context.op_info.inputs(0).shape(), op_context.op_info,
+                       &found_unknown_shapes));
     Padding padding_enum;
     if (padding == ""VALID"") {
       padding_enum = Padding::VALID;
@@ -581,6 +583,38 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
     EXPECT_EQ(padding_enum, dims.padding);
   }
 
+  StatusOr<OpLevelCostEstimator::ConvolutionDimensions>
+  CallOpDimensionsFromInputs(const int n, const int h, const int w, const int c,
+                             const int kx, const int ky, const int sx,
+                             const int sy, const string& data_format,
+                             const string& padding) {
+    OpContext op_context;
+
+    const std::vector<int> x = {n, h, w, c};
+    const std::vector<int> ksize = {1, kx, ky, 1};
+    std::vector<int> strides;
+    if (data_format == ""NHWC"") {
+      strides = {1, sy, sx, 1};
+    } else {
+      strides = {1, 1, sy, sx};
+    }
+
+    auto& op_info = op_context.op_info;
+    SetCpuDevice(&op_info);
+    op_info.set_op(""MaxPool"");
+
+    DescribeTensor4D(x[0], x[1], x[2], x[3], op_info.add_inputs());
+    auto* attr = op_info.mutable_attr();
+    SetAttrValue(data_format, &(*attr)[""data_format""]);
+    SetAttrValue(padding, &(*attr)[""padding""]);
+    SetAttrValue(strides, &(*attr)[""strides""]);
+    SetAttrValue(ksize, &(*attr)[""ksize""]);
+    bool found_unknown_shapes;
+    return OpLevelCostEstimator::OpDimensionsFromInputs(
+        op_context.op_info.inputs(0).shape(), op_context.op_info,
+        &found_unknown_shapes);
+  }
+
   OpLevelCostEstimator estimator_;
 };
 
@@ -1383,6 +1417,26 @@ TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputs) {
   }
 }
 
+TEST_F(OpLevelCostEstimatorTest, OpDimensionsFromInputsError) {
+  std::vector<string> paddings = {""VALID"", ""SAME""};
+  std::vector<string> formats = {""NHWC"", ""NCHW""};
+  for (const auto& p : paddings) {
+    for (const auto& f : formats) {
+      // n, h, w, c, kx, ky, sx, sy, data_format, padding.
+      ASSERT_THAT(
+          CallOpDimensionsFromInputs(10, 14, 14, 3840, 3, 3, 0, 2, f, p),
+          testing::StatusIs(
+              error::INVALID_ARGUMENT,
+              ""Stride must be > 0 for Height and Width, but got (2, 0)""));
+      ASSERT_THAT(
+          CallOpDimensionsFromInputs(10, 14, 14, 3840, 3, 3, 2, 0, f, p),
+          testing::StatusIs(
+              error::INVALID_ARGUMENT,
+              ""Stride must be > 0 for Height and Width, but got (0, 2)""));
+    }
+  }
+}
+
 TEST_F(OpLevelCostEstimatorTest, PredictMaxPool) {
   auto predict_max_pool = [this](const int n, const int in, const int c,
                                  const int k, const int s,
",1,test
23968a8bf65b009120c43b5ebcceaf52dbc9e943,tensorflow/tensorflow,"Fix out of bound access in DequantizeOp by adding check for axis < input dimension

PiperOrigin-RevId: 411214268
Change-Id: I3249d2a69ddc82f182c589a3a5bbfb71543f4b29",dequantize_op.cc,"@@ -94,6 +94,11 @@ class DequantizeOp : public OpKernel {
     const Tensor& input_min_tensor = ctx->input(1);
     const Tensor& input_max_tensor = ctx->input(2);
 
+    OP_REQUIRES(
+        ctx, axis_ < input.dims(),
+        errors::InvalidArgument(""Axis must be less than input dimension("",
+                                input.dims(), ""), got "", axis_));
+
     int num_slices = 1;
     if (axis_ > -1) {
       num_slices = input.dim_size(axis_);
",1,train
b64638ec5ccaa77b7c1eb90958e3d85ce381f91b,tensorflow/tensorflow,"Fix Integer overflow error in Dequantize op shape function, by adding a bound check on axis.

PiperOrigin-RevId: 412121389
Change-Id: I3088dbad9e90f9998d406b618c16694388a9dfb4",array_ops.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/lib/core/errors.h""
+#include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/util/mirror_pad_mode.h""
 #include ""tensorflow/core/util/padding.h""
 #include ""tensorflow/core/util/strided_slice_op.h""
@@ -3028,6 +3029,12 @@ REGISTER_OP(""Dequantize"")
         return errors::InvalidArgument(""axis should be at least -1, got "",
                                        axis);
       }
+      auto input_dims = c->Rank(c->input(0));
+      if (axis > input_dims) {
+        return errors::InvalidArgument(
+            ""Axis must be less than input dimension("", input_dims, ""), got "",
+            axis);
+      }
       const int minmax_rank = (axis == -1) ? 0 : 1;
       TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
       ShapeHandle minmax;
@@ -3035,6 +3042,13 @@ REGISTER_OP(""Dequantize"")
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax));
       if (axis != -1) {
         ShapeHandle input;
+        if (axis >= kint32max) {
+          // Check int32 max bound for a corner case to prevent integer flow
+          // when input actually has kint32max rank and above bound check is not
+          // triggered.
+          return errors::InvalidArgument(
+              ""Axis cannot be >= kint32max value, got "", axis);
+        }
         TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
         DimensionHandle depth;
         TF_RETURN_IF_ERROR(
",1,train
b64638ec5ccaa77b7c1eb90958e3d85ce381f91b,tensorflow/tensorflow,"Fix Integer overflow error in Dequantize op shape function, by adding a bound check on axis.

PiperOrigin-RevId: 412121389
Change-Id: I3088dbad9e90f9998d406b618c16694388a9dfb4",array_ops_test.py,"@@ -1704,6 +1704,21 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
       output_grad = gradient_checker_v2.compute_gradient(f, [input_tensor])
       self.assertAllClose(output_grad[0], np.zeros([1, 4, 4]))
 
+  def testOutOfBoundAxis(self):
+    input_tensor = constant_op.constant([1., 1.])
+    input_min = [0]
+    input_max = [1]
+    q_input, _, _ = array_ops.quantize(input_tensor, 0, 1, dtypes.qint32)
+    error = (errors.InvalidArgumentError, ValueError)
+    with self.assertRaisesRegex(error,
+                                r"".*Axis must be less than input dimension.*""):
+      self.evaluate(
+          gen_array_ops.dequantize(
+              input=q_input,
+              min_range=input_min,
+              max_range=input_max,
+              axis=2**31 - 1))
+
 
 @test_util.run_all_in_graph_and_eager_modes
 class SortedSearchTest(test_util.TensorFlowTestCase):
",1,train
37c01fb5e25c3d80213060460196406c43d31995,tensorflow/tensorflow,"Fix out of bound error in ReverseSequence Op shape function

PiperOrigin-RevId: 411896080
Change-Id: I7e59a38e2f960886edf2b6c54ed5a84e86a9b193",array_ops.cc,"@@ -1653,11 +1653,21 @@ REGISTER_OP(""ReverseSequence"")
         return errors::InvalidArgument(
             ""batch_dim must be < input rank: "", batch_dim, "" vs. "", input_rank);
       }
+
       if (seq_dim >= input_rank) {
         return errors::InvalidArgument(
             ""seq_dim must be < input rank: "", seq_dim, "" vs. "", input_rank);
       }
 
+      // To prevent out of bound access when calling c->Dim(input, batch_dim),
+      // batch_dim range [-1 * input rank, input rank) is allowed. However,
+      // the op implementation has a stricter bound for batch_dim requiring >= 0
+      // value. Thus, perform strict check here.
+      if (batch_dim < 0) {
+        return errors::InvalidArgument(""batch_dim must be >=0, got "",
+                                       batch_dim);
+      }
+
       DimensionHandle batch_dim_dim = c->Dim(input, batch_dim);
       TF_RETURN_IF_ERROR(
           c->Merge(batch_dim_dim, c->Dim(seq_lens_shape, 0), &batch_dim_dim));
",1,train
58b34c6c8250983948b5a781b426f6aa01fd47af,tensorflow/tensorflow,"Fix integer overflow leading to divide by zero error in Unravel index kernel when dimensions product exceeds max int value.

PiperOrigin-RevId: 413250052
Change-Id: I9450b6e8acecd2e881a64b882e2b7c70e8e9289a",unravel_index_op.cc,"@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <cstdint>
+
+#include ""tensorflow/core/framework/types.pb.h""
+#include ""tensorflow/core/platform/types.h""
 #define EIGEN_USE_THREADS
 
 #include ""tensorflow/core/framework/op_kernel.h""
@@ -35,7 +39,8 @@ typedef Eigen::ThreadPoolDevice CPUDevice;
 template <typename Tidx>
 class UnravelIndexOp : public OpKernel {
  public:
-  explicit UnravelIndexOp(OpKernelConstruction* ctx) : OpKernel(ctx) {}
+  explicit UnravelIndexOp(OpKernelConstruction* ctx)
+      : OpKernel(ctx), dtidx_(DataTypeToEnum<Tidx>::v()) {}
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& indices_tensor = ctx->input(0);
@@ -54,12 +59,31 @@ class UnravelIndexOp : public OpKernel {
 
     auto dims = dims_tensor.vec<Tidx>();
     // Make sure dims does not contain a zero
+    double prod = 1;
+    uint64_t limit;
+    if (dtidx_ == DataType::DT_INT64) {
+      limit = kint64max;
+    } else {
+      limit = kint32max;
+    }
+
     for (int i = 0; i < dims.size(); i++) {
       OP_REQUIRES(
           ctx, dims(i) != 0,
           errors::InvalidArgument(""Input dims cannot contain a dim of zero, ""
                                   ""but dims contains zero at index "",
                                   i));
+      OP_REQUIRES(ctx, dims(i) > 0,
+                  errors::InvalidArgument(
+                      ""Input dims cannot be negative. Got dim = "", dims(i),
+                      "" at index "", i));
+      // Check interger overflow
+      OP_REQUIRES(
+          ctx, prod <= limit / dims(i),
+          errors::InvalidArgument(""Input dims product is causing integer ""
+                                  ""overflow: ("",
+                                  dims, "")""));
+      prod = (prod * dims(i));
     }
 
     // Check to make sure indices is not out of boundary
@@ -132,6 +156,7 @@ class UnravelIndexOp : public OpKernel {
                strides_shifted.reshape(reshape).broadcast(bcast);
     }
   }
+  const DataType dtidx_;
 };
 
 #define REGISTER_KERNEL(type)                                               \
",1,train
58b34c6c8250983948b5a781b426f6aa01fd47af,tensorflow/tensorflow,"Fix integer overflow leading to divide by zero error in Unravel index kernel when dimensions product exceeds max int value.

PiperOrigin-RevId: 413250052
Change-Id: I9450b6e8acecd2e881a64b882e2b7c70e8e9289a",array_ops_test.py,"@@ -1580,6 +1580,20 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
           dims = constant_op.constant([3, 0], dtype=dtype)
           self.evaluate(array_ops.unravel_index(indices=indices, dims=dims))
 
+  def testUnravelIndexIntegerOverflow(self):
+    with self.cached_session():
+      for dtype in [dtypes.int32, dtypes.int64]:
+        with self.assertRaisesRegex(
+            errors.InvalidArgumentError,
+            r""Input dims product is causing integer overflow""):
+          indices = constant_op.constant(-0x100000, dtype=dtype)
+          if dtype == dtypes.int32:
+            value = 0x10000000
+          else:
+            value = 0x7FFFFFFFFFFFFFFF
+          dims = constant_op.constant([value, value], dtype=dtype)
+          self.evaluate(array_ops.unravel_index(indices=indices, dims=dims))
+
 
 class GuaranteeConstOpTest(test_util.TensorFlowTestCase):
 
",1,train
002408c3696b173863228223d535f9de72a101a9,tensorflow/tensorflow,"Add negative bound check for row and column pooling_sequence in FractionalAvgPoolGrad op to avoid out of bound heap access

PiperOrigin-RevId: 413837346
Change-Id: I2b86034101df31bee161abcb781755e236c7bccd",fractional_avg_pool_op.cc,"@@ -311,15 +311,26 @@ class FractionalAvgPoolGradOp : public OpKernel {
     for (int64_t b = 0; b < out_batch; ++b) {
       for (int64_t r = 0; r < out_rows; ++r) {
         const int64_t in_row_start = row_seq_tensor_flat(r);
+
         int64_t in_row_end = overlapping_ ? row_seq_tensor_flat(r + 1)
                                           : row_seq_tensor_flat(r + 1) - 1;
         in_row_end = std::min(in_row_end, in_max_row_index);
+        OP_REQUIRES(context, in_row_start >= 0 && in_row_end >= 0,
+                    errors::InvalidArgument(
+                        ""Row sequence tensor values must not be negative, got "",
+                        row_seq_tensor_flat));
+
         for (int64_t c = 0; c < out_cols; ++c) {
           const int64_t in_col_start = col_seq_tensor_flat(c);
           int64_t in_col_end = overlapping_ ? col_seq_tensor_flat(c + 1)
                                             : col_seq_tensor_flat(c + 1) - 1;
           in_col_end = std::min(in_col_end, in_max_col_index);
 
+          OP_REQUIRES(
+              context, in_col_start >= 0 && in_col_end >= 0,
+              errors::InvalidArgument(
+                  ""Column sequence tensor values must not be negative, got "",
+                  col_seq_tensor_flat));
           const int64_t num_elements_in_pooling_cell =
               (in_row_end - in_row_start + 1) * (in_col_end - in_col_start + 1);
           const int64_t out_index = (b * out_rows + r) * out_cols + c;
",1,train
002408c3696b173863228223d535f9de72a101a9,tensorflow/tensorflow,"Add negative bound check for row and column pooling_sequence in FractionalAvgPoolGrad op to avoid out of bound heap access

PiperOrigin-RevId: 413837346
Change-Id: I2b86034101df31bee161abcb781755e236c7bccd",fractional_avg_pool_op_test.py,"@@ -20,6 +20,7 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
@@ -306,6 +307,32 @@ class FractionalAvgTest(test.TestCase):
           input_b, row_seq, col_seq, overlapping)
       self.assertSequenceEqual(expected.shape, actual.shape)
 
+  def testNegativeSeqValuesForGradOp(self):
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        r""Row sequence tensor values must not be negative.*""):
+      y = nn_ops.gen_nn_ops.fractional_avg_pool_grad(
+          orig_input_tensor_shape=[2, 2, 2, 2],
+          out_backprop=[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11,
+                                                                      12]]]],
+          row_pooling_sequence=[-10, 1, 2, 3],
+          col_pooling_sequence=[1, 2, 3, 4],
+          overlapping=True)
+
+      self.evaluate(y)
+      with self.assertRaisesRegex(
+          errors.InvalidArgumentError,
+          r""Column sequence tensor values must not be negative.*""):
+        z = nn_ops.gen_nn_ops.fractional_avg_pool_grad(
+            orig_input_tensor_shape=[2, 2, 2, 2],
+            out_backprop=[[[[1, 2], [3, 4], [5, 6]], [[7, 8], [9, 10], [11,
+                                                                        12]]]],
+            row_pooling_sequence=[10, 1, 2, 3],
+            col_pooling_sequence=[1, 2, -3, 4],
+            overlapping=True)
+
+        self.evaluate(z)
+
 
 class FractionalAvgPoolGradTest(test.TestCase):
   """"""Tests for FractionalAvgPoolGrad.
",1,train
08d7b00c0a5a20926363849f611729f53f3ec022,tensorflow/tensorflow,"Fix Segfault in Concat V2 shape function.

PiperOrigin-RevId: 412120654
Change-Id: I3ff915faea694f9ad8b00024e9af2de9909011be",common_shape_fns.cc,"@@ -2005,7 +2005,7 @@ Status ConcatShapeHelper(InferenceContext* c, int start_value_index,
   }
 
   // Minimum required number of dimensions.
-  const int min_rank = concat_dim < 0 ? -concat_dim : concat_dim + 1;
+  const int64 min_rank = concat_dim < 0 ? -concat_dim : concat_dim + 1;
 
   ShapeHandle output_before;
   ShapeHandle output_after;
",1,test
08d7b00c0a5a20926363849f611729f53f3ec022,tensorflow/tensorflow,"Fix Segfault in Concat V2 shape function.

PiperOrigin-RevId: 412120654
Change-Id: I3ff915faea694f9ad8b00024e9af2de9909011be",concat_op_test.py,"@@ -16,6 +16,7 @@
 
 import numpy as np
 
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
@@ -570,6 +571,17 @@ class ConcatOpTest(test.TestCase):
         t2 = [2]
         gen_array_ops.concat_v2([t1, t2], 1).eval()
 
+  def testConcatInvalidAxisInTfFunction(self):
+
+    @def_function.function
+    def concat_wrapper():
+      y = gen_array_ops.concat_v2(
+          values=[[1, 2, 3], [4, 5, 6]], axis=0xb500005b)
+      return y
+
+    with self.assertRaises(ValueError):
+      concat_wrapper()
+
   def testConcatNegativeAxis(self):
     with test_util.use_gpu():
       t1 = [[1, 2, 3], [4, 5, 6]]
",1,test
e3749a6d5d1e8d11806d4a2e9cc3123d1a90b75e,tensorflow/tensorflow,"[tf.data] Set limit on number of threads used in threadpool_dataset.

PiperOrigin-RevId: 410922677
Change-Id: Ib25814a99043ab10805b5d2d7088ae0e0b7b04fd",threadpool_dataset_op.cc,"@@ -39,6 +39,22 @@ namespace experimental {
     PrivateThreadPoolDatasetOp::kDatasetType;
 /* static */ constexpr const char* const PrivateThreadPoolDatasetOp::kDatasetOp;
 
+namespace {
+// To prevent integer overflow issues when allocating threadpool memory for an
+// unreasonable number of threads.
+constexpr int kThreadLimit = 65536;
+
+Status ValidateNumThreads(int32_t num_threads) {
+  if (num_threads < 0) {
+    return errors::InvalidArgument(""`num_threads` must be >= 0"");
+  }
+  if (num_threads >= kThreadLimit) {
+    return errors::InvalidArgument(""`num_threads` must be < "", kThreadLimit);
+  }
+  return Status::OK();
+}
+}  // namespace
+
 class ThreadPoolResource : public ResourceBase {
  public:
   ThreadPoolResource(Env* env, const ThreadOptions& thread_options,
@@ -83,9 +99,7 @@ class ThreadPoolHandleOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->GetAttr(""num_threads"", &num_threads_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr(""max_intra_op_parallelism"",
                                      &max_intra_op_parallelism_));
-    OP_REQUIRES(
-        ctx, num_threads_ > 0,
-        errors::InvalidArgument(""`num_threads` must be greater than zero.""));
+    OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads_));
   }
 
   // The resource is deleted from the resource manager only when it is private
@@ -531,8 +545,7 @@ void PrivateThreadPoolDatasetOp::MakeDatasetFromOptions(OpKernelContext* ctx,
                                                         DatasetBase* input,
                                                         int32_t num_threads,
                                                         DatasetBase** output) {
-  OP_REQUIRES(ctx, num_threads >= 0,
-              errors::InvalidArgument(""`num_threads` must be >= 0""));
+  OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads));
   *output = new Dataset(ctx,
                         DatasetContext(DatasetContext::Params(
                             {PrivateThreadPoolDatasetOp::kDatasetType,
@@ -546,8 +559,7 @@ void PrivateThreadPoolDatasetOp::MakeDataset(OpKernelContext* ctx,
   int64_t num_threads = 0;
   OP_REQUIRES_OK(
       ctx, ParseScalarArgument<int64_t>(ctx, ""num_threads"", &num_threads));
-  OP_REQUIRES(ctx, num_threads >= 0,
-              errors::InvalidArgument(""`num_threads` must be >= 0""));
+  OP_REQUIRES_OK(ctx, ValidateNumThreads(num_threads));
   *output = new Dataset(ctx, input, num_threads);
 }
 
",1,train
f68fdab93fb7f4ddb4eb438c8fe052753c9413e8,tensorflow/tensorflow,"Add a check for pad width to be a positive value.

PiperOrigin-RevId: 413275853
Change-Id: I261a8db9dabf5ce48a806a9e58129080c9fac619",string_ngrams_op.cc,"@@ -152,6 +152,16 @@ class StringNGramsOp : public tensorflow::OpKernel {
         // We don't have to worry about dynamic padding sizes here: if padding
         // was dynamic, every sequence would have had sufficient padding to
         // generate at least one ngram.
+
+        // If reached here, pad_width should be > 0, pad_width_ = -1,
+        // which indicates max(ngram_widths) - 1 cannot be used here since
+        // ngram_width is not known.
+        OP_REQUIRES(
+            context, pad_width_ >= 0,
+            errors::InvalidArgument(""Pad width should be >= 0 when ""
+                                    ""preserve_short_sequences is True and ""
+                                    ""ngram_widths are not provided, got "",
+                                    pad_width_));
         int ngram_width = data_length + 2 * pad_width_;
         auto output_start = &ngrams_data[output_start_idx];
         int num_ngrams = 1;
",1,train
f68fdab93fb7f4ddb4eb438c8fe052753c9413e8,tensorflow/tensorflow,"Add a check for pad width to be a positive value.

PiperOrigin-RevId: 413275853
Change-Id: I261a8db9dabf5ce48a806a9e58129080c9fac619",raw_ops_test.py,"@@ -28,7 +28,6 @@ from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-@test_util.disable_tfrt
 class RawOpsTest(test.TestCase, parameterized.TestCase):
 
   def testSimple(self):
@@ -63,8 +62,9 @@ class RawOpsTest(test.TestCase, parameterized.TestCase):
   @parameterized.parameters([[0, 8]], [[-1, 6]])
   def testStringNGramsBadDataSplits(self, splits):
     data = [""aa"", ""bb"", ""cc"", ""dd"", ""ee"", ""ff""]
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                ""Invalid split value""):
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        r""Invalid split value|First split value must be 0""):
       self.evaluate(
           gen_string_ops.string_n_grams(
               data=data,
@@ -76,6 +76,25 @@ class RawOpsTest(test.TestCase, parameterized.TestCase):
               pad_width=0,
               preserve_short_sequences=False))
 
+  def testStringSplit(self):
+    data = [""123456""]
+    data_splits = [0, 1]
+    separator = ""a"" * 15
+    ngram_widths = []
+    pad_width = -5
+    left_pad = right_pad = """"
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Pad width should be >= 0""):
+      self.evaluate(gen_string_ops.string_n_grams(
+          data=data,
+          data_splits=data_splits,
+          separator=separator,
+          ngram_widths=ngram_widths,
+          left_pad=left_pad,
+          right_pad=right_pad,
+          pad_width=pad_width,
+          preserve_short_sequences=True))
+
   def testGetSessionHandle(self):
     if context.executing_eagerly():
       with self.assertRaisesRegex(
",1,train
f57315566d7094f322b784947093406c2aea0d7d,tensorflow/tensorflow,"Add a check for Key being scalar tensor for MapStage and OrderedMapStage ops.

According to documentation[1][2], key must be int64 value, but this wasn't enforced and the ops would fail with check failure for non-scalar key value.

[1]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/ordered-map-stage
[2]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/map-stage

PiperOrigin-RevId: 413822112
Change-Id: I9d118faf990e6361900aa32272eff486ad9f0e2e",map_stage_op.cc,"@@ -536,6 +536,11 @@ class MapStageOp : public OpKernel {
     OP_REQUIRES(ctx, key_tensor->NumElements() > 0,
                 errors::InvalidArgument(""key must not be empty""));
 
+    OP_REQUIRES(ctx, key_tensor->NumElements() == 1,
+                errors::InvalidArgument(
+                    ""key must be an int64 scalar, got tensor with shape: "",
+                    key_tensor->shape()));
+
     // Create copy for insertion into Staging Area
     Tensor key(*key_tensor);
 
",1,test
f57315566d7094f322b784947093406c2aea0d7d,tensorflow/tensorflow,"Add a check for Key being scalar tensor for MapStage and OrderedMapStage ops.

According to documentation[1][2], key must be int64 value, but this wasn't enforced and the ops would fail with check failure for non-scalar key value.

[1]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/ordered-map-stage
[2]https://www.tensorflow.org/api_docs/cc/class/tensorflow/ops/map-stage

PiperOrigin-RevId: 413822112
Change-Id: I9d118faf990e6361900aa32272eff486ad9f0e2e",map_stage_op_test.py,"@@ -12,8 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-from tensorflow.python.framework import errors
+import numpy as np
+
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
@@ -28,7 +31,7 @@ class MapStageTest(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testSimple(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         pi = array_ops.placeholder(dtypes.int64)
@@ -40,9 +43,9 @@ class MapStageTest(test.TestCase):
         k, y = stager.get(gi)
         y = math_ops.reduce_max(math_ops.matmul(y, y))
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -50,7 +53,7 @@ class MapStageTest(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testMultiple(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         pi = array_ops.placeholder(dtypes.int64)
@@ -62,9 +65,9 @@ class MapStageTest(test.TestCase):
         k, (z, y) = stager.get(gi)
         y = math_ops.reduce_max(z * math_ops.matmul(y, y))
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -73,26 +76,25 @@ class MapStageTest(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testDictionary(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         pi = array_ops.placeholder(dtypes.int64)
         gi = array_ops.placeholder(dtypes.int64)
         v = 2. * (array_ops.zeros([128, 128]) + x)
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [dtypes.float32, dtypes.float32],
-            shapes=[[], [128, 128]],
-            names=['x', 'v'])
+        stager = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32],
+                                              shapes=[[], [128, 128]],
+                                              names=['x', 'v'])
         stage = stager.put(pi, {'x': x, 'v': v})
         key, ret = stager.get(gi)
         z = ret['x']
         y = ret['v']
         y = math_ops.reduce_max(z * math_ops.matmul(y, y))
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 0})
       for i in range(10):
         _, yval = sess.run([stage, y], feed_dict={x: i, pi: i + 1, gi: i})
@@ -102,7 +104,7 @@ class MapStageTest(test.TestCase):
   def testColocation(self):
     gpu_dev = test.gpu_device_name()
 
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         v = 2. * (array_ops.zeros([128, 128]) + x)
@@ -119,58 +121,56 @@ class MapStageTest(test.TestCase):
         self.assertEqual(y.device, '/device:CPU:0')
         self.assertEqual(z[0].device, '/device:CPU:0')
 
-    G.finalize()
+    g.finalize()
 
   @test_util.run_deprecated_v1
   def testPeek(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.int32, name='x')
         pi = array_ops.placeholder(dtypes.int64)
         gi = array_ops.placeholder(dtypes.int64)
         p = array_ops.placeholder(dtypes.int32, name='p')
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [
-                dtypes.int32,
-            ], shapes=[[]])
+        stager = data_flow_ops.MapStagingArea([
+            dtypes.int32,
+        ], shapes=[[]])
         stage = stager.put(pi, [x], [0])
         peek = stager.peek(gi)
         size = stager.size()
 
-    G.finalize()
+    g.finalize()
 
     n = 10
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       for i in range(n):
         sess.run(stage, feed_dict={x: i, pi: i})
 
       for i in range(n):
-        self.assertTrue(sess.run(peek, feed_dict={gi: i})[0] == i)
+        self.assertEqual(sess.run(peek, feed_dict={gi: i})[0], i)
 
-      self.assertTrue(sess.run(size) == 10)
+      self.assertEqual(sess.run(size), 10)
 
   @test_util.run_deprecated_v1
   def testSizeAndClear(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32, name='x')
         pi = array_ops.placeholder(dtypes.int64)
         gi = array_ops.placeholder(dtypes.int64)
         v = 2. * (array_ops.zeros([128, 128]) + x)
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [dtypes.float32, dtypes.float32],
-            shapes=[[], [128, 128]],
-            names=['x', 'v'])
+        stager = data_flow_ops.MapStagingArea([dtypes.float32, dtypes.float32],
+                                              shapes=[[], [128, 128]],
+                                              names=['x', 'v'])
         stage = stager.put(pi, {'x': x, 'v': v})
         size = stager.size()
         clear = stager.clear()
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       sess.run(stage, feed_dict={x: -1, pi: 3})
       self.assertEqual(sess.run(size), 1)
       sess.run(stage, feed_dict={x: -1, pi: 1})
@@ -182,22 +182,23 @@ class MapStageTest(test.TestCase):
   def testCapacity(self):
     capacity = 3
 
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.int32, name='x')
         pi = array_ops.placeholder(dtypes.int64, name='pi')
         gi = array_ops.placeholder(dtypes.int64, name='gi')
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [
-                dtypes.int32,
-            ], capacity=capacity, shapes=[[]])
+        stager = data_flow_ops.MapStagingArea([
+            dtypes.int32,
+        ],
+                                              capacity=capacity,
+                                              shapes=[[]])
 
       stage = stager.put(pi, [x], [0])
       get = stager.get()
       size = stager.size()
 
-    G.finalize()
+    g.finalize()
 
     from six.moves import queue as Queue
     import threading
@@ -205,7 +206,7 @@ class MapStageTest(test.TestCase):
     queue = Queue.Queue()
     n = 8
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
@@ -234,13 +235,13 @@ class MapStageTest(test.TestCase):
                                              capacity))
 
       # Should have capacity elements in the staging area
-      self.assertTrue(sess.run(size) == capacity)
+      self.assertEqual(sess.run(size), capacity)
 
       # Clear the staging area completely
       for i in range(n):
         sess.run(get)
 
-      self.assertTrue(sess.run(size) == 0)
+      self.assertEqual(sess.run(size), 0)
 
   @test_util.run_deprecated_v1
   def testMemoryLimit(self):
@@ -248,28 +249,28 @@ class MapStageTest(test.TestCase):
     chunk = 200 * 1024  # 256K
     capacity = memory_limit // chunk
 
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.uint8, name='x')
         pi = array_ops.placeholder(dtypes.int64, name='pi')
         gi = array_ops.placeholder(dtypes.int64, name='gi')
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [dtypes.uint8], memory_limit=memory_limit, shapes=[[]])
+        stager = data_flow_ops.MapStagingArea([dtypes.uint8],
+                                              memory_limit=memory_limit,
+                                              shapes=[[]])
         stage = stager.put(pi, [x], [0])
         get = stager.get()
         size = stager.size()
 
-    G.finalize()
+    g.finalize()
 
     from six.moves import queue as Queue
     import threading
-    import numpy as np
 
     queue = Queue.Queue()
     n = 8
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # Stage data in a separate thread which will block
       # when it hits the staging area's capacity and thus
       # not fill the queue with n tokens
@@ -299,56 +300,57 @@ class MapStageTest(test.TestCase):
                                              capacity))
 
       # Should have capacity elements in the staging area
-      self.assertTrue(sess.run(size) == capacity)
+      self.assertEqual(sess.run(size), capacity)
 
       # Clear the staging area completely
       for i in range(n):
         sess.run(get)
 
-      self.assertTrue(sess.run(size) == 0)
+      self.assertEqual(sess.run(size), 0)
 
   @test_util.run_deprecated_v1
   def testOrdering(self):
     import six
     import random
 
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.int32, name='x')
         pi = array_ops.placeholder(dtypes.int64, name='pi')
         gi = array_ops.placeholder(dtypes.int64, name='gi')
       with ops.device(test.gpu_device_name()):
-        stager = data_flow_ops.MapStagingArea(
-            [
-                dtypes.int32,
-            ], shapes=[[]], ordered=True)
+        stager = data_flow_ops.MapStagingArea([
+            dtypes.int32,
+        ],
+                                              shapes=[[]],
+                                              ordered=True)
         stage = stager.put(pi, [x], [0])
         get = stager.get()
         size = stager.size()
 
-    G.finalize()
+    g.finalize()
 
     n = 10
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # Keys n-1..0
       keys = list(reversed(six.moves.range(n)))
 
       for i in keys:
         sess.run(stage, feed_dict={pi: i, x: i})
 
-      self.assertTrue(sess.run(size) == n)
+      self.assertEqual(sess.run(size), n)
 
       # Check that key, values come out in ascending order
       for i, k in enumerate(reversed(keys)):
         get_key, values = sess.run(get)
         self.assertTrue(i == k == get_key == values)
 
-      self.assertTrue(sess.run(size) == 0)
+      self.assertEqual(sess.run(size), 0)
 
   @test_util.run_deprecated_v1
   def testPartialDictInsert(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         f = array_ops.placeholder(dtypes.float32)
@@ -366,41 +368,39 @@ class MapStageTest(test.TestCase):
         size = stager.size()
         isize = stager.incomplete_size()
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # 0 complete and incomplete entries
-      self.assertTrue(sess.run([size, isize]) == [0, 0])
+      self.assertEqual(sess.run([size, isize]), [0, 0])
       # Stage key 0, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
       # Stage key 1, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 2])
+      self.assertEqual(sess.run([size, isize]), [0, 2])
 
       # Now complete key 0 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 0, v: 1})
       # 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 1])
+      self.assertEqual(sess.run([size, isize]), [1, 1])
       # We can now obtain tuple associated with key 0
-      self.assertTrue(
-          sess.run([key, ret], feed_dict={
-              gi: 0
-          }) == [0, {
+      self.assertEqual(
+          sess.run([key, ret], feed_dict={gi: 0}),
+          [0, {
               'x': 1,
               'f': 2,
               'v': 1
           }])
 
       # 0 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
       # Now complete key 1 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 1, v: 3})
       # We can now obtain tuple associated with key 1
-      self.assertTrue(
-          sess.run([key, ret], feed_dict={
-              gi: 1
-          }) == [1, {
+      self.assertEqual(
+          sess.run([key, ret], feed_dict={gi: 1}),
+          [1, {
               'x': 1,
               'f': 2,
               'v': 3
@@ -408,7 +408,7 @@ class MapStageTest(test.TestCase):
 
   @test_util.run_deprecated_v1
   def testPartialIndexInsert(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         f = array_ops.placeholder(dtypes.float32)
@@ -424,35 +424,35 @@ class MapStageTest(test.TestCase):
         size = stager.size()
         isize = stager.incomplete_size()
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # 0 complete and incomplete entries
-      self.assertTrue(sess.run([size, isize]) == [0, 0])
+      self.assertEqual(sess.run([size, isize]), [0, 0])
       # Stage key 0, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
       # Stage key 1, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 2])
+      self.assertEqual(sess.run([size, isize]), [0, 2])
 
       # Now complete key 0 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 0, v: 1})
       # 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 1])
+      self.assertEqual(sess.run([size, isize]), [1, 1])
       # We can now obtain tuple associated with key 0
-      self.assertTrue(sess.run([key, ret], feed_dict={gi: 0}) == [0, [1, 1, 2]])
+      self.assertEqual(sess.run([key, ret], feed_dict={gi: 0}), [0, [1, 1, 2]])
 
       # 0 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
       # Now complete key 1 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 1, v: 3})
       # We can now obtain tuple associated with key 1
-      self.assertTrue(sess.run([key, ret], feed_dict={gi: 1}) == [1, [1, 3, 2]])
+      self.assertEqual(sess.run([key, ret], feed_dict={gi: 1}), [1, [1, 3, 2]])
 
   @test_util.run_deprecated_v1
   def testPartialDictGetsAndPeeks(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         f = array_ops.placeholder(dtypes.float32)
@@ -476,40 +476,38 @@ class MapStageTest(test.TestCase):
         size = stager.size()
         isize = stager.incomplete_size()
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # 0 complete and incomplete entries
-      self.assertTrue(sess.run([size, isize]) == [0, 0])
+      self.assertEqual(sess.run([size, isize]), [0, 0])
       # Stage key 0, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 0, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
       # Stage key 1, x and f tuple entries
       sess.run(stage_xf, feed_dict={pi: 1, x: 1, f: 2})
-      self.assertTrue(sess.run([size, isize]) == [0, 2])
+      self.assertEqual(sess.run([size, isize]), [0, 2])
 
       # Now complete key 0 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 0, v: 1})
       # 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 1])
+      self.assertEqual(sess.run([size, isize]), [1, 1])
 
       # We can now peek at 'x' and 'f' values associated with key 0
-      self.assertTrue(sess.run(peek_xf, feed_dict={pei: 0}) == {'x': 1, 'f': 2})
+      self.assertEqual(sess.run(peek_xf, feed_dict={pei: 0}), {'x': 1, 'f': 2})
       # Peek at 'v' value associated with key 0
-      self.assertTrue(sess.run(peek_v, feed_dict={pei: 0}) == {'v': 1})
+      self.assertEqual(sess.run(peek_v, feed_dict={pei: 0}), {'v': 1})
       # 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 1])
+      self.assertEqual(sess.run([size, isize]), [1, 1])
 
       # We can now obtain 'x' and 'f' values associated with key 0
-      self.assertTrue(
-          sess.run([key_xf, get_xf], feed_dict={
-              gi: 0
-          }) == [0, {
+      self.assertEqual(
+          sess.run([key_xf, get_xf], feed_dict={gi: 0}), [0, {
               'x': 1,
               'f': 2
           }])
       # Still have 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 1])
+      self.assertEqual(sess.run([size, isize]), [1, 1])
 
       # We can no longer get 'x' and 'f' from key 0
       with self.assertRaises(errors.InvalidArgumentError) as cm:
@@ -517,40 +515,36 @@ class MapStageTest(test.TestCase):
 
       exc_str = (""Tensor at index '0' for key '0' "" 'has already been removed.')
 
-      self.assertTrue(exc_str in cm.exception.message)
+      self.assertIn(exc_str, cm.exception.message)
 
       # Obtain 'v' value associated with key 0
-      self.assertTrue(
-          sess.run([key_v, get_v], feed_dict={
-              gi: 0
-          }) == [0, {
+      self.assertEqual(
+          sess.run([key_v, get_v], feed_dict={gi: 0}), [0, {
               'v': 1
           }])
       # 0 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [0, 1])
+      self.assertEqual(sess.run([size, isize]), [0, 1])
 
       # Now complete key 1 with tuple entry v
       sess.run(stage_v, feed_dict={pi: 1, v: 1})
       # 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 0])
+      self.assertEqual(sess.run([size, isize]), [1, 0])
 
       # Pop without key to obtain 'x' and 'f' values associated with key 1
-      self.assertTrue(sess.run([pop_key_xf, pop_xf]) == [1, {'x': 1, 'f': 2}])
+      self.assertEqual(sess.run([pop_key_xf, pop_xf]), [1, {'x': 1, 'f': 2}])
       # still 1 complete and 1 incomplete entry
-      self.assertTrue(sess.run([size, isize]) == [1, 0])
+      self.assertEqual(sess.run([size, isize]), [1, 0])
       # We can now obtain 'x' and 'f' values associated with key 1
-      self.assertTrue(
-          sess.run([pop_key_v, pop_v], feed_dict={
-              pi: 1
-          }) == [1, {
+      self.assertEqual(
+          sess.run([pop_key_v, pop_v], feed_dict={pi: 1}), [1, {
               'v': 1
           }])
       # Nothing is left
-      self.assertTrue(sess.run([size, isize]) == [0, 0])
+      self.assertEqual(sess.run([size, isize]), [0, 0])
 
   @test_util.run_deprecated_v1
   def testPartialIndexGets(self):
-    with ops.Graph().as_default() as G:
+    with ops.Graph().as_default() as g:
       with ops.device('/cpu:0'):
         x = array_ops.placeholder(dtypes.float32)
         f = array_ops.placeholder(dtypes.float32)
@@ -568,28 +562,72 @@ class MapStageTest(test.TestCase):
         size = stager.size()
         isize = stager.incomplete_size()
 
-    G.finalize()
+    g.finalize()
 
-    with self.session(graph=G) as sess:
+    with self.session(graph=g) as sess:
       # Stage complete tuple
       sess.run(stage_xvf, feed_dict={pi: 0, x: 1, f: 2, v: 3})
 
-      self.assertTrue(sess.run([size, isize]) == [1, 0])
+      self.assertEqual(sess.run([size, isize]), [1, 0])
 
       # Partial get using indices
-      self.assertTrue(
-          sess.run([key_xf, get_xf], feed_dict={
-              gi: 0
-          }) == [0, [1, 2]])
+      self.assertEqual(
+          sess.run([key_xf, get_xf], feed_dict={gi: 0}), [0, [1, 2]])
 
       # Still some of key 0 left
-      self.assertTrue(sess.run([size, isize]) == [1, 0])
+      self.assertEqual(sess.run([size, isize]), [1, 0])
 
       # Partial get of remaining index
-      self.assertTrue(sess.run([key_v, get_v], feed_dict={gi: 0}) == [0, [3]])
+      self.assertEqual(sess.run([key_v, get_v], feed_dict={gi: 0}), [0, [3]])
 
       # All gone
-      self.assertTrue(sess.run([size, isize]) == [0, 0])
+      self.assertEqual(sess.run([size, isize]), [0, 0])
+
+  @test_util.run_deprecated_v1
+  def testNonScalarKeyOrderedMap(self):
+    with ops.Graph().as_default() as g:
+      x = array_ops.placeholder(dtypes.float32)
+      v = 2. * (array_ops.zeros([128, 128]) + x)
+      t = data_flow_ops.gen_data_flow_ops.ordered_map_stage(
+          key=constant_op.constant(value=[1], shape=(1, 3), dtype=dtypes.int64),
+          indices=np.array([[6]]),
+          values=[x, v],
+          dtypes=[dtypes.int64],
+          capacity=0,
+          memory_limit=0,
+          container='container1',
+          shared_name='',
+          name=None)
+
+    g.finalize()
+
+    with self.session(graph=g) as sess:
+      with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                  'key must be an int64 scalar'):
+        sess.run(t, feed_dict={x: 1})
+
+  @test_util.run_deprecated_v1
+  def testNonScalarKeyUnorderedMap(self):
+    with ops.Graph().as_default() as g:
+      x = array_ops.placeholder(dtypes.float32)
+      v = 2. * (array_ops.zeros([128, 128]) + x)
+      t = data_flow_ops.gen_data_flow_ops.map_stage(
+          key=constant_op.constant(value=[1], shape=(1, 3), dtype=dtypes.int64),
+          indices=np.array([[6]]),
+          values=[x, v],
+          dtypes=[dtypes.int64],
+          capacity=0,
+          memory_limit=0,
+          container='container1',
+          shared_name='',
+          name=None)
+
+    g.finalize()
+
+    with self.session(graph=g) as sess:
+      with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                  'key must be an int64 scalar'):
+        sess.run(t, feed_dict={x: 1})
 
 
 if __name__ == '__main__':
",1,test
ba4e8ac4dc2991e350d5cc407f8598c8d4ee70fb,tensorflow/tensorflow,"Fix potential divide by zero error when executing FractionalMaxPool, when pooling ratio is higher than input size for a particular dimension.

PiperOrigin-RevId: 412151722
Change-Id: I06e57cbb8eca43816eff79eac264fa7aae8f7163",fractional_max_pool_op.cc,"@@ -83,6 +83,13 @@ class FractionalMaxPoolOp : public OpKernel {
     std::vector<int> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
       input_size[i] = tensor_in.dim_size(i);
+
+      OP_REQUIRES(
+          context, input_size[i] >= pooling_ratio_[i],
+          errors::InvalidArgument(""Pooling ratio is higher than input ""
+                                  ""dimension size for dimension "",
+                                  i, "". Input dim size: "", input_size[i],
+                                  "" pooling ratio: "", pooling_ratio_[i]));
     }
     // Output size.
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
",1,train
ba4e8ac4dc2991e350d5cc407f8598c8d4ee70fb,tensorflow/tensorflow,"Fix potential divide by zero error when executing FractionalMaxPool, when pooling ratio is higher than input size for a particular dimension.

PiperOrigin-RevId: 412151722
Change-Id: I06e57cbb8eca43816eff79eac264fa7aae8f7163",fractional_max_pool_op_test.py,"@@ -20,6 +20,7 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_nn_ops
@@ -319,6 +320,24 @@ class FractionalMaxPoolTest(test.TestCase):
       nn_ops.fractional_max_pool(
           rand_mat, [1, 1.5, 1.5, 1], seed=1, seed2=1, deterministic=True)
 
+  def testPoolingRatio(self):
+    with self.cached_session() as _:
+      with self.assertRaisesRegex(
+          errors.InvalidArgumentError,
+          r""Pooling ratio is higher than input dimension size for dimension 1.*""
+      ):
+        result = nn_ops.gen_nn_ops.fractional_max_pool(
+            value=constant_op.constant(
+                value=[[[[1, 4, 2, 3]]]], dtype=dtypes.int64),
+            pooling_ratio=[1.0, 1.44, 1.73, 1.0],
+            pseudo_random=False,
+            overlapping=False,
+            deterministic=False,
+            seed=0,
+            seed2=0,
+            name=None)
+        self.evaluate(result)
+
 
 class FractionalMaxPoolGradTest(test.TestCase):
   """"""Tests for FractionalMaxPoolGrad.
",1,train
965b97e4a9650495cda5a8c210ef6684b4b9eceb,tensorflow/tensorflow,"Properly validate sparse tensor in `SparseTensorSliceDataset`

Existing validation was incomplete.

PiperOrigin-RevId: 415375048
Change-Id: I14cd18f29ede73286f3ffac35171bd15828997e9",sparse_tensor_slice_dataset_op.cc,"@@ -240,28 +240,29 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel {
     OP_REQUIRES_OK(ctx, ctx->input(""dense_shape"", &dense_shape));
 
     OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(indices->shape()),
-                errors::InvalidArgument(
-                    ""Input indices should be a matrix but received shape "",
-                    indices->shape().DebugString()));
-
-    const auto num_indices = indices->NumElements();
-    const auto num_values = values->NumElements();
-    if (num_indices == 0 || num_values == 0) {
-      OP_REQUIRES(ctx, num_indices == num_values,
-                  errors::InvalidArgument(
-                      ""If indices or values are empty, the other one must also ""
-                      ""be. Got indices of shape "",
-                      indices->shape().DebugString(), "" and values of shape "",
-                      values->shape().DebugString()));
-    }
+                errors::InvalidArgument(""Input indices must be a matrix. Got: "",
+                                        indices->shape().DebugString()));
     OP_REQUIRES(ctx, TensorShapeUtils::IsVector(values->shape()),
-                errors::InvalidArgument(
-                    ""Input values should be a vector but received shape "",
-                    indices->shape().DebugString()));
+                errors::InvalidArgument(""Input values must be a vector. Got: "",
+                                        values->shape().DebugString()));
     OP_REQUIRES(ctx, TensorShapeUtils::IsVector(dense_shape->shape()),
+                errors::InvalidArgument(""Input shape must be a vector. Got: "",
+                                        dense_shape->shape().DebugString()));
+    OP_REQUIRES(
+        ctx, values->shape().dim_size(0) == indices->shape().dim_size(0),
+        errors::InvalidArgument(
+            ""Number of values must match first dimension of indices. "", ""Got "",
+            values->shape().dim_size(0),
+            "" values, indices shape: "", indices->shape().DebugString()));
+    OP_REQUIRES(
+        ctx, dense_shape->shape().dim_size(0) == indices->shape().dim_size(1),
+        errors::InvalidArgument(
+            ""Number of dimensions must match second dimension of indices. "",
+            ""Got "", dense_shape->shape().dim_size(0),
+            "" dimensions, indices shape: "", indices->shape().DebugString()));
+    OP_REQUIRES(ctx, dense_shape->NumElements() > 0,
                 errors::InvalidArgument(
-                    ""Input shape should be a vector but received shape "",
-                    dense_shape->shape().DebugString()));
+                    ""The shape argument requires at least one element.""));
 
     // We currently ensure that `sparse_tensor` is ordered in the
     // batch dimension.
",1,train
965b97e4a9650495cda5a8c210ef6684b4b9eceb,tensorflow/tensorflow,"Properly validate sparse tensor in `SparseTensorSliceDataset`

Existing validation was incomplete.

PiperOrigin-RevId: 415375048
Change-Id: I14cd18f29ede73286f3ffac35171bd15828997e9",from_sparse_tensor_slices_test.py,"@@ -134,6 +134,25 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase,
       with self.assertRaises(errors.InvalidArgumentError):
         sess.run(init_op, feed_dict={st: sparse_feed})
 
+  @combinations.generate(combinations.combine(tf_api_version=1, mode=[""graph""]))
+  def testEmptySparseTensorSlicesInvalid2(self):
+    """"""Test a dataset based on invalid `tf.sparse.SparseTensor`.""""""
+    st = array_ops.sparse_placeholder(dtypes.float64)
+    iterator = dataset_ops.make_initializable_iterator(
+        dataset_ops.Dataset.from_sparse_tensor_slices(st))
+    init_op = iterator.initializer
+
+    with self.cached_session() as sess:
+      # Test with an empty sparse tensor but with non empty values.
+      empty_indices = [[]]
+      empty_values = []
+      dense_shape = [1, 1]
+      sparse_feed = sparse_tensor.SparseTensorValue(empty_indices, empty_values,
+                                                    dense_shape)
+      # Here, we expect the test to fail when running the feed.
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(init_op, feed_dict={st: sparse_feed})
+
   @combinations.generate(combinations.combine(tf_api_version=2, mode=[""eager""]))
   def testFromSparseTensorSlicesError(self):
     with self.assertRaises(AttributeError):
",1,train
7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values.

PiperOrigin-RevId: 415063028
Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",bincount_op.cc,"@@ -276,6 +276,9 @@ class DenseBincountOp : public OpKernel {
     const Tensor& size_t = ctx->input(1);
     const Tensor& weights = ctx->input(2);
 
+    OP_REQUIRES(ctx, size_t.dims() == 0,
+                errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                        size_t.dims()));
     Tidx size = size_t.scalar<Tidx>()();
     OP_REQUIRES(
         ctx, size >= 0,
@@ -372,6 +375,9 @@ class SparseBincountOp : public OpKernel {
     const auto weights = ctx->input(4).flat<T>();
     const int64_t weights_size = weights.size();
 
+    OP_REQUIRES(ctx, size_t.dims() == 0,
+                errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                        size_t.dims()));
     Tidx size = size_t.scalar<Tidx>()();
     OP_REQUIRES(
         ctx, size >= 0,
@@ -462,6 +468,9 @@ class RaggedBincountOp : public OpKernel {
     const auto weights = ctx->input(3).flat<T>();
     const int64_t weights_size = weights.size();
 
+    OP_REQUIRES(ctx, size_t.dims() == 0,
+                errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                        size_t.dims()));
     Tidx size = size_t.scalar<Tidx>()();
     OP_REQUIRES(
         ctx, size >= 0,
",1,test
7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values.

PiperOrigin-RevId: 415063028
Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",math_ops.cc,"@@ -1699,6 +1699,11 @@ REGISTER_OP(""Bincount"")
         return Status::OK();
       }
 
+      if (size_tensor->dims() != 0) {
+        return errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                       size_tensor->dims());
+      }
+
       // Return `[size]` shape if size is known.
       int32_t size_val = size_tensor->scalar<int32>()();
       if (size_val < 0) {
@@ -1730,6 +1735,10 @@ REGISTER_OP(""DenseBincount"")
         c->set_output(0, c->UnknownShape());
         return Status::OK();
       }
+      if (size_tensor->dims() != 0) {
+        return errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                       size_tensor->dims());
+      }
 
       int64_t size_val;
       DataType dtype;
@@ -1771,6 +1780,10 @@ REGISTER_OP(""SparseBincount"")
         c->set_output(0, c->UnknownShape());
         return Status::OK();
       }
+      if (size_tensor->dims() != 0) {
+        return errors::InvalidArgument(""Shape must be rank 0 but is rank "",
+                                       size_tensor->dims());
+      }
 
       int64_t size_val;
       DataType dtype;
",1,test
7019ce4f68925fd01cdafde26f8d8c938f47e6f9,tensorflow/tensorflow,"Fix check-fail when bincount ops are passed invalid values.

PiperOrigin-RevId: 415063028
Change-Id: I20f8dc09933ddca1111c4efbf9a3a1e863215d02",bincount_op_test.py,"@@ -344,6 +344,14 @@ class BincountOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
             gen_math_ops.dense_bincount(
                 input=[[[1, 2, 3], [0, 3, 2]]], weights=[], size=10))
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_size_is_not_scalar(self):  # b/206619828
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                ""Shape must be rank 0 but is rank 1""):
+      self.evaluate(
+          gen_math_ops.dense_bincount(
+              input=[0], size=[1, 1], weights=[3], binary_output=False))
+
 
 class SparseBincountOpTest(test_util.TensorFlowTestCase,
                            parameterized.TestCase):
@@ -511,6 +519,19 @@ class SparseBincountOpTest(test_util.TensorFlowTestCase,
                 weights=[],
                 binary_output=True)))
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_size_is_not_scalar(self):  # b/206619828
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                ""Shape must be rank 0 but is rank 1""):
+      self.evaluate(
+          gen_math_ops.sparse_bincount(
+              indices=[[0], [1]],
+              values=[0, 0],
+              dense_shape=[1, 1],
+              size=[1, 1],
+              weights=[0, 0],
+              binary_output=False))
+
 
 class RaggedBincountOpTest(test_util.TensorFlowTestCase,
                            parameterized.TestCase):
@@ -650,6 +671,19 @@ class RaggedBincountOpTest(test_util.TensorFlowTestCase,
                 size=size,
                 binary_output=True)))
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_size_is_not_scalar(self):  # b/206619828
+    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
+                                ""Shape must be rank 0 but is rank 1""):
+      self.evaluate(
+          gen_math_ops.ragged_bincount(
+              splits=[0, 0, 1],
+              values=[1],
+              size=[1, 1],
+              weights=[0, 0, 0],
+              binary_output=False,
+              name=None))
+
 
 if __name__ == ""__main__"":
   googletest.main()
",1,test
6f4d3e8139ec724dbbcb40505891c81dd1052c4a,tensorflow/tensorflow,"Prevent crash due to integer overflow followed by allocating negative sized array.

PiperOrigin-RevId: 414891322
Change-Id: I5df390e0dc1d9f115209293708950cdf9306931c",count_ops.cc,"@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <limits>
+
 #include ""absl/container/flat_hash_map.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/op_requires.h""
@@ -23,6 +25,9 @@ limitations under the License.
 
 namespace tensorflow {
 
+// Don't allocate too large `BatchedMap<T>` objects
+static int kMaxBatches = std::numeric_limits<int>::max();
+
 template <class T>
 using BatchedMap = std::vector<absl::flat_hash_map<int64_t, T>>;
 
@@ -235,6 +240,10 @@ class SparseCount : public OpKernel {
 
     bool is_1d = shape.NumElements() == 1;
     int num_batches = is_1d ? 1 : shape_vector(0);
+    OP_REQUIRES(
+        context, 0 < num_batches && num_batches < kMaxBatches,
+        errors::InvalidArgument(""Cannot allocate "", num_batches,
+                                "" batches, is the dense shape too wide?""));
 
     const auto values_values = values.flat<T>();
     const auto weight_values = weights.flat<W>();
",1,train
53b0dd6dc5957652f35964af16b892ec9af4a559,tensorflow/tensorflow,"Fix nullptr exception in QuantizedMaxPool op when empty list is sent to min_input or max_input parameters.

PiperOrigin-RevId: 413960973
Change-Id: I9e3ded593f3c4eabf0d6d5dc356e6a19a3ad2682",quantized_pooling_ops.cc,"@@ -15,6 +15,8 @@ limitations under the License.
 
 // See docs in ../ops/nn_ops.cc.
 
+#include ""tensorflow/core/framework/op_requires.h""
+#include ""tensorflow/core/platform/errors.h""
 #define EIGEN_USE_THREADS
 
 #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
@@ -117,6 +119,18 @@ class QuantizedMaxPoolingOp : public MaxPoolingOp<Device, T> {
       : MaxPoolingOp<Device, T>(context) {}
 
   void Compute(OpKernelContext* context) override {
+    auto min_input_tensor = context->input(1);
+    auto max_input_tensor = context->input(2);
+    OP_REQUIRES(
+        context, min_input_tensor.NumElements() == 1,
+        errors::InvalidArgument(
+            ""min_input must be a scalar float value, got tensor with shape "",
+            min_input_tensor.shape()));
+    OP_REQUIRES(
+        context, max_input_tensor.NumElements() == 1,
+        errors::InvalidArgument(
+            ""max_input must be a scalar float value, got tensor with shape "",
+            max_input_tensor.shape()));
     const float min_input = context->input(1).flat<float>()(0);
     const float max_input = context->input(2).flat<float>()(0);
     MaxPoolingOp<Device, T>::Compute(context);
",1,train
2b7100d6cdff36aa21010a82269bc05a6d1cc74a,tensorflow/tensorflow,"Cleanup and remove duplicate validation in `SparseCount`.

We have validation that is duplicated, checking different conditions in different formats, and failing to capture all cases. This should fix all the previous bugs.

PiperOrigin-RevId: 414886981
Change-Id: Ibf0bba0beb057b76d505324bb9487565daf95f01",count_ops.cc,"@@ -185,6 +185,27 @@ class SparseCount : public OpKernel {
                 errors::InvalidArgument(
                     ""Input indices must be a 2-dimensional tensor. Got: "",
                     indices.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(values.shape()),
+                errors::InvalidArgument(""Input values must be a vector. Got: "",
+                                        values.shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(shape.shape()),
+                errors::InvalidArgument(""Input shape must be a vector. Got: "",
+                                        shape.shape().DebugString()));
+    OP_REQUIRES(context,
+                values.shape().dim_size(0) == indices.shape().dim_size(0),
+                errors::InvalidArgument(
+                    ""Number of values must match first dimension of indices."",
+                    ""Got "", values.shape().dim_size(0),
+                    "" values, indices shape: "", indices.shape().DebugString()));
+    OP_REQUIRES(
+        context, shape.shape().dim_size(0) == indices.shape().dim_size(1),
+        errors::InvalidArgument(
+            ""Number of dimensions must match second dimension of indices."",
+            ""Got "", shape.shape().dim_size(0),
+            "" dimensions, indices shape: "", indices.shape().DebugString()));
+    OP_REQUIRES(context, shape.NumElements() > 0,
+                errors::InvalidArgument(
+                    ""The shape argument requires at least one element.""));
 
     if (use_weights) {
       OP_REQUIRES(
@@ -195,28 +216,11 @@ class SparseCount : public OpKernel {
               ""; values shape: "", values.shape().DebugString()));
     }
 
-    OP_REQUIRES(context, shape.NumElements() != 0,
-                errors::InvalidArgument(
-                    ""The shape argument requires at least one element.""));
-
     bool is_1d = shape.NumElements() == 1;
     auto shape_vector = shape.flat<int64_t>();
     int num_batches = is_1d ? 1 : shape_vector(0);
     int num_values = values.NumElements();
 
-    for (int b = 0; b < shape_vector.size(); b++) {
-      OP_REQUIRES(context, shape_vector(b) >= 0,
-                  errors::InvalidArgument(
-                      ""Elements in dense_shape must be >= 0. Instead got:"",
-                      shape.DebugString()));
-    }
-
-    OP_REQUIRES(context, num_values == indices.shape().dim_size(0),
-                errors::InvalidArgument(
-                    ""Number of values must match first dimension of indices."",
-                    ""Got "", num_values,
-                    "" values, indices shape: "", indices.shape().DebugString()));
-
     const auto indices_values = indices.matrix<int64_t>();
     const auto values_values = values.flat<T>();
     const auto weight_values = weights.flat<W>();
@@ -225,16 +229,6 @@ class SparseCount : public OpKernel {
 
     T max_value = 0;
 
-    OP_REQUIRES(context, num_values <= indices.shape().dim_size(0),
-                errors::InvalidArgument(
-                    ""The first dimension of indices must be equal to or ""
-                    ""greather than number of values. ( "",
-                    indices.shape().dim_size(0), "" vs. "", num_values, "" )""));
-    OP_REQUIRES(context, indices.shape().dim_size(1) > 0,
-                errors::InvalidArgument(""The second dimension of indices must ""
-                                        ""be greater than 0. Received: "",
-                                        indices.shape().dim_size(1)));
-
     for (int idx = 0; idx < num_values; ++idx) {
       int batch = is_1d ? 0 : indices_values(idx, 0);
       if (batch >= num_batches) {
",1,train
adbbabdb0d3abb3cdeac69e38a96de1d678b24b3,tensorflow/tensorflow,"Further validate sparse tensor for `SparseCount`: indices must be valid within dense shape.

PiperOrigin-RevId: 414888122
Change-Id: I4552bd74c135ecd4bcb5448acc0a3ce9402d8286",count_ops.cc,"@@ -206,6 +206,23 @@ class SparseCount : public OpKernel {
     OP_REQUIRES(context, shape.NumElements() > 0,
                 errors::InvalidArgument(
                     ""The shape argument requires at least one element.""));
+    // Validate indices: each index must be valid for the corresponding
+    // dimension. This could be possibly done better.
+    const auto indices_values = indices.matrix<int64_t>();
+    const auto shape_vector = shape.vec<int64_t>();
+    int num_values = values.NumElements();  // same as first dim of indices
+    int rank = indices.shape().dim_size(1);
+    for (int i = 0; i < num_values; ++i) {
+      for (int j = 0; j < rank; ++j) {
+        OP_REQUIRES(
+            context,
+            indices_values(i, j) >= 0 && indices_values(i, j) < shape_vector(j),
+            errors::InvalidArgument(
+                ""Invalid index value at "", i, "": dimension "", j, "" has value "",
+                indices_values(i, j), "" which is not in [0, "", shape_vector(j),
+                "") (as given by dense shape "", shape.DebugString()));
+      }
+    }
 
     if (use_weights) {
       OP_REQUIRES(
@@ -217,11 +234,8 @@ class SparseCount : public OpKernel {
     }
 
     bool is_1d = shape.NumElements() == 1;
-    auto shape_vector = shape.flat<int64_t>();
     int num_batches = is_1d ? 1 : shape_vector(0);
-    int num_values = values.NumElements();
 
-    const auto indices_values = indices.matrix<int64_t>();
     const auto values_values = values.flat<T>();
     const auto weight_values = weights.flat<W>();
 
",1,test
e5b0eec199c2d03de54fd6a7fd9275692218e2bc,tensorflow/tensorflow,"[lite] Add validation check for dilation height/width to be positive integers.

PiperOrigin-RevId: 416429178
Change-Id: If7cdcddca54486434d9b2f06e7e2b401d7c3ee25",depthwise_conv.cc,"@@ -115,6 +115,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   TF_LITE_ENSURE_EQ(context, NumDimensions(input), 4);
   TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 4);
+  TF_LITE_ENSURE(context, params->dilation_height_factor > 0);
+  TF_LITE_ENSURE(context, params->dilation_width_factor > 0);
 
   const TfLiteType data_type = input->type;
 
",1,train
8c6f391a2282684a25cbfec7687bd5d35261a209,tensorflow/tensorflow,"[lite] Add check for bias_size is zero to avoid division by zero. This shouldn't happen for properly converted models. Just safety check

PiperOrigin-RevId: 416383645
Change-Id: If8e508bf696ae8ecfb927e69c139a8ccf7fe60cb",common.h,"@@ -75,6 +75,7 @@ float ActivationFunction(float x) {
 inline void BiasAndClamp(float clamp_min, float clamp_max, int bias_size,
                          const float* bias_data, int array_size,
                          float* array_data) {
+  if (bias_size == 0) return;
   // Note: see b/132215220: in May 2019 we thought it would be OK to replace
   // this with the Eigen one-liner:
   //   return (array.colwise() + bias).cwiseMin(clamp_max).cwiseMin(clamp_max).
",1,train
a1e1511dde36b3f8aa27a6ec630838e7ea40e091,tensorflow/tensorflow,"[lite] Update TfLiteIntArrayCreate to return size_t

PiperOrigin-RevId: 416439896
Change-Id: I847f69b68d1ddaff4b1e925a09b8b69c1756653b",common.c,"@@ -21,10 +21,10 @@ limitations under the License.
 #include <string.h>
 #endif  // TF_LITE_STATIC_MEMORY
 
-int TfLiteIntArrayGetSizeInBytes(int size) {
+size_t TfLiteIntArrayGetSizeInBytes(int size) {
   static TfLiteIntArray dummy;
 
-  int computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
+  size_t computed_size = sizeof(dummy) + sizeof(dummy.data[0]) * size;
 #if defined(_MSC_VER)
   // Context for why this is needed is in http://b/189926408#comment21
   computed_size -= sizeof(dummy.data[0]);
@@ -51,7 +51,7 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
 #ifndef TF_LITE_STATIC_MEMORY
 
 TfLiteIntArray* TfLiteIntArrayCreate(int size) {
-  int alloc_size = TfLiteIntArrayGetSizeInBytes(size);
+  size_t alloc_size = TfLiteIntArrayGetSizeInBytes(size);
   if (alloc_size <= 0) return NULL;
   TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
   if (!ret) return ret;
",1,train
a1e1511dde36b3f8aa27a6ec630838e7ea40e091,tensorflow/tensorflow,"[lite] Update TfLiteIntArrayCreate to return size_t

PiperOrigin-RevId: 416439896
Change-Id: I847f69b68d1ddaff4b1e925a09b8b69c1756653b",common.h,"@@ -98,7 +98,7 @@ typedef struct TfLiteIntArray {
 
 // Given the size (number of elements) in a TfLiteIntArray, calculate its size
 // in bytes.
-int TfLiteIntArrayGetSizeInBytes(int size);
+size_t TfLiteIntArrayGetSizeInBytes(int size);
 
 #ifndef TF_LITE_STATIC_MEMORY
 // Create a array of a given `size` (uninitialized entries).
",1,train
1de49725a5fc4e48f1a3b902ec3599ee99283043,tensorflow/tensorflow,"[lite] Check for overflow when creating required bytes.

PiperOrigin-RevId: 417629001
Change-Id: Ia7feb3ea8e988f4fd4b3c98c1a1fed4557d99fd7",embedding_lookup_sparse.cc,"@@ -72,6 +72,7 @@ limitations under the License.
 #include ""tensorflow/lite/kernels/internal/tensor_ctypes.h""
 #include ""tensorflow/lite/kernels/internal/tensor_utils.h""
 #include ""tensorflow/lite/kernels/kernel_util.h""
+#include ""tensorflow/lite/util.h""
 
 namespace tflite {
 namespace ops {
@@ -175,25 +176,33 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank);
   TF_LITE_ENSURE(context, output_shape != nullptr);
   int k = 0;
-  int embedding_size = 1;
-  int lookup_size = 1;
+  size_t embedding_size = 1;
+  size_t lookup_size = 1;
   for (int i = 0; i < lookup_rank - 1; i++, k++) {
-    const int dim = dense_shape->data.i32[i];
-    lookup_size *= dim;
+    const size_t dim = dense_shape->data.i32[i];
+    TF_LITE_ENSURE_MSG(
+        context,
+        MultiplyAndCheckOverflow(lookup_size, dim, &lookup_size) == kTfLiteOk,
+        ""Lookup size overflowed."");
     output_shape->data[k] = dim;
   }
   for (int i = 1; i < embedding_rank; i++, k++) {
-    const int dim = SizeOfDimension(value, i);
-    embedding_size *= dim;
+    const size_t dim = SizeOfDimension(value, i);
+    TF_LITE_ENSURE_MSG(context,
+                       MultiplyAndCheckOverflow(embedding_size, dim,
+                                                &embedding_size) == kTfLiteOk,
+                       ""Embedding size overflowed."");
     output_shape->data[k] = dim;
   }
   TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, output_shape));
-  const int output_size = lookup_size * embedding_size;
+  const size_t output_size = lookup_size * embedding_size;
   TfLiteTensorRealloc(output_size * sizeof(float), output);
 
   float* output_ptr = GetTensorData<float>(output);
   const float* weights_ptr = GetTensorData<float>(weights);
   const float* value_ptr = GetTensorData<float>(value);
+  // Makes sure reallocation was successful.
+  TF_LITE_ENSURE(context, output_ptr != nullptr);
 
   std::fill_n(output_ptr, output_size, 0.0f);
 
",1,train
a4e401da71458d253b05e41f28637b65baf64be4,tensorflow/tensorflow,"Prevent segfault in `embedding_lookup_sparse.cc`

Previous fixes missed one additional case.

PiperOrigin-RevId: 417676944
Change-Id: I8ab412155cf9b1e897448a6611d209eaa7ca9e66",embedding_lookup_sparse.cc,"@@ -159,6 +159,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 3, &weights));
   const TfLiteTensor* value;
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 4, &value));
+  const size_t values_size = NumElements(value);
 
   const int lookup_rank = SizeOfDimension(indices, 1);
   const int embedding_rank = NumDimensions(value);
@@ -253,6 +254,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     current_squares_weight += w * w;
     current_total_weight += w;
     for (int k = 0; k < embedding_size; k++) {
+      // only index if indices are valid
+      if (current_output_offset + k < 0) continue;
+      if (current_output_offset + k >= output_size) continue;
+      if (example_embedding_offset + k < 0) continue;
+      if (example_embedding_offset + k >= values_size) continue;
       output_ptr[current_output_offset + k] +=
           value_ptr[example_embedding_offset + k] * w;
     }
",1,train
f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it.

PiperOrigin-RevId: 416897229
Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",subgraph.cc,"@@ -690,27 +690,6 @@ TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
   return kTfLiteOk;
 }
 
-namespace {
-// Multiply two sizes and return true if overflow occurred;
-// This is based off tensorflow/overflow.h but is simpler as we already
-// have unsigned numbers. It is also generalized to work where sizeof(size_t)
-// is not 8.
-TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) {
-  // Multiplying a * b where a and b are size_t cannot result in overflow in a
-  // size_t accumulator if both numbers have no non-zero bits in their upper
-  // half.
-  constexpr size_t size_t_bits = 8 * sizeof(size_t);
-  constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2;
-  *product = a * b;
-  // If neither integers have non-zero bits past 32 bits can't overflow.
-  // Otherwise check using slow devision.
-  if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) {
-    if (a != 0 && *product / a != b) return kTfLiteError;
-  }
-  return kTfLiteOk;
-}
-}  // namespace
-
 TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
                                      size_t dims_size, size_t* bytes) {
   TF_LITE_ENSURE(&context_, bytes != nullptr);
",1,train
f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it.

PiperOrigin-RevId: 416897229
Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 
 #include ""tensorflow/lite/builtin_ops.h""
 #include ""tensorflow/lite/c/common.h""
+#include ""tensorflow/lite/core/macros.h""
 #include ""tensorflow/lite/schema/schema_generated.h""
 
 namespace tflite {
@@ -176,4 +177,19 @@ bool IsValidationSubgraph(const char* name) {
   // NOLINTNEXTLINE: can't use absl::StartsWith as absl is not allowed.
   return name && std::string(name).find(kValidationSubgraphNamePrefix) == 0;
 }
+
+TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product) {
+  // Multiplying a * b where a and b are size_t cannot result in overflow in a
+  // size_t accumulator if both numbers have no non-zero bits in their upper
+  // half.
+  constexpr size_t size_t_bits = 8 * sizeof(size_t);
+  constexpr size_t overflow_upper_half_bit_position = size_t_bits / 2;
+  *product = a * b;
+  // If neither integers have non-zero bits past 32 bits can't overflow.
+  // Otherwise check using slow division.
+  if (TFLITE_EXPECT_FALSE((a | b) >> overflow_upper_half_bit_position != 0)) {
+    if (a != 0 && *product / a != b) return kTfLiteError;
+  }
+  return kTfLiteOk;
+}
 }  // namespace tflite
",1,train
f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it.

PiperOrigin-RevId: 416897229
Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util.h,"@@ -99,6 +99,12 @@ constexpr char kValidationSubgraphNamePrefix[] = ""VALIDATION:"";
 // Checks whether the prefix of the subgraph name indicates the subgraph is a
 // validation subgraph.
 bool IsValidationSubgraph(const char* name);
+
+// Multiply two sizes and return kTfLiteError if overflow occurred;
+// This is based off tensorflow/overflow.h but is simpler as we already
+// have unsigned numbers. It is also generalized to work where sizeof(size_t)
+// is not 8.
+TfLiteStatus MultiplyAndCheckOverflow(size_t a, size_t b, size_t* product);
 }  // namespace tflite
 
 #endif  // TENSORFLOW_LITE_UTIL_H_
",1,train
f19be71717c497723ba0cea0379e84f061a75e01,tensorflow/tensorflow,"[lite] Move MultiplyAndCheckOverflow to util to be able to share it.

PiperOrigin-RevId: 416897229
Change-Id: I5feb44881bdcbb6ed911da4f17c55bb978754059",util_test.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include <vector>
 
 #include <gtest/gtest.h>
+#include ""tensorflow/lite/c/c_api_types.h""
 #include ""tensorflow/lite/c/common.h""
 #include ""tensorflow/lite/schema/schema_generated.h""
 
@@ -130,5 +131,12 @@ TEST(ValidationSubgraph, NameIsDetected) {
   EXPECT_TRUE(IsValidationSubgraph(""VALIDATION:main""));
 }
 
+TEST(MultiplyAndCheckOverflow, Validate) {
+  size_t res = 0;
+  EXPECT_TRUE(MultiplyAndCheckOverflow(1, 2, &res) == kTfLiteOk);
+  EXPECT_FALSE(MultiplyAndCheckOverflow(static_cast<size_t>(123456789023),
+                                        1223423425, &res) == kTfLiteOk);
+}
+
 }  // namespace
 }  // namespace tflite
",1,train
6364463d6f5b6254cac3d6aedf999b6a96225038,tensorflow/tensorflow,"[lite] Add some safety checks to avoid out-of-bounds access for sparsity format

PiperOrigin-RevId: 416910386
Change-Id: Ic0b4dc048dc4b5a6309c572b8c4c9f776e4db60a",sparsity_format_converter.cc,"@@ -282,10 +282,12 @@ void FormatConverter<T>::InitSparseToDenseConverter(
   block_size_.resize(block_map_.size());
   for (int i = 0; i < original_rank; i++) {
     if (block_dim < block_map_.size() && block_map_[block_dim] == i) {
-      int orig_dim = traversal_order_[original_rank + block_dim];
-      block_size_[block_dim] = dense_size[orig_dim];
-      blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim];
-      block_dim++;
+      if (original_rank + block_dim < traversal_order_.size()) {
+        int orig_dim = traversal_order_[original_rank + block_dim];
+        block_size_[block_dim] = dense_size[orig_dim];
+        blocked_shape_[i] = dense_shape_[i] / dense_size[orig_dim];
+        block_dim++;
+      }
     } else {
       blocked_shape_[i] = dense_shape_[i];
     }
@@ -328,13 +330,15 @@ void FormatConverter<T>::Populate(const T* src_data, std::vector<int> indices,
       Populate(src_data, indices, level + 1, prev_idx * shape_of_level + i,
                src_data_ptr, dest_data);
     }
-  } else {
+  } else if (prev_idx + 1 < dim_metadata_[metadata_idx].size()) {
     const auto& array_segments = dim_metadata_[metadata_idx];
     const auto& array_indices = dim_metadata_[metadata_idx + 1];
     for (int i = array_segments[prev_idx]; i < array_segments[prev_idx + 1];
          i++) {
-      indices[level] = array_indices[i];
-      Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data);
+      if (i < array_indices.size() && level < indices.size()) {
+        indices[level] = array_indices[i];
+        Populate(src_data, indices, level + 1, i, src_data_ptr, dest_data);
+      }
     }
   }
 }
",1,train
6c0b2b70eeee588591680f5b7d5d38175fd7cdf6,tensorflow/tensorflow,"[lite] add validation check for sparse fully connected

PiperOrigin-RevId: 417629354
Change-Id: If96171c4bd4f5fdb01d6368d6deab19d1c9beca7",fully_connected.cc,"@@ -928,6 +928,36 @@ TfLiteStatus EvalShuffledQuantized(TfLiteContext* context, TfLiteNode* node,
   return kTfLiteOk;
 }
 
+// Verifies that sparsity values are valid given input/weight/output.
+bool VerifySparsity(const RuntimeShape& weights_shape,
+                    const RuntimeShape& input_shape,
+                    const RuntimeShape& output_shape,
+                    const TfLiteSparsity* sparsity) {
+  const int weights_dims_count = weights_shape.DimensionsCount();
+  const int output_dims_count = output_shape.DimensionsCount();
+  const int w0_size = sparsity->dim_metadata[0].dense_size;
+  const int accum_depth = weights_shape.Dims(weights_dims_count - 1);
+  const int output_elements = output_shape.FlatSize();
+  const int input_elements = input_shape.FlatSize();
+  const int batches = FlatSizeSkipDim(output_shape, output_dims_count - 1);
+  const int output_depth = MatchingDim(weights_shape, weights_dims_count - 2,
+                                       output_shape, output_dims_count - 1);
+  const int max_batch_index = batches - 1;
+  const int max_output = max_batch_index * output_depth + w0_size;
+  const int max_batch_depth = accum_depth * max_batch_index;
+
+  // Verify output size is enough.
+  if (output_elements < max_output) return false;
+
+  // Verify index from sparse in input is valid.
+  for (int i = 0; i < sparsity->dim_metadata[1].array_indices->size; ++i) {
+    if (input_elements <=
+        max_batch_depth + sparsity->dim_metadata[1].array_indices->data[i])
+      return false;
+  }
+  return true;
+}
+
 template <KernelType kernel_type>
 TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                        TfLiteFullyConnectedParams* params, OpData* data,
@@ -968,24 +998,32 @@ TfLiteStatus EvalFloat(TfLiteContext* context, TfLiteNode* node,
                            ""Unsupported sparse fully-connected weight format."");
         return kTfLiteError;
       }
+      const auto& input_shape = GetTensorShape(input);
+      const auto& filter_shape = GetTensorShape(filter);
+      const auto& output_shape = GetTensorShape(output);
+      const auto& bias_shape = GetTensorShape(bias);
+      if (!VerifySparsity(filter_shape, input_shape, output_shape, &sparsity)) {
+        TF_LITE_KERNEL_LOG(context, ""Invalid sparse fully-connected format."");
+        return kTfLiteError;
+      }
 
       if (sparsity.dim_metadata_size == kDimMetadataSizeRandomSparse) {
         // Random sparse.
         optimized_ops::FullyConnectedSparseWeight(
-            sparsity, op_params, GetTensorShape(input),
-            GetTensorData<float>(input), GetTensorShape(filter),
-            GetTensorData<float>(filter), GetTensorShape(bias),
-            GetTensorData<float>(bias), GetTensorShape(output),
-            GetTensorData<float>(output));
+            sparsity, op_params,                         // Disable formatting
+            input_shape, GetTensorData<float>(input),    // Disable formatting
+            filter_shape, GetTensorData<float>(filter),  // Disable formatting
+            bias_shape, GetTensorData<float>(bias),      // Disable formatting
+            output_shape, GetTensorData<float>(output));
       } else if (sparsity.dim_metadata_size == kDimMetadataSizeBlockSparse &&
                  sparsity.dim_metadata[2].dense_size == 4) {
         // Block sparse with block size of 1x4.
         optimized_ops::FullyConnectedSparseWeight1x4(
-            sparsity, op_params, GetTensorShape(input),
-            GetTensorData<float>(input), GetTensorShape(filter),
-            GetTensorData<float>(filter), GetTensorShape(bias),
-            GetTensorData<float>(bias), GetTensorShape(output),
-            GetTensorData<float>(output),
+            sparsity, op_params,                         // Disable formatting
+            input_shape, GetTensorData<float>(input),    // Disable formatting
+            filter_shape, GetTensorData<float>(filter),  // Disable formatting
+            bias_shape, GetTensorData<float>(bias),      // Disable formatting
+            output_shape, GetTensorData<float>(output),
             CpuBackendContext::GetFromContext(context));
       } else {
         TF_LITE_KERNEL_LOG(context,
",1,train
14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto

In certain scenarios, the proto might contain tensors that have too many elements (overflow). This is a `CHECK`-fail in general, but we should prevent it, given how many CVEs caused by this pattern we have received this year (a large fraction of 200).

PiperOrigin-RevId: 408049766
Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",resource_handle.cc,"@@ -17,8 +17,11 @@ limitations under the License.
 
 #include ""absl/strings/str_format.h""
 #include ""tensorflow/core/framework/resource_handle.pb.h""
+#include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
+#include ""tensorflow/core/platform/errors.h""
+#include ""tensorflow/core/platform/macros.h""
 
 namespace tensorflow {
 
@@ -28,7 +31,15 @@ namespace tensorflow {
 ResourceHandle::ResourceHandle() {}
 
 ResourceHandle::ResourceHandle(const ResourceHandleProto& proto) {
-  FromProto(proto);
+  TF_CHECK_OK(FromProto(proto));
+}
+
+Status ResourceHandle::BuildResourceHandle(const ResourceHandleProto& proto,
+                                           ResourceHandle* out) {
+  if (out == nullptr)
+    return errors::Internal(
+        ""BuildResourceHandle() was called with nullptr for the output"");
+  return out->FromProto(proto);
 }
 
 ResourceHandle::~ResourceHandle() {}
@@ -46,7 +57,7 @@ void ResourceHandle::AsProto(ResourceHandleProto* proto) const {
   }
 }
 
-void ResourceHandle::FromProto(const ResourceHandleProto& proto) {
+Status ResourceHandle::FromProto(const ResourceHandleProto& proto) {
   set_device(proto.device());
   set_container(proto.container());
   set_name(proto.name());
@@ -55,10 +66,16 @@ void ResourceHandle::FromProto(const ResourceHandleProto& proto) {
   std::vector<DtypeAndPartialTensorShape> dtypes_and_shapes;
   for (const auto& dtype_and_shape : proto.dtypes_and_shapes()) {
     DataType dtype = dtype_and_shape.dtype();
-    PartialTensorShape shape(dtype_and_shape.shape());
+    PartialTensorShape shape;
+    Status s = PartialTensorShape::BuildPartialTensorShape(
+        dtype_and_shape.shape(), &shape);
+    if (!s.ok()) {
+      return s;
+    }
     dtypes_and_shapes.push_back(DtypeAndPartialTensorShape{dtype, shape});
   }
   dtypes_and_shapes_ = std::move(dtypes_and_shapes);
+  return Status::OK();
 }
 
 string ResourceHandle::SerializeAsString() const {
@@ -69,9 +86,7 @@ string ResourceHandle::SerializeAsString() const {
 
 bool ResourceHandle::ParseFromString(const string& s) {
   ResourceHandleProto proto;
-  const bool status = proto.ParseFromString(s);
-  if (status) FromProto(proto);
-  return status;
+  return proto.ParseFromString(s) && FromProto(proto).ok();
 }
 
 string ResourceHandle::DebugString() const {
@@ -140,7 +155,9 @@ bool DecodeResourceHandleList(std::unique_ptr<port::StringListDecoder> d,
     if (!proto.ParseFromArray(d->Data(sizes[i]), sizes[i])) {
       return false;
     }
-    ps[i].FromProto(proto);
+    if (!ps[i].FromProto(proto).ok()) {
+      return false;
+    }
   }
   return true;
 }
",1,train
14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto

In certain scenarios, the proto might contain tensors that have too many elements (overflow). This is a `CHECK`-fail in general, but we should prevent it, given how many CVEs caused by this pattern we have received this year (a large fraction of 200).

PiperOrigin-RevId: 408049766
Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",resource_handle.h,"@@ -46,6 +46,11 @@ class ResourceHandle {
   ResourceHandle(const ResourceHandleProto& proto);
   ~ResourceHandle();
 
+  // Use this factory method if the `proto` comes from user controlled input, to
+  // prevent a denial of service.
+  static Status BuildResourceHandle(const ResourceHandleProto& proto,
+                                    ResourceHandle* out);
+
   // Unique name for the device containing the resource.
   const std::string& device() const { return device_; }
 
@@ -91,7 +96,7 @@ class ResourceHandle {
 
   // Conversion to and from ResourceHandleProto
   void AsProto(ResourceHandleProto* proto) const;
-  void FromProto(const ResourceHandleProto& proto);
+  Status FromProto(const ResourceHandleProto& proto);
 
   // Serialization via ResourceHandleProto
   std::string SerializeAsString() const;
",1,train
14fea662350e7c26eb5fe1be2ac31704e5682ee6,tensorflow/tensorflow,"Prevent `CHECK`-fail when decoding resource handles from proto

In certain scenarios, the proto might contain tensors that have too many elements (overflow). This is a `CHECK`-fail in general, but we should prevent it, given how many CVEs caused by this pattern we have received this year (a large fraction of 200).

PiperOrigin-RevId: 408049766
Change-Id: I2ac20b247aa8ed9110846fbdb7a0a9401f2c168c",tensor.cc,"@@ -537,6 +537,46 @@ TensorBuffer* FromProtoField(Allocator* a, const TensorProto& in, int64_t n) {
   return buf;
 }
 
+// Separate implementation for `ResourceHandle` to handle the case when the
+// proto for the resource is invalid. See `resource_handle.h` constructor and
+// static factory builder.
+template <>
+TensorBuffer* FromProtoField<ResourceHandle>(Allocator* a,
+                                             const TensorProto& in, int64_t n) {
+  CHECK_GT(n, 0);
+  Buffer<ResourceHandle>* buf = new Buffer<ResourceHandle>(a, n);
+  ResourceHandle* data = buf->template base<ResourceHandle>();
+  if (data == nullptr) {
+    buf->Unref();
+    return nullptr;
+  }
+  const int64_t in_n = ProtoHelper<ResourceHandle>::NumElements(in);
+  if (in_n <= 0) {
+    std::fill_n(data, n, ResourceHandle());
+  } else {
+    // If tensor shape says we have n < in_n elements in the output tensor
+    // then make sure to only decode the first n out of the in_n elements in the
+    // in tensors. In all other cases, we decode all in_n elements of in and set
+    // the remaining elements up to n to be the default ResourceHandle() value.
+    const int64_t real_n = n < in_n ? n : in_n;
+    for (int64_t i = 0; i < real_n; ++i) {
+      Status s = ResourceHandle::BuildResourceHandle(in.resource_handle_val(i),
+                                                     &data[i]);
+      if (!s.ok()) {
+        LOG(ERROR) << ""Could not decode resource handle from proto \""""
+                   << in.resource_handle_val(i).ShortDebugString()
+                   << ""\"", returned status: "" << s.ToString();
+        buf->Unref();
+        return nullptr;
+      }
+    }
+    for (int64_t i = in_n; i < n; ++i) {
+      data[i] = ResourceHandle();
+    }
+  }
+  return buf;
+}
+
 template <>
 TensorBuffer* FromProtoField<Variant>(Allocator* a, const TensorProto& in,
                                       int64_t n) {
",1,train
c2b31ff2d3151acb230edc3f5b1832d2c713a9e0,tensorflow/tensorflow,"Remove a `DCHECK`-fail, log an error instead.

`DCHECK` in debug mode results in crashes. TensorFlow has had multiple vulnerabilities due to this.

Outside of debug mode, `DCHECK` is a no-op.

A better alternative is to report an error to the log buffer and continue. This should happen both in debug mode and in prod mode.

PiperOrigin-RevId: 408375925
Change-Id: Id5b3e19c73f3fbe0cc4bba26ca44ff9607bb6356",op_def_util.cc,"@@ -821,9 +821,10 @@ bool RepeatedAttrDefEqual(
     const protobuf::RepeatedPtrField<OpDef::AttrDef>& a2) {
   std::unordered_map<string, const OpDef::AttrDef*> a1_set;
   for (const OpDef::AttrDef& def : a1) {
-    DCHECK(a1_set.find(def.name()) == a1_set.end())
-        << ""AttrDef names must be unique, but '"" << def.name()
-        << ""' appears more than once"";
+    if (a1_set.find(def.name()) != a1_set.end()) {
+      LOG(ERROR) << ""AttrDef names must be unique, but '"" << def.name()
+                 << ""' appears more than once"";
+    }
     a1_set[def.name()] = &def;
   }
   for (const OpDef::AttrDef& def : a2) {
",1,train
97282c6d0d34476b6ba033f961590b783fa184cd,tensorflow/tensorflow,"Prevent a crash due to heap OOB write in grappler.

PiperOrigin-RevId: 408318417
Change-Id: If095feb8c001e3a8ac4a85b7387b81e8309df47d",graph_properties.cc,"@@ -1134,7 +1134,12 @@ class SymbolicShapeRefiner {
         GetUnknownOutputShape(node, output_port);
     InferenceContext* ctx = GetContext(node);
     if (ctx == nullptr) {
-      return errors::InvalidArgument(""Missing context"");
+      return errors::InvalidArgument(""SetUnknownShape: Missing context"");
+    }
+    if (output_port < 0 || output_port >= ctx->num_outputs()) {
+      return errors::InvalidArgument(
+          ""SetUnknownShape: output_port must be in [0, "", ctx->num_outputs(),
+          "") but was "", output_port);
     }
     ctx->set_output(output_port, shape);
     return Status::OK();
",1,train
1b54cadd19391b60b6fcccd8d076426f7221d5e8,tensorflow/tensorflow,"Add missing validation to sparse dense cwise ops.

PiperOrigin-RevId: 415543133
Change-Id: I5baf3284e919338afb96178c468ad3d3cb0d956c",sparse_dense_binary_op_shared.cc,"@@ -78,11 +78,24 @@ class SparseDenseBinaryOpShared : public OpKernel {
                     ""but received shapes: "",
                     values_t->shape().DebugString(), "" and "",
                     shape_t->shape().DebugString()));
+    OP_REQUIRES(
+        ctx, TensorShapeUtils::IsVector(shape_t->shape()),
+        errors::InvalidArgument(""Input sp_shape must be a vector. Got: "",
+                                shape_t->shape().DebugString()));
     OP_REQUIRES(
         ctx, values_t->dim_size(0) == indices_t->dim_size(0),
         errors::InvalidArgument(
             ""The first dimension of values and indices should match. ("",
             values_t->dim_size(0), "" vs. "", indices_t->dim_size(0), "")""));
+    OP_REQUIRES(
+        ctx, shape_t->shape().dim_size(0) == indices_t->shape().dim_size(1),
+        errors::InvalidArgument(
+            ""Number of dimensions must match second dimension of indices. "",
+            ""Got "", shape_t->shape().dim_size(0),
+            "" dimensions, indices shape: "", indices_t->shape().DebugString()));
+    OP_REQUIRES(ctx, shape_t->NumElements() > 0,
+                errors::InvalidArgument(
+                    ""The shape argument requires at least one element.""));
 
     const auto indices_mat = indices_t->matrix<int64_t>();
     const auto shape_vec = shape_t->vec<int64_t>();
",1,test
e952a89b7026b98fe8cbe626514a93ed68b7c510,tensorflow/tensorflow,"Prevent overflow in sparse dense cwise ops.

PiperOrigin-RevId: 415543171
Change-Id: I22dab7c41be2121ab5efe5403ca0e2f9b7cb24b8",sparse_dense_binary_op_shared.cc,"@@ -99,7 +99,9 @@ class SparseDenseBinaryOpShared : public OpKernel {
 
     const auto indices_mat = indices_t->matrix<int64_t>();
     const auto shape_vec = shape_t->vec<int64_t>();
-    const auto lhs_dims = BCast::FromShape(TensorShape(shape_vec));
+    TensorShape lhs_shape;
+    OP_REQUIRES_OK(ctx, TensorShape::BuildTensorShape(shape_vec, &lhs_shape));
+    const auto lhs_dims = BCast::FromShape(lhs_shape);
     const auto rhs_dims = BCast::FromShape(dense_t->shape());
     BCast b(lhs_dims, rhs_dims, false);  // false for keeping the same num dims.
 
",1,train
a68f68061e263a88321c104a6c911fe5598050a8,tensorflow/tensorflow,"Replace faulty overflow check with a builder for `TensorShape`.

Prevents an integer overflow that was not caught before.

PiperOrigin-RevId: 415381595
Change-Id: I76585ddedc912bd9f4a390aeafa8e2ced1a28863",sparse_tensors_map_ops.cc,"@@ -263,22 +263,10 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp {
             ""Rank of input SparseTensor should be > 1, but saw rank: "", rank));
 
     auto input_shape_vec = input_shape->vec<int64_t>();
-    int new_num_elements = 1;
-    bool overflow_ocurred = false;
-    for (int i = 0; i < input_shape_vec.size(); i++) {
-      new_num_elements =
-          MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i));
-      if (new_num_elements < 0) {
-        overflow_ocurred = true;
-        break;
-      }
-    }
-
-    OP_REQUIRES(
-        context, !overflow_ocurred,
-        errors::Internal(""Encountered overflow from large input shape.""));
 
-    TensorShape tensor_input_shape(input_shape_vec);
+    TensorShape tensor_input_shape;
+    OP_REQUIRES_OK(context, TensorShape::BuildTensorShape(input_shape_vec,
+                                                          &tensor_input_shape));
     gtl::InlinedVector<int64_t, 8> std_order(rank);
     std::iota(std_order.begin(), std_order.end(), 0);
     SparseTensor input_st;
",1,test
b51b82fe65ebace4475e3c54eb089c18a4403f1c,tensorflow/tensorflow,"Add missing validation to `AddManySparseToTensorsMap`.

Sparse tensors have a set of requirements for the 3 components and not all of them were checked.

PiperOrigin-RevId: 415358027
Change-Id: I96cbb672999cd1da772c22fabbd15507e32e12dc",sparse_tensors_map_ops.cc,"@@ -231,16 +231,29 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp {
                 errors::InvalidArgument(
                     ""Input indices should be a matrix but received shape "",
                     input_indices->shape().DebugString()));
-
     OP_REQUIRES(context, TensorShapeUtils::IsVector(input_values->shape()),
                 errors::InvalidArgument(
                     ""Input values should be a vector but received shape "",
                     input_values->shape().DebugString()));
-
     OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape->shape()),
                 errors::InvalidArgument(
                     ""Input shape should be a vector but received shape "",
                     input_shape->shape().DebugString()));
+    OP_REQUIRES(
+        context,
+        input_values->shape().dim_size(0) == input_indices->shape().dim_size(0),
+        errors::InvalidArgument(
+            ""Number of values must match first dimension of indices. "", ""Got "",
+            input_values->shape().dim_size(0),
+            "" values, indices shape: "", input_indices->shape().DebugString()));
+    OP_REQUIRES(
+        context,
+        input_shape->shape().dim_size(0) == input_indices->shape().dim_size(1),
+        errors::InvalidArgument(
+            ""Number of dimensions must match second dimension of indices. "",
+            ""Got "", input_shape->shape().dim_size(0),
+            "" dimensions, indices shape: "",
+            input_indices->shape().DebugString()));
 
     int rank = input_shape->NumElements();
 
",1,train
8a513cec4bec15961fbfdedcaa5376522980455c,tensorflow/tensorflow,"Prevent null dereference read in `SpecializeType()`

For some adversarial protos, the attribute for a key might not exist.

PiperOrigin-RevId: 408382090
Change-Id: Ie7eabe532c9ff280fce5dce1f6cdb93c76c2e040",full_type_util.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/op_def.pb.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/platform/statusor.h""
+#include ""tensorflow/core/protobuf/error_codes.pb.h""
 
 namespace tensorflow {
 
@@ -102,7 +103,11 @@ StatusOr<FullTypeDef> SpecializeType(const AttrSlice& attrs,
       auto* arg = t->mutable_args(i);
       if (arg->type_id() == TFT_VAR) {
         const auto* attr = attrs.Find(arg->s());
-        DCHECK(attr != nullptr);
+        if (attr == nullptr) {
+          return Status(
+              error::INVALID_ARGUMENT,
+              absl::StrCat(""Could not find an attribute for key "", arg->s()));
+        }
         if (attr->value_case() == AttrValue::kList) {
           const auto& attr_list = attr->list();
           arg->set_type_id(TFT_PRODUCT);
",1,train
5b491cd5e41ad63735161cec9c2a568172c8b6a3,tensorflow/tensorflow,"Validate `proto.dtype()` before calling `set_dtype()`.

This prevents a `DCHECK`-fail when the proto contains an invalid dtype for a tensor shape with 0 elements or for an incomplete tensor shape.

PiperOrigin-RevId: 408369083
Change-Id: Ia21a3e3d62a90d642a4561f08f3b543e5ad00c46",tensor.cc,"@@ -983,6 +983,15 @@ bool Tensor::FromProto(Allocator* a, const TensorProto& proto) {
                          dtype_error = true, dtype_error = true);
     }
     if (dtype_error || p == nullptr) return false;
+  } else {
+    // Handle the case of empty tensors (N = 0) or tensors with incomplete shape
+    // (N = -1). All other values of `shape.num_elements()` should be invalid by
+    // construction.
+    // Here, we just need to validate that the `proto.dtype()` value is valid.
+    bool dtype_error = false;
+    CASES_WITH_DEFAULT(proto.dtype(), break, dtype_error = true,
+                       dtype_error = true);
+    if (dtype_error) return false;
   }
   shape_ = shape;
   set_dtype(proto.dtype());
",1,train
cb164786dc891ea11d3a900e90367c339305dc7b,tensorflow/tensorflow,"Properly handle the case where `SpecializeType()` returns an error `Status`.

If the error case in `SpecializeType()` is reached, then we would get a crash when trying to access the value of an erroneous `StatusOr` object.

PiperOrigin-RevId: 408380069
Change-Id: If3c3fc876dcf9384d5ec7a4985adc68c23ea7318",shape_inference.cc,"@@ -170,7 +170,10 @@ void InferenceContext::PreInputInit(
     const std::vector<ShapeHandle>& input_tensors_as_shapes) {
   // TODO(mdan): This is also done at graph construction. Run only here instead?
   const auto ret = full_type::SpecializeType(attrs_, op_def);
-  DCHECK(ret.status().ok()) << ""while instantiating types: "" << ret.status();
+  if (!ret.status().ok()) {
+    construction_status_ = ret.status();
+    return;
+  }
   ret_types_ = ret.ValueOrDie();
 
   input_tensors_ = input_tensors;
",1,train
ef1d027be116f25e25bb94a60da491c2cf55bd0b,tensorflow/tensorflow,"Prevent copying uninitialized data in `AssignOp`.

This prevents harder to debug undefined behaviors that cannot be traced back to the original tensor after assignments occur earlier in the graph execution. Several of these undefined behaviors are just reference bindings to null pointers, which are caught when running under ubsan/asan.

PiperOrigin-RevId: 408654780
Change-Id: Iad2ec40d43f5fd7ea016c20283356c12d5ddeab1",assign_op.h,"@@ -50,6 +50,12 @@ class AssignOp : public OpKernel {
     // We always return the input ref.
     context->forward_ref_input_to_ref_output(0, 0);
 
+    // Prevent copying uninitialized data, to solve harder to debug undefined
+    // behaviors that cannot be traced back to the original tensor.
+    OP_REQUIRES(
+        context, rhs.IsInitialized(),
+        errors::Internal(""Right hand side of AssignOp is not initialized""));
+
     // We can't always know how this value will be used downstream, so make
     // conservative assumptions in specifying constraints on the memory
     // allocation attributes, unless the Grappler graph analysis determined that
",1,train
0657c83d08845cc434175934c642299de2c0f042,tensorflow/tensorflow,"Fix heap OOB read/write due to incorrect indexing.

PiperOrigin-RevId: 408578046
Change-Id: Ifc9ffea49e5890f55fcb2c27568611052c3ddcfa",full_type_util.cc,"@@ -100,7 +100,7 @@ StatusOr<FullTypeDef> SpecializeType(const AttrSlice& attrs,
     // verifications are needed, they should be done by separately, and in a
     // way that can be reused for type inference.
     for (int j = 0; j < t->args_size(); j++) {
-      auto* arg = t->mutable_args(i);
+      auto* arg = t->mutable_args(j);
       if (arg->type_id() == TFT_VAR) {
         const auto* attr = attrs.Find(arg->s());
         if (attr == nullptr) {
",1,train
fcd18ce3101f245b083b30655c27b239dc72221e,tensorflow/tensorflow,"Prevent integer overflow in `OpLevelCostEstimator::CalculateTensorSize`.

In order to not change the API, we return a negative value in case of overflow. A better fix is to change the API to return a status instead.

PiperOrigin-RevId: 408713061
Change-Id: I3771475b0c72a2844a3854086966562fd33f2da5",op_level_cost_estimator.cc,"@@ -1555,7 +1555,13 @@ int64_t OpLevelCostEstimator::CalculateTensorSize(
   int64_t count = CalculateTensorElementCount(tensor, found_unknown_shapes);
   int size = DataTypeSize(BaseType(tensor.dtype()));
   VLOG(2) << ""Count: "" << count << "" DataTypeSize: "" << size;
-  return count * size;
+  int64_t tensor_size = MultiplyWithoutOverflow(count, size);
+  if (tensor_size < 0) {
+    VLOG(1) << ""Overflow encountered when computing tensor size, multiplying ""
+            << count << "" with "" << size;
+    return -1;
+  }
+  return tensor_size;
 }
 
 int64_t OpLevelCostEstimator::CalculateInputSize(const OpInfo& op_info,
",1,train
b9bd6cfd1c50e6807846af9a86f9b83cafc9c8ae,tensorflow/tensorflow,"Prevent integer overflow in `OpLevelCostEstimator::CalculateOutputSize`.

In order to not change the API, we return a negative value in case of overflow. A better fix is to change the API to return a status instead.

PiperOrigin-RevId: 408701427
Change-Id: Idf31e7f0bf18ca824d084fdd355e1f653f145c20",op_level_cost_estimator.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 #include ""tensorflow/core/grappler/costs/op_context.h""
 #include ""tensorflow/core/grappler/costs/utils.h""
 #include ""tensorflow/core/platform/errors.h""
+#include ""tensorflow/core/util/overflow.h""
 
 namespace tensorflow {
 namespace grappler {
@@ -1607,7 +1608,14 @@ int64_t OpLevelCostEstimator::CalculateOutputSize(const OpInfo& op_info,
     auto output_shape = MaybeGetMinimumShape(original_output_shape, num_dims,
                                              found_unknown_shapes);
     for (const auto& dim : output_shape.dim()) {
-      output_size *= dim.size();
+      int64_t new_output_size =
+          MultiplyWithoutOverflow(output_size, dim.size());
+      if (new_output_size < 0) {
+        VLOG(1) << ""Overflow encountered when estimating cost, multiplying ""
+                << output_size << "" with "" << dim.size();
+        return -1;
+      }
+      output_size = new_output_size;
     }
     total_output_size += output_size;
     VLOG(1) << ""Output Size: "" << output_size
",1,train
4f38b1ac8e42727e18a2f0bde06d3bee8e77b250,tensorflow/tensorflow,"Prevent null dereference read in `GetInitOp`.

We have a map of maps. We test that the key exists in the first map, but we never validate that the second map also contains the needed key. In the scenarios where it does not, we would dereference a nullptr without this check.

PiperOrigin-RevId: 408739325
Change-Id: If9bb7ed759aba1f3b56a34913f209508dbaf65ce",loader_util.cc,"@@ -34,9 +34,14 @@ Status GetInitOp(const string& export_dir, const MetaGraphDef& meta_graph_def,
   const auto& init_op_sig_it =
       meta_graph_def.signature_def().find(kSavedModelInitOpSignatureKey);
   if (init_op_sig_it != sig_def_map.end()) {
-    *init_op_name = init_op_sig_it->second.outputs()
-                        .find(kSavedModelInitOpSignatureKey)
-                        ->second.name();
+    const auto& sig_def_outputs = init_op_sig_it->second.outputs();
+    const auto& sig_def_outputs_it =
+        sig_def_outputs.find(kSavedModelInitOpSignatureKey);
+    if (sig_def_outputs_it == sig_def_outputs.end()) {
+      return errors::FailedPrecondition(""Could not find output "",
+                                        kSavedModelInitOpSignatureKey);
+    }
+    *init_op_name = sig_def_outputs_it->second.name();
     return Status::OK();
   }
 
",1,train
c79ccba517dbb1a0ccb9b01ee3bd2a63748b60dd,tensorflow/tensorflow,"Fix memory leak when a graph node is invalid.

If a graph node is invalid but a kernel is created, then we set the kernel back to `nullptr` but forget to delete it. Hence, we get a memory leak.

PiperOrigin-RevId: 408968108
Change-Id: I1d8a9d0d8988ed5e08be8b9f2004ce1b4cd11b7c",immutable_executor_state.cc,"@@ -131,6 +131,7 @@ Status ImmutableExecutorState::Initialize(const Graph& graph) {
 
     Status s = params_.create_kernel(n->properties(), &item->kernel);
     if (!s.ok()) {
+      params_.delete_kernel(item->kernel);
       item->kernel = nullptr;
       s = AttachDef(s, *n);
       return s;
",1,train
92dba16749fae36c246bec3f9ba474d9ddeb7662,tensorflow/tensorflow,"Prevent a null-pointer dereference / `CHECK`-fail in grappler.

PiperOrigin-RevId: 409187354
Change-Id: I369c249cca32e6c56ec193f0ebbf2f2768fc7d43",dependency_optimizer.cc,"@@ -75,8 +75,10 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const {
   }
 
   const NodeDef* input = node_map_->GetNode(NodeName(node.input(0)));
-  CHECK(input != nullptr) << ""node = "" << node.name()
-                          << "" input = "" << node.input(0);
+  if (input == nullptr) {
+    VLOG(1) << ""node = "" << node.name() << "" input = "" << node.input(0);
+    return false;
+  }
   // Don't remove Identity nodes corresponding to Variable reads or following
   // Recv.
   if (IsVariable(*input) || IsRecv(*input)) {
",1,train
1361fb7e29449629e1df94d44e0427ebec8c83c7,tensorflow/tensorflow,"Fix abort caused by allocating a too large vector.

We need to make sure that the number of dimensions in a shape is within limits.

PiperOrigin-RevId: 408997911
Change-Id: If59e1c23f2ec9c2d4ff4d8632fd62b2a7773a4eb",shape_inference.cc,"@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 #include ""tensorflow/core/framework/shape_inference.h""
 
+#include <cstdint>
+
 #include ""tensorflow/core/framework/bounds_check.h""
 #include ""tensorflow/core/framework/full_type_util.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
@@ -789,6 +791,19 @@ Status InferenceContext::InternalMakeShapeFromTensor(
       return ReturnUnknownShape(out);
     }
     const auto num_dims = Value(shape_dim);
+    // TODO(mihaimaruseac): Should be `TensorShape::MaxDimensions()` as we are
+    // not able to materialize shapes with more than this number of dimensions
+    // but then shape inference would fail for operations such as
+    // `tf.range`/`tf.ones`, etc. where the shape is not really materialized,
+    // only used during the inference. Hence, just prevent doing a `reserve`
+    // with a very large argument.
+    const int64_t max_dimensions = 1 << 20;
+    if (num_dims >= max_dimensions) {
+      return errors::Internal(
+          ""Cannot create a tensor with "", num_dims,
+          "" dimensions, as these would be more than maximum of "",
+          max_dimensions);
+    }
     std::vector<DimensionHandle> dims;
     dims.reserve(num_dims);
     for (int i = 0; i < num_dims; i++) dims.push_back(UnknownDim());
",1,train
1fb27733f943295d874417630edd3b38b34ce082,tensorflow/tensorflow,"Remove `CHECK`-fails from `IsSimplifiableReshape`

PiperOrigin-RevId: 409164987
Change-Id: I58c7dd459ff348c3dbae95e00c4c5e63b30a4e65",constant_folding.cc,"@@ -1689,7 +1689,11 @@ Status ConstantFolding::IsSimplifiableReshape(
   if (!IsReshape(node)) {
     return errors::Internal(""Node "", node.name(), "" is not a Reshape node"");
   }
-  CHECK_LE(2, node.input_size());
+  if (2 > node.input_size()) {
+    return errors::Internal(""Node "", node.name(),
+                            "" must have at least 2 inputs but has "",
+                            node.input_size());
+  }
   const NodeDef* new_shape = node_map_->GetNode(node.input(1));
   if (!IsReallyConstant(*new_shape)) {
     return errors::Internal(""Node "", node.name(), "" has shape "",
@@ -1707,7 +1711,11 @@ Status ConstantFolding::IsSimplifiableReshape(
   if (!s.ok()) {
     return errors::Internal(""Could not evaluate node "", node.name());
   }
-  CHECK_EQ(1, outputs.size());
+  if (outputs.size() != 1) {
+    return errors::Internal(""Node "", node.name(),
+                            "" must have exactly 1 output but has "",
+                            outputs.size());
+  }
 
   const std::vector<OpInfo::TensorProperties>& props =
       properties.GetInputProperties(node.name());
",1,train
240655511cd3e701155f944a972db71b6c0b1bb6,tensorflow/tensorflow,"Eliminate `CHECK`-fails from `IsSimplifiableReshape` via `MakeShape(<invalid shape>)`

PiperOrigin-RevId: 409166738
Change-Id: I7f0a3590b8acae3f3e3e2fe636e1f5ef285693cf",constant_folding.cc,"@@ -1741,14 +1741,16 @@ Status ConstantFolding::IsSimplifiableReshape(
       int32_t dim = outputs[0]->flat<int32>()(i);
       shp.push_back(dim);
     }
-    TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims));
+    s = TensorShapeUtils::MakeShape(shp, &new_dims);
+    if (!s.ok()) return s;
   } else {
     std::vector<int64_t> shp;
     for (int i = 0; i < outputs[0]->NumElements(); ++i) {
       int64_t dim = outputs[0]->flat<int64_t>()(i);
       shp.push_back(dim);
     }
-    TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims));
+    s = TensorShapeUtils::MakeShape(shp, &new_dims);
+    if (!s.ok()) return s;
   }
 
   if (!shape.IsCompatibleWith(new_dims)) {
",1,train
ebc1a2ffe5a7573d905e99bd0ee3568ee07c12c1,tensorflow/tensorflow,"Make `IsSimplifiableReshape` return `Status` instead of `bool`.

This is to allow remove `CHECK`-fails in subsequent commits.

PiperOrigin-RevId: 409160987
Change-Id: I3f050218a3832271395c4372a0b8ea05f1c03d80",constant_folding.cc,"@@ -1684,15 +1684,17 @@ Status ConstantFolding::FoldGraph(
   return Status::OK();
 }
 
-bool ConstantFolding::IsSimplifiableReshape(
+Status ConstantFolding::IsSimplifiableReshape(
     const NodeDef& node, const GraphProperties& properties) const {
   if (!IsReshape(node)) {
-    return false;
+    return errors::Internal(""Node "", node.name(), "" is not a Reshape node"");
   }
   CHECK_LE(2, node.input_size());
   const NodeDef* new_shape = node_map_->GetNode(node.input(1));
   if (!IsReallyConstant(*new_shape)) {
-    return false;
+    return errors::Internal(""Node "", node.name(), "" has shape "",
+                            new_shape->DebugString(),
+                            "" which is not a constant"");
   }
   TensorVector outputs;
   auto outputs_cleanup = gtl::MakeCleanup([&outputs] {
@@ -1703,22 +1705,25 @@ bool ConstantFolding::IsSimplifiableReshape(
 
   Status s = EvaluateNode(*new_shape, TensorVector(), &outputs);
   if (!s.ok()) {
-    return false;
+    return errors::Internal(""Could not evaluate node "", node.name());
   }
   CHECK_EQ(1, outputs.size());
 
   const std::vector<OpInfo::TensorProperties>& props =
       properties.GetInputProperties(node.name());
   if (props.empty()) {
-    return false;
+    return errors::Internal(""Node "", node.name(), "" has no properties"");
   }
   const OpInfo::TensorProperties& prop = props[0];
   if (prop.dtype() == DT_INVALID) {
-    return false;
+    return errors::Internal(""Node "", node.name(), "" has property "",
+                            prop.DebugString(), "" with invalid dtype"");
   }
   const PartialTensorShape shape(prop.shape());
   if (!shape.IsFullyDefined()) {
-    return false;
+    return errors::Internal(""Node "", node.name(), "" has property "",
+                            prop.DebugString(), "" with shape "",
+                            shape.DebugString(), "" which is not fully defined"");
   }
 
   PartialTensorShape new_dims;
@@ -1738,7 +1743,12 @@ bool ConstantFolding::IsSimplifiableReshape(
     TF_CHECK_OK(TensorShapeUtils::MakeShape(shp, &new_dims));
   }
 
-  return shape.IsCompatibleWith(new_dims);
+  if (!shape.IsCompatibleWith(new_dims)) {
+    return errors::Internal(""Expected shape "", shape.DebugString(),
+                            "" to be compatible with "", new_dims.DebugString());
+  }
+
+  return Status::OK();
 }
 
 #define IS_VALUE_CASE(DTYPE, VALUE)                   \
@@ -2925,7 +2935,7 @@ bool ConstantFolding::SimplifyReduction(GraphDef* optimized_graph,
 bool ConstantFolding::SimplifyReshape(const GraphProperties& properties,
                                       bool use_shape_info, NodeDef* node) {
   if (!use_shape_info || node->attr().count(""T"") == 0 ||
-      !IsSimplifiableReshape(*node, properties)) {
+      !IsSimplifiableReshape(*node, properties).ok()) {
     return false;
   }
   DataType output_type = node->attr().at(""T"").type();
",1,train
ebc1a2ffe5a7573d905e99bd0ee3568ee07c12c1,tensorflow/tensorflow,"Make `IsSimplifiableReshape` return `Status` instead of `bool`.

This is to allow remove `CHECK`-fails in subsequent commits.

PiperOrigin-RevId: 409160987
Change-Id: I3f050218a3832271395c4372a0b8ea05f1c03d80",constant_folding.h,"@@ -129,8 +129,8 @@ class ConstantFolding : public GraphOptimizer {
   Status FoldGraph(const GraphProperties& properties, GraphDef* output,
                    absl::flat_hash_set<string>* nodes_to_not_simplify);
 
-  bool IsSimplifiableReshape(const NodeDef& node,
-                             const GraphProperties& properties) const;
+  Status IsSimplifiableReshape(const NodeDef& node,
+                               const GraphProperties& properties) const;
   Status SimplifyGraph(GraphDef* optimized_graph, GraphProperties* properties,
                        absl::flat_hash_set<string>* nodes_to_not_simplify);
   Status SimplifyNode(NodeDef* node, GraphDef* optimized_graph,
",1,train
c2426bba00a01de6913738df8fa78e0215fcce02,tensorflow/tensorflow,"Use `PartialTensorShape` instead of `TensorShape`.

The `TensorShape` constructor throws a CHECK-fail if the shape is partial or overflows, which the `PartialTensorShape` constructor does not. We are only determining the number of elements in the shape, so a partial shape should be used, as it returns a negative number when needed.

PiperOrigin-RevId: 409205384
Change-Id: Ia56542ff9ec758f2c9ffc7e4dcc9fa7eecd86e7b",attr_value_util.cc,"@@ -45,7 +45,7 @@ constexpr int kMaxTensorNestDepth = 100;
 // not fully defined return -1.
 int64_t TensorByteSize(const TensorProto& t) {
   // num_elements returns -1 if shape is not fully defined.
-  int64_t num_elems = TensorShape(t.tensor_shape()).num_elements();
+  int64_t num_elems = PartialTensorShape(t.tensor_shape()).num_elements();
   return num_elems < 0 ? -1 : num_elems * DataTypeSize(t.dtype());
 }
 
",1,train
a7c02f1a9bbc35473969618a09ee5f9f5d3e52d9,tensorflow/tensorflow,"Validate real and expected type of arguments to cwise ops.

Without this validation, it is possible to trigger a `CHECK`-fail denial of service.

This is a rollforward of a previous commit which was rolled back as it was relying on RTTI. This time we don't use RTTI; instead, we replace `typeid(Tin).name()` with a double function call, `DataTypeString(DataTypeToEnum<Tin>::v())`.

PiperOrigin-RevId: 409340416
Change-Id: I96080b2796729a3a9b65e7c68307ac276070f2f0",cwise_ops_common.h,"@@ -87,7 +87,17 @@ class BinaryOp : public BinaryOpShared {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& input_0 = ctx->input(0);
+    OP_REQUIRES(ctx, input_0.dtype() == DataTypeToEnum<Tin>::v(),
+                errors::InvalidArgument(
+                    ""Expected tensor of type "",
+                    DataTypeString(DataTypeToEnum<Tin>::v()), "" but got type "",
+                    DataTypeString(input_0.dtype())));
     const Tensor& input_1 = ctx->input(1);
+    OP_REQUIRES(ctx, input_1.dtype() == DataTypeToEnum<Tin>::v(),
+                errors::InvalidArgument(
+                    ""Expected tensor of type "",
+                    DataTypeString(DataTypeToEnum<Tin>::v()), "" but got type "",
+                    DataTypeString(input_1.dtype())));
     const Device& eigen_device = ctx->eigen_device<Device>();
     bool error = false;
     bool* const error_ptr = Functor::has_errors ? &error : nullptr;
",1,train
e746adbfcfee15e9cfdb391ff746c765b99bdf9b,tensorflow/tensorflow,"Prevent use after free in `DecodePng` kernel.

We are cleaning up the memory in `decode` and then we are using an `OP_REQUIRES` to check an invariant on the `decode` data.

PiperOrigin-RevId: 409299145
Change-Id: I4eb93aaca52483eb202e89b78df07fbb2f6cb254",decode_image_op.cc,"@@ -339,7 +339,6 @@ class DecodeImageV2Op : public OpKernel {
     if (width != static_cast<int64_t>(decode.width) || width <= 0 ||
         width >= (1LL << 27) || height != static_cast<int64_t>(decode.height) ||
         height <= 0 || height >= (1LL << 27) || total_size >= (1LL << 29)) {
-      png::CommonFreeDecode(&decode);
       OP_REQUIRES(context, false,
                   errors::InvalidArgument(""PNG size too large for int: "",
                                           decode.width, "" by "", decode.height));
",1,train
ab51e5b813573dc9f51efa335aebcf2994125ee9,tensorflow/tensorflow,"Prevent memory leak in decoding PNG images.

PiperOrigin-RevId: 409300653
Change-Id: I6182124c545989cef80cefd439b659095920763b",decode_image_op.cc,"@@ -18,6 +18,8 @@ limitations under the License.
 #include <cstdint>
 #include <memory>
 
+#include ""tensorflow/core/lib/gtl/cleanup.h""
+
 #define EIGEN_USE_THREADS
 
 #include ""absl/strings/escaping.h""
@@ -326,6 +328,16 @@ class DecodeImageV2Op : public OpKernel {
         context, png::CommonInitDecode(input, channels_, channel_bits, &decode),
         errors::InvalidArgument(""Invalid PNG. Failed to initialize decoder.""));
 
+    // If we reach this point, then there is data in `decode` which must be
+    // freed by the time we end execution in this function. We cannot call
+    // `png::CommonFreeDecode()` before an `OP_REQUIRES` because if
+    // `OP_REQUIRES` constraint is satisfied then the data would be freed
+    // prematurely. Instead, let's use a `Cleanup` object.
+    auto cleanup = gtl::MakeCleanup([&decode]() {
+      std::cerr << ""Cleanup called...\n"";
+      png::CommonFreeDecode(&decode);
+    });
+
     // Verify that width and height are not too large:
     // - verify width and height don't overflow int.
     // - width can later be multiplied by channels_ and sizeof(uint16), so
",1,train
3d89911481ba6ebe8c88c1c0b595412121e6c645,tensorflow/tensorflow,"Eliminate `CHECK`-fail from `function.cc`.

PiperOrigin-RevId: 409414744
Change-Id: Ic854e12ab2edb88b165d32e2d632c4ee654d71ad",function.cc,"@@ -181,7 +181,9 @@ class FunctionInstantiationHelper {
     DataTypeVector dtypes;
     TF_RETURN_IF_ERROR(
         ArgNumType(attr_values, arg_def, &is_type_list, &dtypes));
-    CHECK_GE(dtypes.size(), size_t{1});
+    if (dtypes.size() < size_t{1}) {
+      return errors::Internal(""Expected a list of at least one dtype"");
+    }
     int arg_index = result_.nodes.size();
     TF_RETURN_IF_ERROR(
         AddItem(arg_def.name(), {true, arg_index, 0, is_type_list, dtypes}));
",1,train
dcc21c7bc972b10b6fb95c2fb0f4ab5a59680ec2,tensorflow/tensorflow,"Eliminate debug `CHECK`-fail from `function.cc`

PiperOrigin-RevId: 409416119
Change-Id: I8376ee464d434e9b970ff0ad49edfdaa2a273cfe",function.cc,"@@ -191,7 +191,11 @@ class FunctionInstantiationHelper {
     for (size_t i = 0; i < dtypes.size(); ++i) {
       TF_RETURN_IF_ERROR(AddItem(strings::StrCat(arg_def.name(), "":"", i),
                                  {true, arg_index, 0, false, {dtypes[i]}}));
-      DCHECK_EQ(arg_index, result_.nodes.size());
+      if (arg_index != result_.nodes.size()) {
+        return errors::Internal(
+            ""Expected arg_index to be equal to the number of nodes in result."",
+            "" Got "", arg_index, "" and "", result_.nodes.size());
+      }
       string name = arg_def.name();
       if (dtypes.size() > 1) {
         strings::StrAppend(&name, ""_"", i);
",1,train
0aaaae6eca5a7175a193696383f582f53adab23f,tensorflow/tensorflow,"Prevent overflow in grappler cost estimation of crop&resize op.

The crop parameters are user controlled, so we should make sure a user can not trigger an overflow maliciously.

PiperOrigin-RevId: 409670234
Change-Id: I7994734a98b037c5642e051240329d16f959aae4",op_level_cost_estimator.cc,"@@ -2681,27 +2681,42 @@ Status OpLevelCostEstimator::PredictCropAndResize(const OpContext& op_context,
   // calculation differs from rough estimate in implementation, as it separates
   // out cost per box from cost per pixel and cost per element.
 
+  // Since crop arguments are user controlled, check for overflow.
+  int64_t crop_area = MultiplyWithoutOverflow(crop_height, crop_width);
+  if (crop_area < 0)
+    return errors::InvalidArgument(""Cannot estimate cost, multiplying "",
+                                   crop_height, "" with "", crop_width,
+                                   "" would overflow"");
+  int64_t crop_volume = MultiplyWithoutOverflow(crop_area, num_boxes);
+  if (crop_volume < 0)
+    return errors::InvalidArgument(""Cannot estimate cost, multiplying "",
+                                   crop_area, "" with "", num_boxes,
+                                   "" would overflow"");
+  int64_t crop_depth = MultiplyWithoutOverflow(crop_height, num_boxes);
+  if (crop_depth < 0)
+    return errors::InvalidArgument(""Cannot estimate cost, multiplying "",
+                                   crop_height, "" with "", num_boxes,
+                                   "" would overflow"");
+
   // Ops for variables height_scale and width_scale.
   int64_t ops = (sub_cost * 6 + mul_cost * 2 + div_cost * 2) * num_boxes;
   // Ops for variable in_y.
-  ops += (mul_cost * 2 + sub_cost + add_cost) * crop_height * num_boxes;
+  ops += (mul_cost * 2 + sub_cost + add_cost) * crop_depth;
   // Ops for variable in_x (same computation across both branches).
-  ops += (mul_cost * 2 + sub_cost + add_cost) * crop_height * crop_width *
-         num_boxes;
+  ops += (mul_cost * 2 + sub_cost + add_cost) * crop_volume;
   // Specify op_cost based on the method.
   if (use_bilinear_interp) {
     // Ops for variables top_y_index, bottom_y_index, y_lerp.
-    ops += (floor_cost + ceil_cost + sub_cost) * crop_height * num_boxes;
+    ops += (floor_cost + ceil_cost + sub_cost) * crop_depth;
     // Ops for variables left_x, right_x, x_lerp;
-    ops += (floor_cost + ceil_cost + sub_cost) * crop_height * crop_width *
-           num_boxes;
+    ops += (floor_cost + ceil_cost + sub_cost) * crop_volume;
     // Ops for innermost loop across depth.
     ops +=
         (cast_to_float_cost * 4 + add_cost * 3 + sub_cost * 3 + mul_cost * 3) *
         output_elements;
   } else /* method == ""nearest"" */ {
     // Ops for variables closest_x_index and closest_y_index.
-    ops += round_cost * 2 * crop_height * crop_width * num_boxes;
+    ops += round_cost * 2 * crop_volume;
     // Ops for innermost loop across depth.
     ops += cast_to_float_cost * output_elements;
   }
",1,train
6b5adc0877de832b2a7c189532dbbbc64622eeb6,tensorflow/tensorflow,"Prevent `CHECK`-fail when building reference tensor.

The tensor constructor does not allow reference dtypes, as these should not show up explicitly. However, when passed these invalid types, the constructor crashes via a `CHECK`-fail instead of building an invalid object. We have a static builder that properly handles this case, but it is not applicable given the current usage.

Instead, before calling the constructor, we can check that the dtype is not a reference type and return an error otherwise, given that the dtype is user controlled so malicious users can trigger denial of service.

PiperOrigin-RevId: 409662503
Change-Id: I5892f831fde7f276cd7ab34519cf6b8061c71a59",constant_folding.cc,"@@ -1363,6 +1363,11 @@ Status ConstantFolding::EvaluateOneFoldable(const NodeDef& node,
                           input_tensor.ToString(),
                           "" has a dtype of DT_INVALID.""));
     }
+    if (IsRefType(raw_val.dtype())) {
+      return errors::InvalidArgument(
+          ""Not allowed to construct a tensor with reference dtype, got "",
+          DataTypeString(raw_val.dtype()));
+    }
     Tensor* value = new Tensor(raw_val.dtype(), raw_val.tensor_shape());
     if (!value->FromProto(raw_val)) {
       delete (value);
",1,train
045deec1cbdebb27d817008ad5df94d96a08b1bf,tensorflow/tensorflow,"Prevent null pointer dereference in `mutable_graph_view`

PiperOrigin-RevId: 409684472
Change-Id: I577eb9d9ac470fcec0501423171e739a4ec0cb5c",mutable_graph_view.cc,"@@ -68,6 +68,9 @@ bool IsIdentityConsumingSwitch(const MutableGraphView& graph,
     }
 
     NodeDef* input_node = graph.GetNode(tensor_id.node());
+    if (input_node == nullptr) {
+      return false;
+    }
     return IsSwitch(*input_node);
   }
   return false;
",1,train
0a365c029e437be0349c31f8d4c9926b69fa3fa1,tensorflow/tensorflow,"Prevent null pointer dereference in constant folding.

Under certain conditions, an invalid protobuf saved model with invalid nodes would be loaded. During optimization phase, Grappler optimizer will then dereference a null pointer.

PiperOrigin-RevId: 409683530
Change-Id: I1f10340a7ec384bc9bc587300390f1078cf5caa0",constant_folding.cc,"@@ -3505,6 +3505,9 @@ bool ConstantFolding::MulConvPushDown(GraphDef* optimized_graph, NodeDef* node,
 
   NodeDef* mul_left_child = node_map_->GetNode(node->input(0));
   NodeDef* mul_right_child = node_map_->GetNode(node->input(1));
+  if (mul_left_child == nullptr || mul_right_child == nullptr) {
+    return false;
+  }
   // One child must be constant, and the second must be Conv op.
   const bool left_child_is_constant = IsReallyConstant(*mul_left_child);
   const bool right_child_is_constant = IsReallyConstant(*mul_right_child);
",1,test
955059813cc325dc1db5e2daa6221271406d4439,tensorflow/tensorflow,"Check for type inference error on node construction.

PiperOrigin-RevId: 409415804
Change-Id: Ieb6e020906b96f522bf8e2fa103715ddbbdc434a",graph.cc,"@@ -561,6 +561,11 @@ Node* Graph::AddNode(NodeDef node_def, Status* status) {
     VLOG(3) << ""AddNode: found type constructor for "" << node_def.name();
     const auto ctor_type =
         full_type::SpecializeType(AttrSlice(node_def), op_reg_data->op_def);
+    if (!ctor_type.ok()) {
+      *status = errors::InvalidArgument(""type error: "",
+                                        ctor_type.status().ToString());
+      return nullptr;
+    }
     const FullTypeDef ctor_typedef = ctor_type.ValueOrDie();
     if (ctor_typedef.type_id() != TFT_UNSET) {
       *(node_def.mutable_experimental_type()) = ctor_typedef;
",1,test
448a16182065bd08a202d9057dd8ca541e67996c,tensorflow/tensorflow,"Prevent stack overflow when FunctionLib in GraphDef has a self-recursive function.

Recursion is likely not supported at all, but we should handle that separately.

PiperOrigin-RevId: 414860329
Change-Id: I02a2270e86282b37362ddd485eeef16fb986a9e0",loader.cc,"@@ -25,6 +25,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/attr_value.pb.h""
 #include ""tensorflow/core/framework/function.pb.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
+#include ""tensorflow/core/framework/op_def.pb.h""
 #include ""tensorflow/core/framework/tensor.pb.h""
 #include ""tensorflow/core/lib/io/path.h""
 #include ""tensorflow/core/lib/monitoring/counter.h""
@@ -99,6 +100,19 @@ static Status ValidateNode(const NodeDef& node) {
   return Status::OK();
 }
 
+static Status ValidateFunctionNotRecursive(const FunctionDef& function) {
+  const auto& function_name = function.signature().name();
+  for (const auto& node : function.node_def()) {
+    if (node.op() == function_name) {
+      return errors::FailedPrecondition(
+          ""Function "", function_name,
+          "" is self recursive and TensorFlow does not support this scenario."");
+    }
+  }
+
+  return Status::OK();
+}
+
 static Status ValidateSavedTensors(const GraphDef& graph_def) {
   for (const auto& node : graph_def.node()) {
     TF_RETURN_IF_ERROR(ValidateNode(node));
@@ -110,6 +124,10 @@ static Status ValidateSavedTensors(const GraphDef& graph_def) {
       for (const auto& node : function.node_def()) {
         TF_RETURN_IF_ERROR(ValidateNode(node));
       }
+
+      // Also check that there is no recursivity in the library
+      // TODO(mihaimaruseac): Do more than self-recursivity
+      TF_RETURN_IF_ERROR(ValidateFunctionNotRecursive(function));
     }
   }
 
",1,train
c99d98cd189839dcf51aee94e7437b54b31f8abd,tensorflow/tensorflow,"Handle invalid inputs instead of crashing.

PiperOrigin-RevId: 409549744
Change-Id: I7f5935b34b53f7e426a5462fcc027bdbf5dcda24",graph.cc,"@@ -222,10 +222,16 @@ void Node::RunForwardTypeInference() {
       const auto& node_t = node->def().experimental_type();
       if (node_t.type_id() != TFT_UNSET) {
         int ix = input_idx[i];
-        DCHECK(ix < node_t.args_size())
-            << ""input "" << i << "" should have an output "" << ix
-            << "" but instead only has "" << node_t.args_size()
-            << "" outputs: "" << node_t.DebugString();
+        if (ix >= node_t.args_size()) {
+          LOG(WARNING) << name() << "" has bad type information: input "" << i
+                       << "" should have an output "" << ix
+                       << "" but instead only has "" << node_t.args_size()
+                       << "" outputs: "" << node_t.DebugString()
+                       << ""\nThis indicates either ""
+                          ""a bug in op registration or a corrupted graph."";
+          ClearTypeInfo();
+          return;
+        }
         input_types.emplace_back(node_t.args(ix));
       } else {
         input_types.emplace_back(*no_type);
",1,test
35f0fabb4c178253a964d7aabdbb15c6a398b69a,tensorflow/tensorflow,"Avoid Segfault for scalar shapes.

Calling tensor::FromElementsOp with an empty vector of elements and no type
causes a segfault. We need to let the FromElementsOp know which scalar type it
should have.
Also add back the DynamicBroadcastInDimOp canonicalization patterns, which
previously prevented this bug from happening.
Add a regression test that demonstrates the bug.

PiperOrigin-RevId: 417561444
Change-Id: I6d1d6cfb71aabbad6102422625a00bbe253ac95a",tf_cpurt_symbolic_shape_optimization.cc,"@@ -157,6 +157,10 @@ llvm::Optional<Value> simplifyBroadcast(ShapeComponentAnalysis& analysis,
     shapes_found.push_back(*found_shape);
     maxRank = std::max(maxRank, found_shape->size());
   }
+  if (maxRank == 0) {
+    return Value(builder->create<tensor::FromElementsOp>(
+        loc, shapes[0].getType(), SmallVector<Value>()));
+  }
 
   SmallVector<const ShapeComponentAnalysis::SymbolicExpr*> joined_dimensions(
       maxRank);
",1,train
e21af685e1828f7ca65038307df5cc06de4479e8,tensorflow/tensorflow,"Fix Null-pointer dereference in BuildXlaCompilationCache

If ConfigProto is not used, then use the default settings, which allow all devices.

PiperOrigin-RevId: 420391800
Change-Id: I88161ad7042990aef678e77b597a2fb2c8f815be",xla_platform_info.cc,"@@ -82,11 +82,13 @@ Status BuildXlaCompilationCache(DeviceBase* device, FunctionLibraryRuntime* flr,
   client_options.set_intra_op_parallelism_threads(
       device->tensorflow_cpu_worker_threads()->num_threads);
 
-  string allowed_gpus =
-      flr->config_proto()->gpu_options().visible_device_list();
-  TF_ASSIGN_OR_RETURN(absl::optional<std::set<int>> gpu_ids,
-                      ParseVisibleDeviceList(allowed_gpus));
-  client_options.set_allowed_devices(gpu_ids);
+  if (flr->config_proto()) {
+    string allowed_gpus =
+        flr->config_proto()->gpu_options().visible_device_list();
+    TF_ASSIGN_OR_RETURN(absl::optional<std::set<int>> gpu_ids,
+                        ParseVisibleDeviceList(allowed_gpus));
+    client_options.set_allowed_devices(gpu_ids);
+  }
 
   auto client = xla::ClientLibrary::GetOrCreateLocalClient(client_options);
   if (!client.ok()) {
",1,train
eebb96c2830d48597d055d247c0e9aebaea94cd5,tensorflow/tensorflow,"Fix an invalid address vulnerability in `tf.raw_ops.RaggedBincount`.

PiperOrigin-RevId: 368293153
Change-Id: I4b4e493d3fd05e7dc55a55de3a041a80a4f275c3",bincount_op.cc,"@@ -420,6 +420,15 @@ class RaggedBincountOp : public OpKernel {
     int num_values = values.size();
     int batch_idx = 0;
 
+    OP_REQUIRES(ctx, splits(0) == 0,
+                errors::InvalidArgument(""Splits must start with 0, not with "",
+                                        splits(0)));
+
+    OP_REQUIRES(ctx, splits(num_rows) == num_values,
+                errors::InvalidArgument(
+                    ""Splits must end with the number of values, got "",
+                    splits(num_rows), "" instead of "", num_values));
+
     Tensor* out_t;
     OP_REQUIRES_OK(
         ctx, ctx->allocate_output(0, TensorShape({num_rows, size}), &out_t));
",1,train
030af767d357d1b4088c4a25c72cb3906abac489,tensorflow/tensorflow,"Fix `tf.raw_ops.ResourceCountUpTo` null pointer dereference.

PiperOrigin-RevId: 368294347
Change-Id: I2c16fbfc9b4966c402c3d8e311f0d665a9c852d8",ndarray_tensor.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/python/lib/core/ndarray_tensor.h""
 
 #include <cstring>
+#include <optional>
 
 #include ""tensorflow/c/eager/tfe_context_internal.h""
 #include ""tensorflow/c/tf_tensor_internal.h""
@@ -74,6 +75,13 @@ Status PyArrayDescr_to_TF_DataType(PyArray_Descr* descr,
   PyObject* key;
   PyObject* value;
   Py_ssize_t pos = 0;
+
+  // Return an error if the fields attribute is null.
+  // Occurs with an improper conversion attempt to resource.
+  if (descr->fields == nullptr) {
+    return errors::Internal(""Unexpected numpy data type"");
+  }
+
   if (PyDict_Next(descr->fields, &pos, &key, &value)) {
     // In Python 3, the keys of numpy custom struct types are unicode, unlike
     // Python 2, where the keys are bytes.
",1,train
a7116dd3913c4a4afd2a3a938573aa7c785fdfc6,tensorflow/tensorflow,"Validate `MatrixDiagV{2,3}` arguments to prevent breakage.

PiperOrigin-RevId: 369056033
Change-Id: Ic2018c297d3dd6f252dc1dd3667f1ed5cb1eaa42",matrix_diag_op.cc,"@@ -192,9 +192,22 @@ class MatrixDiagOp : public OpKernel {
           upper_diag_index = diag_index.flat<int32>()(1);
         }
       }
-      num_rows = context->input(2).flat<int32>()(0);
-      num_cols = context->input(3).flat<int32>()(0);
-      padding_value = context->input(4).flat<T>()(0);
+
+      auto& num_rows_tensor = context->input(2);
+      OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_rows_tensor.shape()),
+                  errors::InvalidArgument(""num_rows must be a scalar""));
+      num_rows = num_rows_tensor.flat<int32>()(0);
+
+      auto& num_cols_tensor = context->input(3);
+      OP_REQUIRES(context, TensorShapeUtils::IsScalar(num_cols_tensor.shape()),
+                  errors::InvalidArgument(""num_cols must be a scalar""));
+      num_cols = num_cols_tensor.flat<int32>()(0);
+
+      auto& padding_value_tensor = context->input(4);
+      OP_REQUIRES(context,
+                  TensorShapeUtils::IsScalar(padding_value_tensor.shape()),
+                  errors::InvalidArgument(""padding_value must be a scalar""));
+      padding_value = padding_value_tensor.flat<T>()(0);
     }
 
     // Size validations.
",1,train
b055b9c474cd376259dde8779908f9eeaf097d93,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToVariant` invalid resize.

PiperOrigin-RevId: 368299574
Change-Id: I751c186325aa0bab397928845e790e60c2d90918",ragged_tensor_to_variant_op.cc,"@@ -159,6 +159,11 @@ class RaggedTensorToVariantOp : public OpKernel {
 
     // Unbatch the Ragged Tensor and encode the components.
     std::vector<RaggedTensorVariant> unbatched_ragged_input;
+    auto batched_splits_top_vec =
+        batched_ragged_input.splits(0).vec<SPLIT_TYPE>();
+    int num_components = batched_splits_top_vec.size() - 1;
+    OP_REQUIRES(context, num_components >= 0,
+                errors::Internal(""Invalid split argument.""));
     OP_REQUIRES_OK(context, UnbatchRaggedZerothDim<VALUE_TYPE, SPLIT_TYPE>(
                                 batched_ragged_input, &unbatched_ragged_input));
 
",1,train
799f835a3dfa00a4d852defa29b15841eea9d64f,tensorflow/tensorflow,"Fix 2 issues with `Conv3D`.

We have an issue where the dimensions are not matching and this causes Eigen to crash on an assert.

Then, we have an issue where we accidentally do a division by 0.

PiperOrigin-RevId: 369242785
Change-Id: Ie94067b2d41f58699af99ebb5af335ad9defd931",conv_ops_3d.cc,"@@ -69,6 +69,11 @@ struct LaunchConvOp<CPUDevice, T> {
                 errors::InvalidArgument(""CPU implementation of Conv3D ""
                                         ""currently only supports dilated rates ""
                                         ""of 1.""));
+    OP_REQUIRES(context, filter.dim_size(3) == input.dim_size(input.dims() - 1),
+                errors::InvalidArgument(
+                    ""Number of channels in filter ("", filter.dim_size(3),
+                    "") must match last dimension of input ("",
+                    input.dim_size(input.dims() - 1), "")""));
     functor::CuboidConvolution<CPUDevice, T>()(
         context->eigen_device<CPUDevice>(), output->tensor<T, 5>(),
         input.tensor<T, 5>(), filter.tensor<T, 5>(), strides[2], strides[1],
@@ -142,6 +147,8 @@ class Conv3DOp : public BinaryOp<T> {
     const int64 filter_depth = filter.dim_size(3);
     const int64 out_depth = filter.dim_size(4);
 
+    OP_REQUIRES(context, filter_depth != 0,
+                errors::InvalidArgument(""filter_depth must be non-zero""));
     OP_REQUIRES(context, in_depth % filter_depth == 0,
                 errors::InvalidArgument(
                     ""Input depth must be evenly divisible by filter depth: "",
",1,train
ff70c47a396ef1e3cb73c90513da4f5cb71bebba,tensorflow/tensorflow,"Fix `tf.raw_ops.GetSessionTensor` and `tf.raw_ops.DeleteSessionTensor` null pointer dereferences.

PiperOrigin-RevId: 368294154
Change-Id: Ie10f07a0a9a1c2b685e08153d48a0ca4b93f9fc9",session_ops.cc,"@@ -91,7 +91,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 REGISTER_GPU_KERNEL(bool);
 #undef REGISTER_GPU_KERNEL
 
-
 class GetSessionTensorOp : public OpKernel {
  public:
   explicit GetSessionTensorOp(OpKernelConstruction* context)
@@ -101,7 +100,11 @@ class GetSessionTensorOp : public OpKernel {
     const Tensor& handle = ctx->input(0);
     const string& name = handle.scalar<tstring>()();
     Tensor val;
-    OP_REQUIRES_OK(ctx, ctx->session_state()->GetTensor(name, &val));
+    auto session_state = ctx->session_state();
+    OP_REQUIRES(ctx, session_state != nullptr,
+                errors::FailedPrecondition(
+                    ""GetSessionTensor called on null session state""));
+    OP_REQUIRES_OK(ctx, session_state->GetTensor(name, &val));
     ctx->set_output(0, val);
   }
 
@@ -122,7 +125,6 @@ TF_CALL_NUMBER_TYPES(REGISTER_GPU_KERNEL);
 REGISTER_GPU_KERNEL(bool);
 #undef REGISTER_GPU_KERNEL
 
-
 class DeleteSessionTensorOp : public OpKernel {
  public:
   explicit DeleteSessionTensorOp(OpKernelConstruction* context)
@@ -131,7 +133,11 @@ class DeleteSessionTensorOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const Tensor& handle = ctx->input(0);
     const string& name = handle.scalar<tstring>()();
-    OP_REQUIRES_OK(ctx, ctx->session_state()->DeleteTensor(name));
+    auto session_state = ctx->session_state();
+    OP_REQUIRES(ctx, session_state != nullptr,
+                errors::FailedPrecondition(
+                    ""DeleteSessionTensor called on null session state""));
+    OP_REQUIRES_OK(ctx, session_state->DeleteTensor(name));
   }
 
   TF_DISALLOW_COPY_AND_ASSIGN(DeleteSessionTensorOp);
",1,train
b1cc5e5a50e7cee09f2c6eb48eb40ee9c4125025,tensorflow/tensorflow,"Fix `tf.raw_ops.SparseCross` failing CHECK.

PiperOrigin-RevId: 368701671
Change-Id: Id805729dd9ba0bda36e4bb309408129b55fb649d",sparse_cross_op.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/lib/core/stringpiece.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/platform/fingerprint.h""
@@ -460,10 +461,19 @@ int64 CalculateBatchSize(const OpInputList& shapes_list_in,
 Status ValidateInput(const OpInputList& indices_list_in,
                      const OpInputList& values_list_in,
                      const OpInputList& shapes_list_in,
-                     const OpInputList& dense_list_in) {
+                     const OpInputList& dense_list_in,
+                     const DataType& internal_type) {
   const auto size = indices_list_in.size();
+  // Only perform internal_type check for SparseCrossOp.
+  // Check if the internal_type is not invalid before doing so.
+  bool check_type = internal_type != DT_INVALID;
   // Validates indices_list_in OpInputList.
   for (int i = 0; i < size; i++) {
+    if (check_type && indices_list_in[i].dtype() != DT_INT64) {
+      return errors::InvalidArgument(""Input indices should be of type "",
+                                     DT_INT64, "" but received "",
+                                     indices_list_in[i].dtype());
+    }
     if (!TensorShapeUtils::IsMatrix(indices_list_in[i].shape())) {
       return errors::InvalidArgument(
           ""Input indices should be a matrix but received shape "",
@@ -482,6 +492,14 @@ Status ValidateInput(const OpInputList& indices_list_in,
                                    values_list_in.size());
   }
   for (int i = 0; i < size; i++) {
+    // Reject the case where the expected type is string but the input
+    // values are int64.
+    if (check_type && internal_type == DT_STRING &&
+        values_list_in[i].dtype() == DT_INT64) {
+      return errors::InvalidArgument(""Input values should be of internal type "",
+                                     internal_type, "" but received "",
+                                     values_list_in[i].dtype());
+    }
     if (!TensorShapeUtils::IsVector(values_list_in[i].shape())) {
       return errors::InvalidArgument(
           ""Input values should be a vector but received shape "",
@@ -502,6 +520,11 @@ Status ValidateInput(const OpInputList& indices_list_in,
                                    shapes_list_in.size());
   }
   for (int i = 0; i < size; i++) {
+    if (check_type && shapes_list_in[i].dtype() != DT_INT64) {
+      return errors::InvalidArgument(""Input shape should be of type "", DT_INT64,
+                                     "" but received "",
+                                     shapes_list_in[i].dtype());
+    }
     if (!TensorShapeUtils::IsVector(shapes_list_in[i].shape())) {
       return errors::InvalidArgument(
           ""Input shapes should be a vector but received shape "",
@@ -517,6 +540,14 @@ Status ValidateInput(const OpInputList& indices_list_in,
 
   // Validates dense_list_in OpInputList
   for (int i = 0; i < dense_list_in.size(); ++i) {
+    // Reject the case where the expected type is string but the input
+    // values are int64.
+    if (check_type && internal_type == DT_STRING &&
+        dense_list_in[i].dtype() == DT_INT64) {
+      return errors::InvalidArgument(""Dense inputs should be of internal type "",
+                                     internal_type, "" but received "",
+                                     dense_list_in[i].dtype());
+    }
     if (!TensorShapeUtils::IsMatrix(dense_list_in[i].shape())) {
       return errors::InvalidArgument(
           ""Dense inputs should be a matrix but received shape "",
@@ -698,6 +729,7 @@ class SparseCrossOp : public OpKernel {
     int64 signed_hash_key_;
     OP_REQUIRES_OK(context, context->GetAttr(""hash_key"", &signed_hash_key_));
     hash_key_ = static_cast<uint64>(signed_hash_key_);
+    OP_REQUIRES_OK(context, context->GetAttr(""internal_type"", &internal_type_));
   }
 
   void Compute(OpKernelContext* context) override {
@@ -711,8 +743,10 @@ class SparseCrossOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->input_list(""dense_inputs"", &dense_list_in));
 
-    OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in,
-                                          shapes_list_in, dense_list_in));
+    DataType internal_type = internal_type_;
+    OP_REQUIRES_OK(
+        context, ValidateInput(indices_list_in, values_list_in, shapes_list_in,
+                               dense_list_in, internal_type));
 
     std::vector<std::unique_ptr<ColumnInterface<InternalType>>> columns =
         GenerateColumnsFromInput<InternalType>(indices_list_in, values_list_in,
@@ -756,6 +790,7 @@ class SparseCrossOp : public OpKernel {
  private:
   int64 num_buckets_;
   uint64 hash_key_;
+  DataType internal_type_;
 };
 
 class SparseCrossV2Op : public OpKernel {
@@ -773,8 +808,11 @@ class SparseCrossV2Op : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->input_list(""dense_inputs"", &dense_list_in));
 
-    OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in,
-                                          shapes_list_in, dense_list_in));
+    // Set internal_type to invalid_type so that the check will be ignored.
+    DataType internal_type = DT_INVALID;
+    OP_REQUIRES_OK(
+        context, ValidateInput(indices_list_in, values_list_in, shapes_list_in,
+                               dense_list_in, internal_type));
 
     const Tensor* sep_t;
     OP_REQUIRES_OK(context, context->input(""sep"", &sep_t));
@@ -832,8 +870,11 @@ class SparseCrossHashedOp : public OpKernel {
     OP_REQUIRES_OK(context,
                    context->input_list(""dense_inputs"", &dense_list_in));
 
-    OP_REQUIRES_OK(context, ValidateInput(indices_list_in, values_list_in,
-                                          shapes_list_in, dense_list_in));
+    // Set internal_type to invalid_type so that the check will be ignored.
+    DataType internal_type = DT_INVALID;
+    OP_REQUIRES_OK(
+        context, ValidateInput(indices_list_in, values_list_in, shapes_list_in,
+                               dense_list_in, internal_type));
 
     const Tensor* num_buckets_t;
     OP_REQUIRES_OK(context, context->input(""num_buckets"", &num_buckets_t));
",1,train
8f37b52e1320d8d72a9529b2468277791a261197,tensorflow/tensorflow,"Validate some shape requirements for `Conv3DBackpropFilter*` and `Conv3DBackpropInput*` ops.

Older versions of Eigen might otherwise crash / produce OOB read on specially crafted inputs.

PiperOrigin-RevId: 369293977
Change-Id: I58f51445a93936d7cf8e616f75de17677df36718",conv_grad_ops_3d.cc,"@@ -239,6 +239,20 @@ class Conv3DBackpropInputOp : public OpKernel {
       input_shape = context->input(0).shape();
     }
 
+    OP_REQUIRES(
+        context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+        errors::InvalidArgument(""input and filter_sizes must have the same ""
+                                ""number of channels. Got "",
+                                input_shape.dim_size(4), "" for input and "",
+                                filter_shape.dim_size(3), "" for filter_sizes""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+        errors::InvalidArgument(""out_backprop and filter_sizes must have the ""
+                                ""same number of channels. Got "",
+                                out_backprop_shape.dim_size(4),
+                                "" for out_backprop and "",
+                                filter_shape.dim_size(4), "" for filter_sizes""));
+
     ConvBackpropDimensions dims;
     OP_REQUIRES_OK(context, ConvBackpropComputeDimensions(
                                 ""Conv3DBackpropInputOp"", /*num_spatial_dims=*/3,
@@ -346,6 +360,20 @@ class Conv3DCustomBackpropInputOp : public OpKernel {
       input_shape = context->input(0).shape();
     }
 
+    OP_REQUIRES(
+        context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+        errors::InvalidArgument(""input and filter_sizes must have the same ""
+                                ""number of channels. Got "",
+                                input_shape.dim_size(4), "" for input and "",
+                                filter_shape.dim_size(3), "" for filter_sizes""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+        errors::InvalidArgument(""out_backprop and filter_sizes must have the ""
+                                ""same number of channels. Got "",
+                                out_backprop_shape.dim_size(4),
+                                "" for out_backprop and "",
+                                filter_shape.dim_size(4), "" for filter_sizes""));
+
     ConvBackpropDimensions dims;
     OP_REQUIRES_OK(context, ConvBackpropComputeDimensions(
                                 ""Conv3DBackpropInputOp"", /*num_spatial_dims=*/3,
@@ -696,6 +724,20 @@ class Conv3DBackpropFilterOp : public OpKernel {
       filter_shape = context->input(1).shape();
     }
 
+    OP_REQUIRES(
+        context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+        errors::InvalidArgument(""input and filter_sizes must have the same ""
+                                ""number of channels. Got "",
+                                input_shape.dim_size(4), "" for input and "",
+                                filter_shape.dim_size(3), "" for filter_sizes""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+        errors::InvalidArgument(""out_backprop and filter_sizes must have the ""
+                                ""same number of channels. Got "",
+                                out_backprop_shape.dim_size(4),
+                                "" for out_backprop and "",
+                                filter_shape.dim_size(4), "" for filter_sizes""));
+
     ConvBackpropDimensions dims;
     OP_REQUIRES_OK(context,
                    ConvBackpropComputeDimensions(
@@ -808,6 +850,20 @@ class Conv3DCustomBackpropFilterOp : public OpKernel {
       filter_shape = context->input(1).shape();
     }
 
+    OP_REQUIRES(
+        context, input_shape.dim_size(4) == filter_shape.dim_size(3),
+        errors::InvalidArgument(""input and filter_sizes must have the same ""
+                                ""number of channels. Got "",
+                                input_shape.dim_size(4), "" for input and "",
+                                filter_shape.dim_size(3), "" for filter_sizes""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dim_size(4) == filter_shape.dim_size(4),
+        errors::InvalidArgument(""out_backprop and filter_sizes must have the ""
+                                ""same number of channels. Got "",
+                                out_backprop_shape.dim_size(4),
+                                "" for out_backprop and "",
+                                filter_shape.dim_size(4), "" for filter_sizes""));
+
     ConvBackpropDimensions dims;
     OP_REQUIRES_OK(context,
                    ConvBackpropComputeDimensions(
",1,train
c57c0b9f3a4f8684f3489dd9a9ec627ad8b599f5,tensorflow/tensorflow,"Fix the segfault in `tf.raw_ops.SparseCountSparseOutput`.

PiperOrigin-RevId: 369264941
Change-Id: I23a96a15b8370c01ee21ba3841e1c7dcbf55e93d",count_ops.cc,"@@ -197,9 +197,17 @@ class SparseCount : public OpKernel {
                     ""The shape argument requires at least one element.""));
 
     bool is_1d = shape.NumElements() == 1;
-    int num_batches = is_1d ? 1 : shape.flat<int64>()(0);
+    auto shape_vector = shape.flat<int64>();
+    int num_batches = is_1d ? 1 : shape_vector(0);
     int num_values = values.NumElements();
 
+    for (int b = 0; b < shape_vector.size(); b++) {
+      OP_REQUIRES(context, shape_vector(b) >= 0,
+                  errors::InvalidArgument(
+                      ""Elements in dense_shape must be >= 0. Instead got:"",
+                      shape.DebugString()));
+    }
+
     OP_REQUIRES(context, num_values == indices.shape().dim_size(0),
                 errors::InvalidArgument(
                     ""Number of values must match first dimension of indices."",
",1,train
311403edbc9816df80274bd1ea8b3c0c0f22c3fa,tensorflow/tensorflow,"Eliminate a division by 0 in 3D convolutions.

Also prevent a CHECK failure introduced in the most recent change.

PiperOrigin-RevId: 369322073
Change-Id: I4f609c028f89565fb2b49c3fdd20b63496582bae",conv_grad_ops_3d.cc,"@@ -239,6 +239,14 @@ class Conv3DBackpropInputOp : public OpKernel {
       input_shape = context->input(0).shape();
     }
 
+    OP_REQUIRES(context, input_shape.dims() == 5,
+                errors::InvalidArgument(""input tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, filter_shape.dims() == 5,
+        errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dims() == 5,
+        errors::InvalidArgument(""out_backprop tensor must have 5 dimensions""));
     OP_REQUIRES(
         context, input_shape.dim_size(4) == filter_shape.dim_size(3),
         errors::InvalidArgument(""input and filter_sizes must have the same ""
@@ -360,6 +368,14 @@ class Conv3DCustomBackpropInputOp : public OpKernel {
       input_shape = context->input(0).shape();
     }
 
+    OP_REQUIRES(context, input_shape.dims() == 5,
+                errors::InvalidArgument(""input tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, filter_shape.dims() == 5,
+        errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dims() == 5,
+        errors::InvalidArgument(""out_backprop tensor must have 5 dimensions""));
     OP_REQUIRES(
         context, input_shape.dim_size(4) == filter_shape.dim_size(3),
         errors::InvalidArgument(""input and filter_sizes must have the same ""
@@ -444,6 +460,11 @@ class Conv3DCustomBackpropInputOp : public OpKernel {
     // contraction compared to sharding and matmuls.
     const bool use_parallel_contraction = dims.batch_size == 1;
 
+    OP_REQUIRES(
+        context, work_unit_size > 0,
+        errors::InvalidArgument(""input, filter_sizes and out_backprop tensors ""
+                                ""must all have at least 1 element""));
+
     const size_t shard_size =
         use_parallel_contraction
             ? 1
@@ -724,6 +745,14 @@ class Conv3DBackpropFilterOp : public OpKernel {
       filter_shape = context->input(1).shape();
     }
 
+    OP_REQUIRES(context, input_shape.dims() == 5,
+                errors::InvalidArgument(""input tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, filter_shape.dims() == 5,
+        errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dims() == 5,
+        errors::InvalidArgument(""out_backprop tensor must have 5 dimensions""));
     OP_REQUIRES(
         context, input_shape.dim_size(4) == filter_shape.dim_size(3),
         errors::InvalidArgument(""input and filter_sizes must have the same ""
@@ -850,6 +879,14 @@ class Conv3DCustomBackpropFilterOp : public OpKernel {
       filter_shape = context->input(1).shape();
     }
 
+    OP_REQUIRES(context, input_shape.dims() == 5,
+                errors::InvalidArgument(""input tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, filter_shape.dims() == 5,
+        errors::InvalidArgument(""filter_sizes tensor must have 5 dimensions""));
+    OP_REQUIRES(
+        context, out_backprop_shape.dims() == 5,
+        errors::InvalidArgument(""out_backprop tensor must have 5 dimensions""));
     OP_REQUIRES(
         context, input_shape.dim_size(4) == filter_shape.dim_size(3),
         errors::InvalidArgument(""input and filter_sizes must have the same ""
@@ -936,6 +973,11 @@ class Conv3DCustomBackpropFilterOp : public OpKernel {
 
     const int64 work_unit_size = size_A + size_B + size_C;
 
+    OP_REQUIRES(
+        context, work_unit_size > 0,
+        errors::InvalidArgument(""input, filter_sizes and out_backprop tensors ""
+                                ""must all have at least 1 element""));
+
     const size_t shard_size =
         (target_working_set_size + work_unit_size - 1) / work_unit_size;
 
",1,train
69c68ecbb24dff3fa0e46da0d16c821a2dd22d7c,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.AddManySparseToTensorsMap`.

PiperOrigin-RevId: 369492969
Change-Id: I1d70d6c0c92e3d7a25bc3b3aa2a0c0ac9688bf81",sparse_tensors_map_ops.cc,"@@ -21,9 +21,6 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
-#include ""tensorflow/core/framework/op_kernel.h""
-#include ""tensorflow/core/framework/register_types.h""
-
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/resource_mgr.h""
@@ -31,6 +28,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_util.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
+#include ""tensorflow/core/util/overflow.h""
 #include ""tensorflow/core/util/sparse/sparse_tensor.h""
 
 namespace tensorflow {
@@ -254,7 +252,22 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp {
         errors::InvalidArgument(
             ""Rank of input SparseTensor should be > 1, but saw rank: "", rank));
 
-    TensorShape tensor_input_shape(input_shape->vec<int64>());
+    auto input_shape_vec = input_shape->vec<int64>();
+    int new_num_elements = 1;
+    bool overflow_ocurred = false;
+    for (int i = 0; i < input_shape_vec.size(); i++) {
+      new_num_elements =
+          MultiplyWithoutOverflow(new_num_elements, input_shape_vec(i));
+      if (new_num_elements < 0) {
+        overflow_occurred = true;
+      }
+    }
+
+    OP_REQUIRES(
+        context, !overflow_occurred,
+        errors::Internal(""Encountered overflow from large input shape.""));
+
+    TensorShape tensor_input_shape(input_shape_vec);
     gtl::InlinedVector<int64, 8> std_order(rank);
     std::iota(std_order.begin(), std_order.end(), 0);
     SparseTensor input_st;
@@ -262,8 +275,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp {
                                                  tensor_input_shape, std_order,
                                                  &input_st));
 
-    auto input_shape_t = input_shape->vec<int64>();
-    const int64 N = input_shape_t(0);
+    const int64 N = input_shape_vec(0);
 
     Tensor sparse_handles(DT_INT64, TensorShape({N}));
     auto sparse_handles_t = sparse_handles.vec<int64>();
@@ -274,7 +286,7 @@ class AddManySparseToTensorsMapOp : public SparseTensorAccessingOp {
     // minibatch entries.
     TensorShape output_shape;
     OP_REQUIRES_OK(context, TensorShapeUtils::MakeShape(
-                                input_shape_t.data() + 1,
+                                input_shape_vec.data() + 1,
                                 input_shape->NumElements() - 1, &output_shape));
 
     // Get groups by minibatch dimension
",1,test
fca9874a9b42a2134f907d2fb46ab774a831404a,tensorflow/tensorflow,"Prevent another division by zero.

PiperOrigin-RevId: 369338598
Change-Id: I55471d363e401fdcf8d259670ad4eef672b731e2",conv_grad_shape_utils.cc,"@@ -127,6 +127,10 @@ Status ConvBackpropComputeDimensionsV2(
   // dimensions of the filter Tensor.
   VLOG(2) << ""input vs filter_in depth "" << dims->in_depth << "" ""
           << filter_shape.dim_size(num_dims - 2);
+  if (filter_shape.dim_size(num_dims - 2) <= 0) {
+    return errors::InvalidArgument(
+        label, "": filter depth must be strictly greater than zero"");
+  }
   if (dims->in_depth % filter_shape.dim_size(num_dims - 2)) {
     return errors::InvalidArgument(
         label, "": input depth must be evenly divisible by filter depth"");
",1,test
2be2cdf3a123e231b16f766aa0e27d56b4606535,tensorflow/tensorflow,"Prevent yet another division by zero

PiperOrigin-RevId: 369343977
Change-Id: I1a60da4cf512e60fd91e069c16e026544632fe7f",conv_grad_input_ops.h,"@@ -649,6 +649,11 @@ class Conv2DCustomBackpropInputOp : public OpKernel {
         dims.batch_size == 1 ||
         thread_work_unit_size >= min_thread_work_unit_size;
 
+    OP_REQUIRES(
+        context, work_unit_size > 0,
+        errors::InvalidArgument(""input, filter_sizes and out_backprop tensors ""
+                                ""must all have at least 1 element""));
+
     const size_t shard_size =
         use_parallel_contraction
             ? 1
",1,train
b12aa1d44352de21d1a6faaf04172d8c2508b42b,tensorflow/tensorflow,"Fix one more FPE.

PiperOrigin-RevId: 369346568
Change-Id: I840fd575962adc879713a4c9cc59e6da3331caa7",conv_ops.cc,"@@ -260,6 +260,11 @@ struct LaunchConv2DOp<CPUDevice, T> {
     const int64 out_depth = output->dim_size(3);
     const int64 patch_depth = filter.dim_size(2);
 
+    if (patch_depth <= 0) {
+      ctx->SetStatus(errors::InvalidArgument(
+          ""filter depth must be strictly positive, got "", patch_depth));
+      return;
+    }
     if (in_depth % patch_depth != 0) {
       ctx->SetStatus(errors::InvalidArgument(
           ""input depth must be evenly divisible by filter depth: "", in_depth,
@@ -268,6 +273,11 @@ struct LaunchConv2DOp<CPUDevice, T> {
     }
 
     const int64 num_groups = in_depth / patch_depth;
+    if (num_groups <= 0) {
+      ctx->SetStatus(errors::InvalidArgument(
+          ""number of groups must be strictly positive, got "", num_groups));
+      return;
+    }
     if (out_depth % num_groups != 0 || out_depth < num_groups) {
       ctx->SetStatus(errors::InvalidArgument(
           ""output depth must be evenly divisible by number of groups: "",
@@ -536,6 +546,9 @@ Status ComputeConv2DDimension(const Conv2DParameters& params,
               errors::InvalidArgument(""Patch depth too large""));
   const int in_depth = static_cast<int>(in_depth_raw);
   const int patch_depth = static_cast<int>(patch_depth_raw);
+  TF_REQUIRES(patch_depth > 0,
+              errors::InvalidArgument(
+                  ""filter depth must be strictly positive, got "", patch_depth));
   TF_REQUIRES(in_depth % patch_depth == 0,
               errors::InvalidArgument(
                   ""input depth must be evenly divisible by filter depth: "",
",1,test
cfa91be9863a91d5105a3b4941096044ab32036b,tensorflow/tensorflow,"Fix one FPE and remove two CHECK-fails.

PiperOrigin-RevId: 369349640
Change-Id: I1fedbfc2b5bab635c5cb51f103d7c9176f79831a",quantized_conv_ops.cc,"@@ -18,6 +18,8 @@ limitations under the License.
 #include <algorithm>
 #include <vector>
 
+#include ""tensorflow/core/platform/errors.h""
+
 #define EIGEN_USE_THREADS
 
 #define GEMMLOWP_ALLOW_SLOW_SCALAR_FALLBACK
@@ -227,8 +229,12 @@ class Im2ColConvFunctor {
       return;
     }
 
-    CHECK_GT(output_width, 0);
-    CHECK_GT(output_height, 0);
+    OP_REQUIRES(
+        context, output_width > 0,
+        errors::InvalidArgument(""output_width must be strictly positive""));
+    OP_REQUIRES(
+        context, output_height > 0,
+        errors::InvalidArgument(""output_height must be strictly positive""));
     int filter_left_offset;
     int filter_top_offset;
     if (padding == VALID) {
@@ -255,6 +261,9 @@ class Im2ColConvFunctor {
     // by the width, then the height. This is the standard memory order in the
     // image world if it helps to visualize it.
     const int filter_value_count = filter_width * filter_height * input_depth;
+    OP_REQUIRES(context, filter_value_count > 0,
+                errors::InvalidArgument(
+                    ""filter patch must contain at least one element""));
     const int64 patches_per_chunk =
         kMaxChunkSize / (filter_value_count * sizeof(T1));
     const int64 chunk_value_count =
",1,train
a1b11d2fdd1e51bfe18bb1ede804f60abfa92da6,tensorflow/tensorflow,"Fix one division by zero

PiperOrigin-RevId: 369474832
Change-Id: I1082858ed78d9b2e4738ce30b231955973d49e1e",quantized_mul_op.cc,"@@ -347,6 +347,11 @@ class QuantizedMulOp : public OpKernel {
         tensor_num_elements = x.NumElements();
         tensor_offset = offset_x;
       }
+      if (vector_num_elements == 0) {
+        context->SetStatus(
+            errors::InvalidArgument(""vector must have at least 1 element""));
+        return;
+      }
       VectorTensorMultiply<T, Toutput>(
           vector_data, vector_offset, vector_num_elements, tensor_data,
           tensor_offset, tensor_num_elements, z_data);
",1,train
f851613f8f0fb0c838d160ced13c134f778e3ce7,tensorflow/tensorflow,"Fix heap buffer overflow caused by rounding.

This was hard to fix. Due to the way we compute the pixels that influence an output pixel in resized images, for certain input configurations we might have issued a read from a pixel that is outside the boundary of the original image. This is because of floating-point errors that affected truncation results.

PiperOrigin-RevId: 369757871
Change-Id: If89425fff930983829a2168203c11858883eebc9",quantized_resize_bilinear_op.cc,"@@ -64,6 +64,8 @@ inline void ComputeInterpolationWeights(
         std::max(static_cast<int64>(in_f), static_cast<int64>(0));
     interpolation->upper[i] =
         std::min(static_cast<int64>(std::ceil(in)), in_size - 1);
+    interpolation->lower[i] =
+        std::min(interpolation->lower[i], interpolation->upper[i]);
     interpolation->lerp[i] = in - in_f;
     interpolation->ilerp[i] =
         static_cast<T_SCALE>((in - in_f) * (1 << resolution));
",1,train
e6a7c7cc18c3aaad1ae0872cb0a959f5c923d2bd,tensorflow/tensorflow,"Remove `OP_REQUIRES` call from helper function.

Since the `OP_REQUIRES` macro expands to a `return;` (among other things), calling it in a helper function only ends the helper function's execution early, while the kernel still runs from start to end. Thus, all the expected validations are effectively broken/useless, as the code ploughs on to the next crash anyway.

PiperOrigin-RevId: 369524386
Change-Id: I54f6cf9328445675ccc392e661b04336b229c9da",sparse_cholesky_op.cc,"@@ -17,6 +17,8 @@ limitations under the License.
 #include <numeric>
 #include <vector>
 
+#include ""tensorflow/core/framework/op_requires.h""
+
 #define EIGEN_USE_THREADS
 
 #include ""third_party/eigen3/Eigen/Core""
@@ -82,8 +84,8 @@ class CSRSparseCholeskyCPUOp : public OpKernel {
 
     int64 num_rows;
     int batch_size;
-    ValidateInputs(ctx, *input_matrix, input_permutation_indices, &batch_size,
-                   &num_rows);
+    OP_REQUIRES_OK(ctx, ValidateInputs(*input_matrix, input_permutation_indices,
+                                       &batch_size, &num_rows));
 
     // Allocate batch pointers.
     Tensor batch_ptr(cpu_allocator(), DT_INT32, TensorShape({batch_size + 1}));
@@ -226,49 +228,48 @@ class CSRSparseCholeskyCPUOp : public OpKernel {
   }
 
  private:
-  void ValidateInputs(OpKernelContext* ctx,
-                      const CSRSparseMatrix& sparse_matrix,
-                      const Tensor& permutation_indices, int* batch_size,
-                      int64* num_rows) {
-    OP_REQUIRES(ctx, sparse_matrix.dtype() == DataTypeToEnum<T>::value,
-                errors::InvalidArgument(
-                    ""Asked for a CSRSparseMatrix of type "",
-                    DataTypeString(DataTypeToEnum<T>::value),
-                    "" but saw dtype: "", DataTypeString(sparse_matrix.dtype())));
+  Status ValidateInputs(const CSRSparseMatrix& sparse_matrix,
+                        const Tensor& permutation_indices, int* batch_size,
+                        int64* num_rows) {
+    if (sparse_matrix.dtype() != DataTypeToEnum<T>::value)
+      return errors::InvalidArgument(
+          ""Asked for a CSRSparseMatrix of type "",
+          DataTypeString(DataTypeToEnum<T>::value),
+          "" but saw dtype: "", DataTypeString(sparse_matrix.dtype()));
 
     const Tensor& dense_shape = sparse_matrix.dense_shape();
     const int rank = dense_shape.dim_size(0);
-    OP_REQUIRES(ctx, rank == 2 || rank == 3,
-                errors::InvalidArgument(""sparse matrix must have rank 2 or 3; "",
-                                        ""but dense_shape has size "", rank));
+    if (rank < 2 || rank > 3)
+      return errors::InvalidArgument(""sparse matrix must have rank 2 or 3; "",
+                                     ""but dense_shape has size "", rank);
     const int row_dim = (rank == 2) ? 0 : 1;
     auto dense_shape_vec = dense_shape.vec<int64>();
     *num_rows = dense_shape_vec(row_dim);
     const int64 num_cols = dense_shape_vec(row_dim + 1);
-    OP_REQUIRES(ctx, *num_rows == num_cols,
-                errors::InvalidArgument(""sparse matrix must be square; got: "",
-                                        *num_rows, "" != "", num_cols));
+    if (*num_rows != num_cols)
+      return errors::InvalidArgument(
+          ""sparse matrix must be square; got: "", *num_rows, "" != "", num_cols);
     const TensorShape& perm_shape = permutation_indices.shape();
-    OP_REQUIRES(
-        ctx, perm_shape.dims() + 1 == rank,
-        errors::InvalidArgument(
-            ""sparse matrix must have the same rank as permutation; got: "", rank,
-            "" != "", perm_shape.dims(), "" + 1.""));
-    OP_REQUIRES(
-        ctx, perm_shape.dim_size(rank - 2) == *num_rows,
-        errors::InvalidArgument(
-            ""permutation must have the same number of elements in each batch ""
-            ""as the number of rows in sparse matrix; got: "",
-            perm_shape.dim_size(rank - 2), "" != "", *num_rows));
+    if (perm_shape.dims() + 1 != rank)
+      return errors::InvalidArgument(
+          ""sparse matrix must have the same rank as permutation; got: "", rank,
+          "" != "", perm_shape.dims(), "" + 1."");
+    if (perm_shape.dim_size(rank - 2) != *num_rows)
+      return errors::InvalidArgument(
+          ""permutation must have the same number of elements in each batch ""
+          ""as the number of rows in sparse matrix; got: "",
+          perm_shape.dim_size(rank - 2), "" != "", *num_rows);
 
     *batch_size = sparse_matrix.batch_size();
     if (*batch_size > 1) {
-      OP_REQUIRES(
-          ctx, perm_shape.dim_size(0) == *batch_size,
-          errors::InvalidArgument(""permutation must have the same batch size ""
-                                  ""as sparse matrix; got: "",
-                                  perm_shape.dim_size(0), "" != "", *batch_size));
+      if (perm_shape.dim_size(0) != *batch_size)
+        return errors::InvalidArgument(
+            ""permutation must have the same batch size ""
+            ""as sparse matrix; got: "",
+            perm_shape.dim_size(0), "" != "", *batch_size);
     }
+
+    return Status::OK();
   }
 };
 
",1,test
26eb323554ffccd173e8a79a8c05c15b685ae4d1,tensorflow/tensorflow,"Fix null CHECK issue with `tf.raw_ops.EncodePng`.

PiperOrigin-RevId: 369717714
Change-Id: I24136cd99c20b8466671f4f93b670ef6f6dd1250",encode_png_op.cc,"@@ -54,6 +54,8 @@ class EncodePngOp : public OpKernel {
     OP_REQUIRES(context, image.dims() == 3,
                 errors::InvalidArgument(""image must be 3-dimensional"",
                                         image.shape().DebugString()));
+    OP_REQUIRES(context, image.NumElements() > 0,
+                errors::Internal(""Invalid image provided.""));
     OP_REQUIRES(
         context,
         FastBoundsCheck(image.NumElements(), std::numeric_limits<int32>::max()),
",1,train
44b7f486c0143f68b56c34e2d01e146ee445134a,tensorflow/tensorflow,"Fix out of bounds read in `ragged_cross_op.cc`.

PiperOrigin-RevId: 369757702
Change-Id: Ie6e5d2c21513a8d56bf41fcf35960caf76e890f9",ragged_cross_op.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/fingerprint.h""
 #include ""tensorflow/core/util/util.h""
 #include ""tensorflow/core/util/work_sharder.h""
@@ -466,16 +467,45 @@ class RaggedCrossOp : public OpKernel {
     int next_dense = 0;
     for (char c : input_order_) {
       if (c == 'R') {
+        if (next_ragged >= ragged_values_list.size())
+          return errors::InvalidArgument(
+              ""input_order \"""", input_order_,
+              ""\"" specifies reading a ragged tensor value at index "",
+              next_ragged, "" from a list of "", ragged_values_list.size(),
+              "" values."");
+        if (next_ragged >= ragged_splits_list.size())
+          return errors::InvalidArgument(
+              ""input_order \"""", input_order_,
+              ""\"" specifies reading a ragged tensor split at index "",
+              next_ragged, "" from a list of "", ragged_splits_list.size(),
+              "" splits."");
         TF_RETURN_IF_ERROR(BuildRaggedFeatureReader(
             ragged_values_list[next_ragged], ragged_splits_list[next_ragged],
             features));
         next_ragged++;
       } else if (c == 'S') {
+        if (next_sparse >= sparse_values_list.size())
+          return errors::InvalidArgument(
+              ""input_order \"""", input_order_,
+              ""\"" specifies reading a sparse tensor value at index "",
+              next_sparse, "" from a list of "", sparse_values_list.size(),
+              "" values."");
+        if (next_sparse >= sparse_indices_list.size())
+          return errors::InvalidArgument(
+              ""input_order \"""", input_order_,
+              ""\"" specifies reading a sparse tensor index at index "",
+              next_sparse, "" from a list of "", sparse_indices_list.size(),
+              "" indices."");
         TF_RETURN_IF_ERROR(BuildSparseFeatureReader(
             sparse_indices_list[next_sparse], sparse_values_list[next_sparse],
             batch_size, features));
         next_sparse++;
       } else if (c == 'D') {
+        if (next_dense >= dense_list.size())
+          return errors::InvalidArgument(
+              ""input_order \"""", input_order_,
+              ""\"" specifies reading a dense tensor at index "", next_dense,
+              "" from a list of "", dense_list.size(), "" tensors."");
         TF_RETURN_IF_ERROR(
             BuildDenseFeatureReader(dense_list[next_dense++], features));
       } else {
",1,train
b432a38fe0e1b4b904a6c222cbce794c39703e87,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.DrawBoundingBoxes`.

PiperOrigin-RevId: 369753591
Change-Id: I3b45fc98ee0d28a3c20b7e9c995aa647c976ec40",draw_bounding_box_op.cc,"@@ -147,22 +147,46 @@ class DrawBoundingBoxesOp : public OpKernel {
 
         // At this point, {min,max}_box_{row,col}_clamp are inside the
         // image.
-        CHECK_GE(min_box_row_clamp, 0);
-        CHECK_GE(max_box_row_clamp, 0);
-        CHECK_LT(min_box_row_clamp, height);
-        CHECK_LT(max_box_row_clamp, height);
-        CHECK_GE(min_box_col_clamp, 0);
-        CHECK_GE(max_box_col_clamp, 0);
-        CHECK_LT(min_box_col_clamp, width);
-        CHECK_LT(max_box_col_clamp, width);
+        OP_REQUIRES(
+            context, min_box_row_clamp >= 0,
+            errors::InvalidArgument(""Min box row clamp is less than 0.""));
+        OP_REQUIRES(
+            context, max_box_row_clamp >= 0,
+            errors::InvalidArgument(""Max box row clamp is less than 0.""));
+        OP_REQUIRES(context, min_box_row_clamp <= height,
+                    errors::InvalidArgument(
+                        ""Min box row clamp is greater than height.""));
+        OP_REQUIRES(context, max_box_row_clamp <= height,
+                    errors::InvalidArgument(
+                        ""Max box row clamp is greater than height.""));
+
+        OP_REQUIRES(
+            context, min_box_col_clamp >= 0,
+            errors::InvalidArgument(""Min box col clamp is less than 0.""));
+        OP_REQUIRES(
+            context, max_box_col_clamp >= 0,
+            errors::InvalidArgument(""Max box col clamp is less than 0.""));
+        OP_REQUIRES(context, min_box_col_clamp <= width,
+                    errors::InvalidArgument(
+                        ""Min box col clamp is greater than width.""));
+        OP_REQUIRES(context, max_box_col_clamp <= width,
+                    errors::InvalidArgument(
+                        ""Max box col clamp is greater than width.""));
 
         // At this point, the min_box_row and min_box_col are either
         // in the image or above/left of it, and max_box_row and
         // max_box_col are either in the image or below/right or it.
-        CHECK_LT(min_box_row, height);
-        CHECK_GE(max_box_row, 0);
-        CHECK_LT(min_box_col, width);
-        CHECK_GE(max_box_col, 0);
+
+        OP_REQUIRES(
+            context, min_box_row <= height,
+            errors::InvalidArgument(""Min box row is greater than height.""));
+        OP_REQUIRES(context, max_box_row >= 0,
+                    errors::InvalidArgument(""Max box row is less than 0.""));
+        OP_REQUIRES(
+            context, min_box_col <= width,
+            errors::InvalidArgument(""Min box col is greater than width.""));
+        OP_REQUIRES(context, max_box_col >= 0,
+                    errors::InvalidArgument(""Max box col is less than 0.""));
 
         // Draw top line.
         if (min_box_row >= 0) {
",1,train
efea03b38fb8d3b81762237dc85e579cc5fc6e87,tensorflow/tensorflow,"Validate inputs to `QuantizedMul`

PiperOrigin-RevId: 369756982
Change-Id: I00d960cc3b9316fd7a86bd37a44e341c96e17624",quantized_mul_op.cc,"@@ -284,10 +284,22 @@ class QuantizedMulOp : public OpKernel {
   void Compute(OpKernelContext* context) override {
     const Tensor& x = context->input(0);
     const Tensor& y = context->input(1);
-    const float min_x = context->input(2).flat<float>()(0);
-    const float max_x = context->input(3).flat<float>()(0);
-    const float min_y = context->input(4).flat<float>()(0);
-    const float max_y = context->input(5).flat<float>()(0);
+    auto& min_x_tensor = context->input(2);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_x_tensor.shape()),
+                errors::InvalidArgument(""min_x must be a scalar""));
+    const float min_x = min_x_tensor.flat<float>()(0);
+    auto& max_x_tensor = context->input(3);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_x_tensor.shape()),
+                errors::InvalidArgument(""max_x must be a scalar""));
+    const float max_x = max_x_tensor.flat<float>()(0);
+    auto& min_y_tensor = context->input(4);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_y_tensor.shape()),
+                errors::InvalidArgument(""min_y must be a scalar""));
+    const float min_y = min_y_tensor.flat<float>()(0);
+    auto& max_y_tensor = context->input(5);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(max_y_tensor.shape()),
+                errors::InvalidArgument(""max_y must be a scalar""));
+    const float max_y = max_y_tensor.flat<float>()(0);
 
     BCast bcast(BCast::FromShape(x.shape()), BCast::FromShape(y.shape()));
     if (!bcast.IsValid()) {
",1,train
a324ac84e573fba362a5e53d4e74d5de6729933e,tensorflow/tensorflow,"Validate arguments to `QuantizedReshape`.

Ensure that validations from `Reshape` also terminate `QuantizedReshape` on failure.

PiperOrigin-RevId: 369775421
Change-Id: If8c5342267aceea65b7cb83a4b183304886f1ce8",quantized_reshape_op.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
+#include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/tensor_types.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/kernels/reshape_op.h""
@@ -30,9 +31,29 @@ class QuantizedReshapeOp : public ReshapeOp {
   void Compute(OpKernelContext* ctx) override {
     // This call processes inputs 1 and 2 to write output 0.
     ReshapeOp::Compute(ctx);
+    if (!ctx->status().ok()) {
+      return;
+    }
+
+    const auto& input_min_float_tensor = ctx->input(2);
+    const auto& input_min_float_shape = input_min_float_tensor.shape();
+    OP_REQUIRES(ctx,
+                TensorShapeUtils::IsScalar(input_min_float_shape) ||
+                    (TensorShapeUtils::IsVector(input_min_float_shape) &&
+                     (input_min_float_shape.dim_size(0) == 1)),
+                errors::InvalidArgument(
+                    ""input_min must be a scalar or a vector of 1 element""));
+    const float input_min_float = input_min_float_tensor.flat<float>()(0);
+    const auto& input_max_float_tensor = ctx->input(3);
+    const auto& input_max_float_shape = input_max_float_tensor.shape();
+    OP_REQUIRES(ctx,
+                TensorShapeUtils::IsScalar(input_max_float_shape) ||
+                    (TensorShapeUtils::IsVector(input_max_float_shape) &&
+                     (input_max_float_shape.dim_size(0) == 1)),
+                errors::InvalidArgument(
+                    ""input_max must be a scalar or a vector of 1 element""));
+    const float input_max_float = input_max_float_tensor.flat<float>()(0);
 
-    const float input_min_float = ctx->input(2).flat<float>()(0);
-    const float input_max_float = ctx->input(3).flat<float>()(0);
     Tensor* output_min = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(1, TensorShape({}), &output_min));
     output_min->flat<float>()(0) = input_min_float;
",1,train
f6c40f0c6cbf00d46c7717a26419f2062f2f8694,tensorflow/tensorflow,"Validate min and max arguments to `QuantizedResizeBilinear`.

PiperOrigin-RevId: 369765091
Change-Id: I33be8b78273ab7d08b97541692fe05cb7f94963a",quantized_resize_bilinear_op.cc,"@@ -702,8 +702,14 @@ class QuantizedResizeBilinearOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const float in_min = context->input(2).flat<float>()(0);
-    const float in_max = context->input(3).flat<float>()(0);
+    const auto& in_min_tensor = context->input(2);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_min_tensor.shape()),
+                errors::InvalidArgument(""min must be a scalar""));
+    const float in_min = in_min_tensor.flat<float>()(0);
+    const auto& in_max_tensor = context->input(3);
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(in_max_tensor.shape()),
+                errors::InvalidArgument(""max must be a scalar""));
+    const float in_max = in_max_tensor.flat<float>()(0);
 
     ImageResizerState st(align_corners_, false);
     st.ValidateAndCreateOutput(context);
",1,train
c570e2ecfc822941335ad48f6e10df4e21f11c96,tensorflow/tensorflow,"Fix issues in Conv2DBackpropFilter.

PiperOrigin-RevId: 369772454
Change-Id: I49b465f2ae2ce91def61b56cea8000197d5177d8",conv_grad_filter_ops.cc,"@@ -495,6 +495,14 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
     const int filter_total_size = dims.spatial_dims[0].filter_size *
                                   dims.spatial_dims[1].filter_size *
                                   dims.in_depth;
+    OP_REQUIRES(
+        context,
+        filter_total_size * dims.out_depth == filter_backprop->NumElements(),
+        errors::InvalidArgument(
+            ""filter_size does not have enough elements, requested "",
+            filter_total_size * dims.out_depth, "", got "",
+            filter_backprop->NumElements()));
+
     // The output image size is the spatial size of the output.
     const int output_image_size =
         dims.spatial_dims[0].output_size * dims.spatial_dims[1].output_size;
@@ -518,6 +526,11 @@ class Conv2DCustomBackpropFilterOp : public OpKernel {
 
     const size_t work_unit_size = size_A + size_B + size_C;
 
+    OP_REQUIRES(
+        context, work_unit_size != 0,
+        errors::InvalidArgument(
+            ""Work size for convolution would be 0, which is not acceptable""));
+
     const size_t shard_size =
         (target_working_set_size + work_unit_size - 1) / work_unit_size;
 
",1,train
4f663d4b8f0bec1b48da6fa091a7d29609980fa4,tensorflow/tensorflow,"Allowlist certain data types to avoid a seg fault.

PiperOrigin-RevId: 356326671
Change-Id: I23b65b52e93798cb5a6744632d31b0f88c6b6b31",immutable_constant_op.cc,"@@ -17,6 +17,8 @@ limitations under the License.
 
 #include <unordered_set>
 
+#include ""tensorflow/core/framework/types.pb.h""
+
 namespace tensorflow {
 
 namespace {
@@ -86,6 +88,9 @@ ImmutableConstantOp::ImmutableConstantOp(OpKernelConstruction* context)
   OP_REQUIRES_OK(context,
                  context->GetAttr(kMemoryRegionNameAttr, &region_name_));
   OP_REQUIRES_OK(context, context->GetAttr(kDTypeAttr, &dtype_));
+  OP_REQUIRES(context, dtype_ != DT_RESOURCE && dtype_ != DT_VARIANT,
+              errors::InvalidArgument(
+                  ""Resource and variant dtypes are invalid for this op.""));
   OP_REQUIRES_OK(context, context->GetAttr(kShapeAttr, &shape_));
 }
 
",1,train
ba424dd8f16f7110eea526a8086f1a155f14f22b,tensorflow/tensorflow,"Enhance validation of ngram op and handle case of 0 tokens.

PiperOrigin-RevId: 369940178
Change-Id: Ia82f42c09d14efe76e7dc013505b832a42282f0b",string_ngrams_op.cc,"@@ -61,16 +61,28 @@ class StringNGramsOp : public tensorflow::OpKernel {
     OP_REQUIRES_OK(context, context->input(""data_splits"", &splits));
     const auto& splits_vec = splits->flat<SPLITS_TYPE>();
 
-    // Validate that the splits are valid indices into data
+    // Validate that the splits are valid indices into data, only if there are
+    // splits specified.
     const int input_data_size = data->flat<tstring>().size();
     const int splits_vec_size = splits_vec.size();
-    for (int i = 0; i < splits_vec_size; ++i) {
-      bool valid_splits = splits_vec(i) >= 0;
-      valid_splits = valid_splits && (splits_vec(i) <= input_data_size);
-      OP_REQUIRES(
-          context, valid_splits,
-          errors::InvalidArgument(""Invalid split value "", splits_vec(i),
-                                  "", must be in [0,"", input_data_size, ""]""));
+    if (splits_vec_size > 0) {
+      int prev_split = splits_vec(0);
+      OP_REQUIRES(context, prev_split == 0,
+                  errors::InvalidArgument(""First split value must be 0, got "",
+                                          prev_split));
+      for (int i = 1; i < splits_vec_size; ++i) {
+        bool valid_splits = splits_vec(i) >= prev_split;
+        valid_splits = valid_splits && (splits_vec(i) <= input_data_size);
+        OP_REQUIRES(context, valid_splits,
+                    errors::InvalidArgument(
+                        ""Invalid split value "", splits_vec(i), "", must be in ["",
+                        prev_split, "", "", input_data_size, ""]""));
+        prev_split = splits_vec(i);
+      }
+      OP_REQUIRES(context, prev_split == input_data_size,
+                  errors::InvalidArgument(
+                      ""Last split value must be data size. Expected "",
+                      input_data_size, "", got "", prev_split));
     }
 
     int num_batch_items = splits_vec.size() - 1;
@@ -174,13 +186,31 @@ class StringNGramsOp : public tensorflow::OpKernel {
         ngram->append(left_pad_);
         ngram->append(separator_);
       }
+      // Only output first num_tokens - 1 pairs of data and separator
       for (int n = 0; n < num_tokens - 1; ++n) {
         ngram->append(data[data_start_index + n]);
         ngram->append(separator_);
       }
-      ngram->append(data[data_start_index + num_tokens - 1]);
-      for (int n = 0; n < right_padding; ++n) {
-        ngram->append(separator_);
+      // Handle case when there are no tokens or no right padding as these can
+      // result in consecutive separators.
+      if (num_tokens > 0) {
+        // If we have tokens, then output last and then pair each separator with
+        // the right padding that follows, to ensure ngram ends either with the
+        // token or with the right pad.
+        ngram->append(data[data_start_index + num_tokens - 1]);
+        for (int n = 0; n < right_padding; ++n) {
+          ngram->append(separator_);
+          ngram->append(right_pad_);
+        }
+      } else {
+        // If we don't have tokens, then the last item inserted into the ngram
+        // has been the separator from the left padding loop above. Hence,
+        // output right pad and separator and make sure to finish with a
+        // padding, not a separator.
+        for (int n = 0; n < right_padding - 1; ++n) {
+          ngram->append(right_pad_);
+          ngram->append(separator_);
+        }
         ngram->append(right_pad_);
       }
 
",1,test
ba424dd8f16f7110eea526a8086f1a155f14f22b,tensorflow/tensorflow,"Enhance validation of ngram op and handle case of 0 tokens.

PiperOrigin-RevId: 369940178
Change-Id: Ia82f42c09d14efe76e7dc013505b832a42282f0b",string_ngrams_op_test.cc,"@@ -542,6 +542,40 @@ TEST_F(NgramKernelTest, TestEmptyInput) {
   assert_int64_equal(expected_splits, *GetOutput(1));
 }
 
+TEST_F(NgramKernelTest, TestNoTokens) {
+  MakeOp(""|"", {3}, ""L"", ""R"", -1, false);
+  // Batch items are:
+  // 0:
+  // 1: ""a""
+  AddInputFromArray<tstring>(TensorShape({1}), {""a""});
+  AddInputFromArray<int64>(TensorShape({3}), {0, 0, 1});
+  TF_ASSERT_OK(RunOpKernel());
+
+  std::vector<tstring> expected_values(
+      {""L|L|R"", ""L|R|R"",             // no input in first split
+       ""L|L|a"", ""L|a|R"", ""a|R|R""});  // second split
+  std::vector<int64> expected_splits({0, 2, 5});
+
+  assert_string_equal(expected_values, *GetOutput(0));
+  assert_int64_equal(expected_splits, *GetOutput(1));
+}
+
+TEST_F(NgramKernelTest, TestNoTokensNoPad) {
+  MakeOp(""|"", {3}, """", """", 0, false);
+  // Batch items are:
+  // 0:
+  // 1: ""a""
+  AddInputFromArray<tstring>(TensorShape({1}), {""a""});
+  AddInputFromArray<int64>(TensorShape({3}), {0, 0, 1});
+  TF_ASSERT_OK(RunOpKernel());
+
+  std::vector<tstring> expected_values({});
+  std::vector<int64> expected_splits({0, 0, 0});
+
+  assert_string_equal(expected_values, *GetOutput(0));
+  assert_int64_equal(expected_splits, *GetOutput(1));
+}
+
 TEST_F(NgramKernelTest, ShapeFn) {
   ShapeInferenceTestOp op(""StringNGrams"");
   INFER_OK(op, ""?;?"", ""[?];[?]"");
",1,test
ea3b43e98c32c97b35d52b4c66f9107452ca8fb2,tensorflow/tensorflow,"Fix `tf.raw_ops.CTCGreedyDecoder` CHECK failure.

PiperOrigin-RevId: 369960465
Change-Id: If0b8b3264d5a47a24ac0970ed7b81ce6b4921fae",ctc_decoder_ops.cc,"@@ -232,6 +232,8 @@ class CTCGreedyDecoderOp : public OpKernel {
         int prev_indices = -1;
         for (int t = 0; t < seq_len_t(b); ++t) {
           int max_class_indices;
+          OP_REQUIRES(ctx, input_list_t[t].dimension(1) > 0,
+                      errors::InvalidArgument(""Invalid input dimensions.""));
           log_prob_t(b, 0) +=
               -RowMax<T>(input_list_t[t], b, &max_class_indices);
           if (max_class_indices != blank_index &&
",1,train
20431e9044cf2ad3c0323c34888b192f3289af6b,tensorflow/tensorflow,"Fix `tf.raw_ops.QuantizeAndDequantizeV4Grad` CHECK failure.

PiperOrigin-RevId: 370532425
Change-Id: I767721be266851b63d8fe55e7ac6be0af6017f6c",quantize_and_dequantize_op.cc,"@@ -160,7 +160,17 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel {
         errors::InvalidArgument(""gradient and input must be the same size""));
     const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
     const Tensor& input_min_tensor = ctx->input(2);
+    OP_REQUIRES(ctx,
+                input_min_tensor.dims() == 0 || input_min_tensor.dims() == 1,
+                errors::InvalidArgument(
+                    ""Input min tensor must have dimension 0 or 1. Received "",
+                    input_min_tensor.dims(), "".""));
     const Tensor& input_max_tensor = ctx->input(3);
+    OP_REQUIRES(ctx,
+                input_max_tensor.dims() == 0 || input_max_tensor.dims() == 1,
+                errors::InvalidArgument(
+                    ""Input max tensor must have dimension 0 or 1. Received "",
+                    input_max_tensor.dims(), "".""));
     if (axis_ != -1) {
       OP_REQUIRES(
           ctx, input_min_tensor.dim_size(0) == depth,
",1,train
1e922ccdf6bf46a3a52641f99fd47d54c1decd13,tensorflow/tensorflow,"Fix crash in `SparseTensorToCSRSparseMatrixCPUFunctor`

PiperOrigin-RevId: 370110290
Change-Id: I4451e92661a55c2180f80d38b67a9b50bf5edec5",kernels.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_types.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/core/status.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 namespace functor {
@@ -63,6 +64,11 @@ Status SparseTensorToCSRSparseMatrixCPUFunctor::operator()(
 
     for (int64 i = 0; i < total_nnz; ++i) {
       // For now, the rows pointers store the corresponding row counts.
+      int64 ix = indices(i, 0) + 1;
+      if (ix >= csr_row_ptr.size()) {
+        return errors::InvalidArgument(""Got an index "", ix,
+                                       "" that is outside of csr_row_ptr"");
+      }
       csr_row_ptr(indices(i, 0) + 1) += 1;
       csr_col_ind(i) = indices(i, 1);
     }
",1,train
67784700869470d65d5f2ef20aeb5e97c31673cb,tensorflow/tensorflow,"Prevent division by 0 in `QuantizedBiasAdd`.

PiperOrigin-RevId: 370117454
Change-Id: I3804e2ac8dcc6d3afcc92e27853e2325a017ca4d",quantized_bias_add_op.cc,"@@ -56,6 +56,8 @@ class QuantizedBiasAddOp : public OpKernel {
             ""Must provide as many biases as the last dimension ""
             ""of the input tensor: "",
             bias.shape().DebugString(), "" vs. "", input.shape().DebugString()));
+    OP_REQUIRES(context, bias.NumElements() > 0,
+                errors::InvalidArgument(""Must provide at least 1 bias""));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
",1,train
d6ed5bcfe1dcab9e85a4d39931bd18d99018e75b,tensorflow/tensorflow,"Add missing validation in `QuantizedBatchNormWithGlobalNormalization`

PiperOrigin-RevId: 370123451
Change-Id: Id234d6dab1ec21230bb8e503dba30f899af87f33",quantized_batch_norm_op.cc,"@@ -173,20 +173,50 @@ class QuantizedBatchNormOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
-    const float input_min = context->input(1).flat<float>()(0);
-    const float input_max = context->input(2).flat<float>()(0);
+    const auto& input_min_tensor = context->input(1);
+    OP_REQUIRES(context, input_min_tensor.NumElements() == 1,
+                errors::InvalidArgument(""input_min must have 1 element""));
+    const float input_min = input_min_tensor.flat<float>()(0);
+    const auto& input_max_tensor = context->input(2);
+    OP_REQUIRES(context, input_max_tensor.NumElements() == 1,
+                errors::InvalidArgument(""input_max must have 1 element""));
+    const float input_max = input_max_tensor.flat<float>()(0);
     const Tensor& mean = context->input(3);
-    const float mean_min = context->input(4).flat<float>()(0);
-    const float mean_max = context->input(5).flat<float>()(0);
+    const auto& mean_min_tensor = context->input(4);
+    OP_REQUIRES(context, mean_min_tensor.NumElements() == 1,
+                errors::InvalidArgument(""mean_min must have 1 element""));
+    const float mean_min = mean_min_tensor.flat<float>()(0);
+    const auto& mean_max_tensor = context->input(5);
+    OP_REQUIRES(context, mean_max_tensor.NumElements() == 1,
+                errors::InvalidArgument(""mean_max must have 1 element""));
+    const float mean_max = mean_max_tensor.flat<float>()(0);
     const Tensor& var = context->input(6);
-    const float var_min = context->input(7).flat<float>()(0);
-    const float var_max = context->input(8).flat<float>()(0);
+    const auto& var_min_tensor = context->input(7);
+    OP_REQUIRES(context, var_min_tensor.NumElements() == 1,
+                errors::InvalidArgument(""var_min must have 1 element""));
+    const float var_min = var_min_tensor.flat<float>()(0);
+    const auto& var_max_tensor = context->input(8);
+    OP_REQUIRES(context, var_max_tensor.NumElements() == 1,
+                errors::InvalidArgument(""var_max must have 1 element""));
+    const float var_max = var_max_tensor.flat<float>()(0);
     const Tensor& beta = context->input(9);
-    const float beta_min = context->input(10).flat<float>()(0);
-    const float beta_max = context->input(11).flat<float>()(0);
+    const auto& beta_min_tensor = context->input(10);
+    OP_REQUIRES(context, beta_min_tensor.NumElements() == 1,
+                errors::InvalidArgument(""beta_min must have 1 element""));
+    const float beta_min = beta_min_tensor.flat<float>()(0);
+    const auto& beta_max_tensor = context->input(11);
+    OP_REQUIRES(context, beta_max_tensor.NumElements() == 1,
+                errors::InvalidArgument(""beta_max must have 1 element""));
+    const float beta_max = beta_max_tensor.flat<float>()(0);
     const Tensor& gamma = context->input(12);
-    const float gamma_min = context->input(13).flat<float>()(0);
-    const float gamma_max = context->input(14).flat<float>()(0);
+    const auto& gamma_min_tensor = context->input(13);
+    OP_REQUIRES(context, gamma_min_tensor.NumElements() == 1,
+                errors::InvalidArgument(""gamma_min must have 1 element""));
+    const float gamma_min = gamma_min_tensor.flat<float>()(0);
+    const auto& gamma_max_tensor = context->input(14);
+    OP_REQUIRES(context, gamma_max_tensor.NumElements() == 1,
+                errors::InvalidArgument(""gamma_max must have 1 element""));
+    const float gamma_max = gamma_max_tensor.flat<float>()(0);
 
     OP_REQUIRES(context, input.dims() == 4,
                 errors::InvalidArgument(""input must be 4-dimensional"",
@@ -203,6 +233,33 @@ class QuantizedBatchNormOp : public OpKernel {
     OP_REQUIRES(context, gamma.dims() == 1,
                 errors::InvalidArgument(""gamma must be 1-dimensional"",
                                         gamma.shape().DebugString()));
+    OP_REQUIRES(context, mean.NumElements() > 1,
+                errors::InvalidArgument(""Must have at least a mean value"",
+                                        gamma.shape().DebugString()));
+    OP_REQUIRES(context, mean.NumElements() > 1,
+                errors::InvalidArgument(""Must have at least a mean value""));
+    const auto last_dim = input.shape().dims() - 1;
+    OP_REQUIRES(context,
+                mean.shape().dim_size(0) == input.shape().dim_size(last_dim),
+                errors::InvalidArgument(""Must provide as many means as the ""
+                                        ""last dimension of the input tensor: "",
+                                        mean.shape().DebugString(), "" vs. "",
+                                        input.shape().DebugString()));
+    OP_REQUIRES(
+        context, mean.shape().dim_size(0) == var.shape().dim_size(0),
+        errors::InvalidArgument(
+            ""Mean and variance tensors must have the same shape: "",
+            mean.shape().DebugString(), "" vs. "", var.shape().DebugString()));
+    OP_REQUIRES(
+        context, mean.shape().dim_size(0) == beta.shape().dim_size(0),
+        errors::InvalidArgument(
+            ""Mean and beta tensors must have the same shape: "",
+            mean.shape().DebugString(), "" vs. "", beta.shape().DebugString()));
+    OP_REQUIRES(
+        context, mean.shape().dim_size(0) == gamma.shape().dim_size(0),
+        errors::InvalidArgument(
+            ""Mean and gamma tensors must have the same shape: "",
+            mean.shape().DebugString(), "" vs. "", gamma.shape().DebugString()));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context,
",1,train
744009c9e5cc5d0447f0dc39d055f917e1fd9e16,tensorflow/tensorflow,"Validate work in `QuantizedAdd`, ensure at least one element.

PiperOrigin-RevId: 370127996
Change-Id: I57c6f3e01afdeada84737820a131590137463855",quantized_add_op.cc,"@@ -538,6 +538,8 @@ class QuantizedAddOp : public OpKernel {
         tensor_min = min_x;
         tensor_max = max_x;
       }
+      OP_REQUIRES(context, vector_num_elements > 0,
+                  errors::InvalidArgument(""Must have some elements to add""));
       VectorTensorAddition<T, Toutput>(
           vector_data, vector_min, vector_max, vector_num_elements, tensor_data,
           tensor_min, tensor_max, tensor_num_elements, min_z_value, max_z_value,
",1,train
548b5eaf23685d86f722233d8fbc21d0a4aecb96,tensorflow/tensorflow,"Fix divide by zero error in `fractional_pool_common.cc`.

PiperOrigin-RevId: 371126221
Change-Id: Iea4b2f363aaeb116ab460e3bc592c687484af344",fractional_avg_pool_op.cc,"@@ -80,6 +80,10 @@ class FractionalAvgPoolOp : public OpKernel {
     std::vector<int> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
       input_size[i] = tensor_in.dim_size(i);
+      OP_REQUIRES(
+          context, pooling_ratio_[i] <= input_size[i],
+          errors::InvalidArgument(
+              ""Pooling ratio cannot be bigger than input tensor dim size.""));
     }
     // Output size.
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
",1,train
480641e3599775a8895254ffbc0fc45621334f68,tensorflow/tensorflow,"Validate (and ensure validation sticks) inputs for `MatrixTriangularSolve`.

PiperOrigin-RevId: 370282444
Change-Id: Iaed61a0b0727cc42c830658b72eb69f785f48dc5",matrix_triangular_solve_op_impl.h,"@@ -162,6 +162,9 @@ class BaseMatrixTriangularSolveOp : public OpKernel {
     const Tensor& in1 = ctx->input(1);
 
     ValidateInputTensors(ctx, in0, in1);
+    if (!ctx->status().ok()) {
+      return;
+    }
 
     MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes());
     OP_REQUIRES(
@@ -230,13 +233,22 @@ class MatrixTriangularSolveOp
  private:
   void ValidateInputTensors(OpKernelContext* ctx, const Tensor& in0,
                             const Tensor& in1) override {
+    const auto in0_num_dims = in0.dims();
     OP_REQUIRES(
-        ctx, in0.dims() >= 2,
-        errors::InvalidArgument(""In[0] ndims must be >= 2: "", in0.dims()));
+        ctx, in0_num_dims >= 2,
+        errors::InvalidArgument(""In[0] ndims must be >= 2: "", in0_num_dims));
 
+    const auto in1_num_dims = in1.dims();
     OP_REQUIRES(
-        ctx, in1.dims() >= 2,
-        errors::InvalidArgument(""In[0] ndims must be >= 2: "", in1.dims()));
+        ctx, in1_num_dims >= 2,
+        errors::InvalidArgument(""In[1] ndims must be >= 2: "", in1_num_dims));
+
+    const auto in0_last_dim = in0.dim_size(in0_num_dims - 1);
+    const auto in0_prev_dim = in0.dim_size(in0_num_dims - 2);
+    OP_REQUIRES(ctx, in0_last_dim == in0_prev_dim,
+                errors::InvalidArgument(
+                    ""In[0] matrices in the last dimensions must be square ("",
+                    in0_last_dim, "" =/= "", in0_prev_dim, "")""));
   }
 };
 
",1,train
704866eabe03a9aeda044ec91a8d0c83fc1ebdbe,tensorflow/tensorflow,"Fix overflow CHECK issue with `tf.raw_ops.UnsortedSegmentJoin`.

PiperOrigin-RevId: 370766155
Change-Id: I33e7c6626224e1060a8a4ab51ad5d861c6d4c63e",unsorted_segment_join_op.cc,"@@ -90,6 +90,8 @@ class UnsortedSegmentJoinOp : public OpKernel {
     const int32 segment_dims = segment_id_shape.dims();
 
     const Tensor& num_segments_tensor = context->input(2);
+    OP_REQUIRES(context, num_segments_tensor.NumElements() != 0,
+                errors::InvalidArgument(""Number of segments cannot be empty.""));
     auto num_segments = num_segments_tensor.scalar<NUM_SEGMENTS_TYPE>()();
 
     OP_REQUIRES(context, segment_dims != 0,
",1,train
99085e8ff02c3763a0ec2263e44daec416f6a387,tensorflow/tensorflow,"Fix `tf.raw_ops.QuantizeAndDequantizeV3` array index failure.

PiperOrigin-RevId: 370577691
Change-Id: Ifeae64212f6bcd139435824fa2748d1329213c4c",quantize_and_dequantize_op.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/core/framework/op_requires.h""
 #define EIGEN_USE_THREADS
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
@@ -234,6 +235,10 @@ class QuantizeAndDequantizeV3Op : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(ctx, axis_ < input.dims(),
+                errors::InvalidArgument(
+                    ""Axis requested is larger than input dimensions. Axis: "",
+                    axis_, "" Input Dimensions: "", input.dims()));
     const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));
",1,test
da5ff2daf618591f64b2b62d9d9803951b945e9f,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.DenseCountSparseOutput`.

PiperOrigin-RevId: 370946862
Change-Id: I3752584ad04aaecb327ff6793a9640ac56acfe7a",count_ops.cc,"@@ -122,6 +122,9 @@ class DenseCount : public OpKernel {
 
     int num_batch_elements = 1;
     for (int i = 0; i < num_batch_dimensions; ++i) {
+      OP_REQUIRES(context, data.shape().dim_size(i) != 0,
+                  errors::InvalidArgument(
+                      ""Invalid input: Shapes dimension cannot be 0.""));
       num_batch_elements *= data.shape().dim_size(i);
     }
     int num_value_elements = data.shape().num_elements() / num_batch_elements;
",1,test
1a2a87229d1d61e23a39373777c056161eb4084d,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.FusedBatchNorm`.

PiperOrigin-RevId: 370948185
Change-Id: If0c8e0320062ed6363e94ff5fe38e6a301f69ac2",fused_batch_norm_op.cc,"@@ -293,6 +293,9 @@ struct FusedBatchNorm<CPUDevice, T, U, /* is_training= */ false> {
     const CPUDevice& d = context->eigen_device<CPUDevice>();
 
     const int depth = x.dimension(3);
+    OP_REQUIRES(
+        context, depth != 0,
+        errors::Internal(""The 4th element in the input shape cannot be 0.""));
     const int size = x.size();
     const int rest_size = size / depth;
     Eigen::DSizes<Eigen::Index, 2> rest_by_depth(rest_size, depth);
",1,train
4071d8e2f6c45c1955a811fee757ca2adbe462c1,tensorflow/tensorflow,"Fix FPE issue with `tf.raw_ops.Reverse`.

PiperOrigin-RevId: 371176973
Change-Id: Ic6d483bfc95313ec2299c2d1c956cfe96c96626c",reverse_op.cc,"@@ -155,6 +155,12 @@ class ReverseOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
+    // If input is provided, check to make sure the first dimension is valid.
+    if (input.dims() > 0) {
+      OP_REQUIRES(
+          context, input.dim_size(0) != 0,
+          errors::InvalidArgument(""Invalid input first dimension. Found 0.""));
+    }
     const Tensor& dims = context->input(1);
 
     if (TensorShapeUtils::IsScalar(input.shape())) {
",1,train
7f283ff806b2031f407db64c4d3edcda8fb9f9f5,tensorflow/tensorflow,"Fix FPE issue in external Eigen source code with `tf.raw_ops.SparseMatMul`.

PiperOrigin-RevId: 370992919
Change-Id: Icfb276fef5fb40928b27c3e44608d2aca72c9fd7",sparse_matmul_op.cc,"@@ -1039,6 +1039,10 @@ class SparseMatMulOp : public OpKernel {
     if (transpose_b) {
       // TODO(agarwal): avoid transposing the matrix here and directly handle
       // transpose in CreateDenseSlices.
+      OP_REQUIRES(ctx, right->dim_size(0) != 0,
+                  errors::InvalidArgument(""b has an entry 0 in its shape.""));
+      OP_REQUIRES(ctx, right->dim_size(1) != 0,
+                  errors::InvalidArgument(""b has an entry 0 in its shape.""));
       right_tr.reset(
           new Tensor(right->dtype(),
                      TensorShape({right->dim_size(1), right->dim_size(0)})));
",1,train
8ba6fa29cd8bf9cef9b718dc31c78c73081f5b31,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseSplit`.

PiperOrigin-RevId: 371242872
Change-Id: I482bb3d12602c7c3cc9446f97fb9f584bb98e9a4",sparse_tensor.h,"@@ -527,6 +527,10 @@ inline Status SparseTensor::Split(const SparseTensor& input_tensor,
   for (int i = 0; i < input_tensor.indices().dim_size(0); ++i) {
     const int dim = input_tensor.indices().matrix<int64>()(i, split_dim);
     int slice_index = GetSliceIndex(dim, split_size, residual);
+    if (slice_index >= num_values.size()) {
+      return errors::InvalidArgument(""Slice index "", slice_index,
+                                     "" is larger than num_split."");
+    }
     num_values[slice_index]++;
   }
 
",1,train
51300ba1cc2f487aefec6e6631fef03b0e08b298,tensorflow/tensorflow,"Fix heap buffer overflow in tf.raw_ops.UnicodeEncode.

PiperOrigin-RevId: 371717714
Change-Id: If33443b28f158e58078f1268f6b92f2728d219e0",unicode_ops.cc,"@@ -533,6 +533,17 @@ class UnicodeEncodeOp : public OpKernel {
     const Tensor& input_splits = context->input(1);
     const auto input_splits_flat = input_splits.flat<SPLITS_TYPE>();
 
+    // Operation will treat first argument in input_splits as if it were zero
+    // regardless of its actual value since splits should begin with zero and
+    // end with the length of the input values vector.
+    OP_REQUIRES(
+        context, input_splits_flat(0) == 0,
+        errors::InvalidArgument(""First value in input_splits must be zero.""));
+    OP_REQUIRES(context,
+                input_splits_flat(input_splits_flat.size() - 1) ==
+                    input_tensor_flat.size(),
+                errors::InvalidArgument(""Last value in input_splits must be ""
+                                        ""equal to length of input_tensor.""));
     // Since we limit to a 2-D input (flat_values of rank 1 and a single splits
     // tensor), our output dimension will be 1 with it's size equal to the
     // number of splits (outer dimension or ragged tensor).
@@ -548,6 +559,14 @@ class UnicodeEncodeOp : public OpKernel {
     for (int i = 1; i < input_splits_flat.size(); ++i) {
       icu::UnicodeString unicode_string;
       icu::UnicodeStringAppendable appendable_unicode_string(unicode_string);
+      OP_REQUIRES(
+          context, input_splits_flat(i - 1) <= input_splits_flat(i),
+          errors::InvalidArgument(
+              ""Values in input_splits must be equal or in ascending order.""));
+      OP_REQUIRES(
+          context, input_splits_flat(i) <= input_tensor_flat.size(),
+          errors::InvalidArgument(""Values in input_splits must be less than or ""
+                                  ""equal to input_tensor length.""));
       for (; idx < input_splits_flat(i); ++idx) {
         int32 code_point = input_tensor_flat(idx);
         // Check for invalid code point
",1,test
a84358aa12f0b1518e606095ab9cfddbf597c121,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.RaggedTensorToTensor`.

PiperOrigin-RevId: 371986929
Change-Id: I79ab962a22c5867f36f7f45b780a1ac881b1dbdd",ragged_tensor_to_tensor_op.cc,"@@ -313,6 +313,12 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
             output_index_multiplier, output_size, result);
         return tensorflow::Status::OK();
       case RowPartitionType::ROW_SPLITS:
+        if (row_partition_tensor.size() - 1 > parent_output_index.size()) {
+          return errors::InvalidArgument(
+              ""Row partition size is greater than output size: "",
+              row_partition_tensor.size() - 1, "" > "",
+              parent_output_index.size());
+        }
         CalculateOutputIndexRowSplit(
             context, row_partition_tensor, parent_output_index,
             output_index_multiplier, output_size, result);
",1,test
77dd114513d7796e1e2b8aece214a380af26fbf4,tensorflow/tensorflow,"Fix a check fail

PiperOrigin-RevId: 372011072
Change-Id: I1062cfaed0aa16884e9a16312483794d188db76f",load_and_remap_matrix_op.cc,"@@ -123,6 +123,11 @@ class LoadAndRemapMatrixOp : public OpKernel {
     // Processes the checkpoint source and the provided Tensor name.
     const Tensor* ckpt_path_t;
     OP_REQUIRES_OK(context, context->input(""ckpt_path"", &ckpt_path_t));
+    OP_REQUIRES(
+        context, ckpt_path_t->NumElements() == 1,
+        errors::InvalidArgument(""The `ckpt_path` tensor must have exactly one ""
+                                ""element, got tensor of shape "",
+                                ckpt_path_t->shape().DebugString()));
     const string& ckpt_path = ckpt_path_t->scalar<tstring>()();
     const Tensor* old_tensor_name_t;
     OP_REQUIRES_OK(context,
",1,train
1c56f53be0b722ca657cbc7df461ed676c8642a2,tensorflow/tensorflow,"Fix a check fail in Fast Fourier implementation

PiperOrigin-RevId: 372026629
Change-Id: Id05c3362aa575271bc3e06b16316c9037085fc11",fft_ops.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/core/platform/errors.h""
 #define EIGEN_USE_THREADS
 
 // See docs in ../ops/fft_ops.cc.
@@ -261,6 +262,9 @@ class FFTCPU : public FFTBase {
           i == FFTRank ? fft_shape[i - 1] / 2 + 1 : fft_shape[i - 1];
       full_fft_shape.AddDim(fft_shape[i - 1]);
     }
+    OP_REQUIRES(ctx, full_fft_shape.num_elements() > 0,
+                errors::InvalidArgument(""Obtained an FFT shape of 0 elements: "",
+                                        full_fft_shape.DebugString()));
 
     Tensor temp;
     OP_REQUIRES_OK(ctx, ctx->allocate_temp(DataTypeToEnum<ComplexT>::v(),
",1,test
31bd5026304677faa8a0b77602c6154171b9aec1,tensorflow/tensorflow,"Prevent check fail in FFT

PiperOrigin-RevId: 372031044
Change-Id: I50994e3e8a5d1342d01bde80256f6bf2730ca299",fft_ops.cc,"@@ -222,6 +222,9 @@ class FFTCPU : public FFTBase {
       input_slice_sizes[i] = fft_shape[i - 1];
       temp_shape.AddDim(fft_shape[i - 1]);
     }
+    OP_REQUIRES(ctx, temp_shape.num_elements() > 0,
+                errors::InvalidArgument(""Obtained an FFT shape of 0 elements: "",
+                                        temp_shape.DebugString()));
 
     auto output = out->flat_inner_dims<ComplexT, FFTRank + 1>();
     const Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> zero_start_indices;
",1,train
f4c364a5d6880557f6f5b6eb5cee2c407f0186b3,tensorflow/tensorflow,"Fix multiple issues in EditDistance

PiperOrigin-RevId: 372033948
Change-Id: Ieb957c29894af05bdfeb1a0402fced808dfcfd7b",edit_distance_op.cc,"@@ -64,6 +64,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices,
     return errors::InvalidArgument(
         ""truth_shape should be a vector, but got shape: "",
         truth_shape.shape().DebugString());
+  if (hypothesis_values.NumElements() != hypothesis_indices.dim_size(0))
+    return errors::InvalidArgument(
+        ""Expected hypothesis_values.NumElements == ""
+        ""#rows(hypothesis_indices), their shapes are: "",
+        hypothesis_values.shape().DebugString(), "" and "",
+        hypothesis_indices.shape().DebugString());
   if (hypothesis_shape.NumElements() != hypothesis_indices.dim_size(1))
     return errors::InvalidArgument(
         ""Expected hypothesis_shape.NumElements == ""
@@ -75,6 +81,12 @@ Status ValidateShapes(OpKernelContext* ctx, const Tensor& hypothesis_indices,
         ""Input SparseTensors must have rank at least 2, but truth_shape ""
         ""rank is: "",
         truth_shape.NumElements());
+  if (truth_values.NumElements() != truth_indices.dim_size(0))
+    return errors::InvalidArgument(
+        ""Expected truth_values.NumElements == ""
+        ""#rows(truth_indices), their shapes are: "",
+        truth_values.shape().DebugString(), "" and "",
+        truth_indices.shape().DebugString());
   if (truth_shape.NumElements() != truth_indices.dim_size(1))
     return errors::InvalidArgument(
         ""Expected truth_shape.NumElements == ""
@@ -153,6 +165,11 @@ class EditDistanceOp : public OpKernel {
       output_shape.AddDim(std::max(hypothesis_st_shape.dim_size(d),
                                    truth_st_shape.dim_size(d)));
     }
+    const auto output_elements = output_shape.num_elements();
+    OP_REQUIRES(
+        ctx, output_elements > 0,
+        errors::InvalidArgument(""Got output shape "", output_shape.DebugString(),
+                                "" which has 0 elements""));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(""output"", output_shape, &output));
@@ -185,6 +202,12 @@ class EditDistanceOp : public OpKernel {
       if (g_truth == g_hypothesis) {
         auto loc = std::inner_product(g_truth.begin(), g_truth.end(),
                                       output_strides.begin(), int64{0});
+        OP_REQUIRES(
+            ctx, loc < output_elements,
+            errors::Internal(""Got an inner product "", loc,
+                             "" which would require in writing to outside of ""
+                             ""the buffer for the output tensor (max elements "",
+                             output_elements, "")""));
         output_t(loc) =
             gtl::LevenshteinDistance<T>(truth_seq, hypothesis_seq, cmp);
         if (normalize_) output_t(loc) /= truth_seq.size();
@@ -194,6 +217,12 @@ class EditDistanceOp : public OpKernel {
       } else if (g_truth > g_hypothesis) {  // zero-length truth
         auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(),
                                       output_strides.begin(), int64{0});
+        OP_REQUIRES(
+            ctx, loc < output_elements,
+            errors::Internal(""Got an inner product "", loc,
+                             "" which would require in writing to outside of ""
+                             ""the buffer for the output tensor (max elements "",
+                             output_elements, "")""));
         output_t(loc) = hypothesis_seq.size();
         if (normalize_ && output_t(loc) != 0.0f) {
           output_t(loc) = std::numeric_limits<float>::infinity();
@@ -202,6 +231,12 @@ class EditDistanceOp : public OpKernel {
       } else {  // zero-length hypothesis
         auto loc = std::inner_product(g_truth.begin(), g_truth.end(),
                                       output_strides.begin(), int64{0});
+        OP_REQUIRES(
+            ctx, loc < output_elements,
+            errors::Internal(""Got an inner product "", loc,
+                             "" which would require in writing to outside of ""
+                             ""the buffer for the output tensor (max elements "",
+                             output_elements, "")""));
         output_t(loc) = (normalize_) ? 1.0 : truth_seq.size();
         ++truth_iter;
       }
@@ -212,6 +247,12 @@ class EditDistanceOp : public OpKernel {
       auto hypothesis_seq = hypothesis_j.values<T>();
       auto loc = std::inner_product(g_hypothesis.begin(), g_hypothesis.end(),
                                     output_strides.begin(), int64{0});
+      OP_REQUIRES(
+          ctx, loc < output_elements,
+          errors::Internal(""Got an inner product "", loc,
+                           "" which would require in writing to outside of the ""
+                           ""buffer for the output tensor (max elements "",
+                           output_elements, "")""));
       output_t(loc) = hypothesis_seq.size();
       if (normalize_ && output_t(loc) != 0.0f) {
         output_t(loc) = std::numeric_limits<float>::infinity();
@@ -224,6 +265,12 @@ class EditDistanceOp : public OpKernel {
       auto truth_seq = truth_i.values<T>();
       auto loc = std::inner_product(g_truth.begin(), g_truth.end(),
                                     output_strides.begin(), int64{0});
+      OP_REQUIRES(
+          ctx, loc < output_elements,
+          errors::Internal(""Got an inner product "", loc,
+                           "" which would require in writing to outside of the ""
+                           ""buffer for the output tensor (max elements "",
+                           output_elements, "")""));
       output_t(loc) = (normalize_) ? 1.0 : truth_seq.size();
       ++truth_iter;
     }
",1,test
faa76f39014ed3b5e2c158593b1335522e573c7f,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseFillEmptyRows`.

PiperOrigin-RevId: 372009178
Change-Id: Ia1a9e9691ecaa072f32fb39a0887b2aabd399210",sparse_fill_empty_rows_op.cc,"@@ -228,7 +228,10 @@ void SparseFillEmptyRowsOpImpl(OpKernelContext* context,
                               default_value_t.shape().DebugString()),
       done);
   // TODO(ebrevdo): add shape checks between values, indices,
-  // dense_shape.  Also add check that dense rank > 0.
+  // Also add check that dense rank > 0.
+  OP_REQUIRES_ASYNC(context, dense_shape_t.NumElements() != 0,
+                    errors::InvalidArgument(""Dense shape cannot be empty.""),
+                    done);
 
   using FunctorType = functor::SparseFillEmptyRows<Device, T, Tindex>;
   OP_REQUIRES_OK_ASYNC(context,
",1,train
3f6fe4dfef6f57e768260b48166c27d148f3015f,tensorflow/tensorflow,"Add missing validations in dilation ops.

PiperOrigin-RevId: 372037158
Change-Id: I4ee304c84a02550c030288a6534000b934fc1599",dilation_ops.cc,"@@ -130,6 +130,7 @@ class DilationOp : public OpKernel {
     ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
                &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
                &out_cols);
+    if (!context->status().ok()) return;
 
     // Output tensor is of the following dimensions:
     // [ batch, out_rows, out_cols, depth ]
@@ -229,6 +230,7 @@ class DilationBackpropInputOp : public OpKernel {
     ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
                &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
                &out_cols);
+    if (!context->status().ok()) return;
 
     // Verify that the incoming gradient tensor has the expected size
     // [ batch, out_rows, out_cols, depth ]
@@ -318,8 +320,10 @@ struct DilationBackpropInput<CPUDevice, T> {
                 }
               }
             }
-            in_backprop(b, h_in_max, w_in_max, d) +=
-                out_backprop(b, h_out, w_out, d);
+            if (h_in_max < input_rows && w_in_max < input_cols) {
+              in_backprop(b, h_in_max, w_in_max, d) +=
+                  out_backprop(b, h_out, w_out, d);
+            }
           }
         }
       }
@@ -349,6 +353,7 @@ class DilationBackpropFilterOp : public OpKernel {
     ParseSizes(context, strides_, rates_, padding_, &stride_rows, &stride_cols,
                &rate_rows, &rate_cols, &pad_top, &pad_left, &out_rows,
                &out_cols);
+    if (!context->status().ok()) return;
 
     // Verify that the incoming gradient tensor has the expected size
     // [ batch, out_rows, out_cols, depth ]
@@ -438,8 +443,10 @@ struct DilationBackpropFilter<CPUDevice, T> {
                 }
               }
             }
-            filter_backprop(h_max, w_max, d) +=
-                out_backprop(b, h_out, w_out, d);
+            if (h_max < filter_rows && w_max < filter_cols) {
+              filter_backprop(h_max, w_max, d) +=
+                  out_backprop(b, h_out, w_out, d);
+            }
           }
         }
       }
",1,train
7ae2af34087fb4b5c8915279efd03da3b81028bc,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseDenseCwiseMul`.

PiperOrigin-RevId: 372054410
Change-Id: Ifcce0491e2e3816838c87e73be30a1e61b65174d",sparse_dense_binary_op_shared.cc,"@@ -78,6 +78,11 @@ class SparseDenseBinaryOpShared : public OpKernel {
                     ""but received shapes: "",
                     values_t->shape().DebugString(), "" and "",
                     shape_t->shape().DebugString()));
+    OP_REQUIRES(
+        ctx, values_t->dim_size(0) == indices_t->dim_size(0),
+        errors::InvalidArgument(
+            ""The first dimension of values and indices should match. ("",
+            values_t->dim_size(0), "" vs. "", indices_t->dim_size(0), "")""));
 
     const auto indices_mat = indices_t->matrix<int64>();
     const auto shape_vec = shape_t->vec<int64>();
",1,train
5e52ef5a461570cfb68f3bdbbebfe972cb4e0fd8,tensorflow/tensorflow,"Fix breakage in parameterized_truncated_normal_op.cc

PiperOrigin-RevId: 372041718
Change-Id: Iff79e77a2bb27032423eefcb84211627b27dfe81",parameterized_truncated_normal_op.cc,"@@ -627,6 +627,9 @@ class ParameterizedTruncatedNormalOp : public OpKernel {
         ctx, TensorShapeUtils::IsVector(shape_tensor.shape()),
         errors::InvalidArgument(""Input shape should be a vector, got shape: "",
                                 shape_tensor.shape().DebugString()));
+    OP_REQUIRES(ctx, shape_tensor.NumElements() > 0,
+                errors::InvalidArgument(""Shape tensor must not be empty, got "",
+                                        shape_tensor.DebugString()));
     int32 num_batches = shape_tensor.flat<int32>()(0);
 
     int32 samples_per_batch = 1;
",1,train
ef0c008ee84bad91ec6725ddc42091e19a30cf0e,tensorflow/tensorflow,"Fix out of bound read in requantization_range_op.cc

PiperOrigin-RevId: 372129031
Change-Id: Ie684ab98a3840c5186ead3eafffc0e0ed0e8030d",requantization_range_op.cc,"@@ -46,6 +46,10 @@ class RequantizationRangeOp : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(ctx, ctx->input(1).NumElements() > 0,
+                errors::InvalidArgument(""Input min must not be empty.""));
+    OP_REQUIRES(ctx, ctx->input(2).NumElements() > 0,
+                errors::InvalidArgument(""Input max must not be empty.""));
     const float input_min_float = ctx->input(1).flat<float>()(0);
     const float input_max_float = ctx->input(2).flat<float>()(0);
     Tensor* output_min = nullptr;
",1,train
dcd7867de0fea4b72a2b34bd41eb74548dc23886,tensorflow/tensorflow,"Fix heap buffer overflow

PiperOrigin-RevId: 372132844
Change-Id: Idef9895efaf145f2b1c23d31983601ec980cd5e4",maxpooling_op.cc,"@@ -1014,6 +1014,9 @@ struct LaunchMaxPoolingGradWithArgmax<CPUDevice, T> {
         const int input_start = start * input_size_per_batch;
         const int input_end = limit * input_size_per_batch;
         for (int64 index = input_start; index < input_end; index++) {
+          if (index >= argmax.NumElements()) {
+            break;
+          }
           int64 grad_out_index = argmax_flat(index);
           if (!include_batch_in_index) {
             const int64 cur_batch = index / input_size_per_batch;
",1,test
79865b542f9ffdc9caeb255631f7c56f1d4b6517,tensorflow/tensorflow,"Fix memory corruption issue with `tf.raw_ops.DrawBoundingBoxesV2`.

PiperOrigin-RevId: 372033910
Change-Id: I8a9f4efc1c8ddaacbc26ec1fbe4bfdd6791c226d",draw_bounding_box_op.cc,"@@ -73,6 +73,12 @@ class DrawBoundingBoxesOp : public OpKernel {
         errors::InvalidArgument(""Channel depth should be either 1 (GRY), ""
                                 ""3 (RGB), or 4 (RGBA)""));
 
+    OP_REQUIRES(
+        context, boxes.dim_size(2) == 4,
+        errors::InvalidArgument(
+            ""The size of the third dimension of the box must be 4. Received: "",
+            boxes.dim_size(2)));
+
     const int64 batch_size = images.dim_size(0);
     const int64 height = images.dim_size(1);
     const int64 width = images.dim_size(2);
",1,train
f7cc8755ac6683131fdfa7a8a121f9d7a9dec6fb,tensorflow/tensorflow,"Add several missing validations in SDCA

PiperOrigin-RevId: 372172877
Change-Id: Id366da962432e18dcbfac847d11e98488bebb70a",sdca_internal.cc,"@@ -99,6 +99,10 @@ Status ModelWeights::Initialize(OpKernelContext* const context) {
   OpInputList sparse_weights_inputs;
   TF_RETURN_IF_ERROR(
       context->input_list(""sparse_weights"", &sparse_weights_inputs));
+  if (sparse_indices_inputs.size() != sparse_weights_inputs.size())
+    return errors::InvalidArgument(
+        ""sparse_indices and sparse_weights must have the same length, got "",
+        sparse_indices_inputs.size(), "" and "", sparse_weights_inputs.size());
   OpInputList dense_weights_inputs;
   TF_RETURN_IF_ERROR(
       context->input_list(""dense_weights"", &dense_weights_inputs));
@@ -106,10 +110,20 @@ Status ModelWeights::Initialize(OpKernelContext* const context) {
   OpOutputList sparse_weights_outputs;
   TF_RETURN_IF_ERROR(context->output_list(""out_delta_sparse_weights"",
                                           &sparse_weights_outputs));
+  if (sparse_weights_outputs.size() != sparse_weights_inputs.size())
+    return errors::InvalidArgument(
+        ""out_delta_sparse_weights and sparse_weights must have the same ""
+        ""length, got "",
+        sparse_weights_outputs.size(), "" and "", sparse_weights_inputs.size());
 
   OpOutputList dense_weights_outputs;
   TF_RETURN_IF_ERROR(
       context->output_list(""out_delta_dense_weights"", &dense_weights_outputs));
+  if (dense_weights_outputs.size() != dense_weights_inputs.size())
+    return errors::InvalidArgument(
+        ""out_delta_dense_weights and dense_weights must have the same length, ""
+        ""got "",
+        dense_weights_outputs.size(), "" and "", dense_weights_inputs.size());
 
   for (int i = 0; i < sparse_weights_inputs.size(); ++i) {
     Tensor* delta_t;
@@ -327,13 +341,28 @@ Status Examples::Initialize(OpKernelContext* const context,
   OpInputList sparse_example_indices_inputs;
   TF_RETURN_IF_ERROR(context->input_list(""sparse_example_indices"",
                                          &sparse_example_indices_inputs));
+  if (sparse_example_indices_inputs.size() != num_sparse_features)
+    return errors::InvalidArgument(
+        ""Expected "", num_sparse_features,
+        "" tensors in sparse_example_indices but got "",
+        sparse_example_indices_inputs.size());
   OpInputList sparse_feature_indices_inputs;
   TF_RETURN_IF_ERROR(context->input_list(""sparse_feature_indices"",
                                          &sparse_feature_indices_inputs));
+  if (sparse_feature_indices_inputs.size() != num_sparse_features)
+    return errors::InvalidArgument(
+        ""Expected "", num_sparse_features,
+        "" tensors in sparse_feature_indices but got "",
+        sparse_feature_indices_inputs.size());
   OpInputList sparse_feature_values_inputs;
   if (num_sparse_features_with_values > 0) {
     TF_RETURN_IF_ERROR(context->input_list(""sparse_feature_values"",
                                            &sparse_feature_values_inputs));
+    if (sparse_feature_values_inputs.size() != num_sparse_features_with_values)
+      return errors::InvalidArgument(
+          ""Expected "", num_sparse_features_with_values,
+          "" tensors in sparse_feature_values but got "",
+          sparse_feature_values_inputs.size());
   }
 
   const Tensor* example_weights_t;
@@ -400,6 +429,13 @@ Status Examples::CreateSparseFeatureRepresentation(
           sparse_example_indices_inputs[i].template flat<int64>();
       auto feature_indices =
           sparse_feature_indices_inputs[i].template flat<int64>();
+      if (example_indices.size() != feature_indices.size()) {
+        mutex_lock l(mu);
+        result = errors::InvalidArgument(
+            ""Found mismatched example_indices and feature_indices ["",
+            example_indices, ""] vs ["", feature_indices, ""]"");
+        return;
+      }
 
       // Parse features for each example. Features for a particular example
       // are at the offsets (start_id, end_id]
",1,train
376c352a37ce5a68b721406dc7e77ac4b6cf483d,tensorflow/tensorflow,"Don't do any work if output tensor is null (prevent div by 0)

PiperOrigin-RevId: 372208700
Change-Id: Iea6b6293e887ade8538facfdb50fb931e17f511e",maxpooling_op.cc,"@@ -1088,6 +1088,8 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel {
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                 {0}, 0, out_shape, &grad_out));
 
+    if (out_shape.num_elements() == 0) return;  // nothing to be done
+
     LaunchMaxPoolingGradWithArgmax<Device, T>::launch(
         context, params, grad_in, argmax, grad_out, include_batch_in_index_);
   }
",1,test
a3d9f9be9ac2296615644061b40cefcee341dcc4,tensorflow/tensorflow,"Add missing validation to pooling_ops_3d

PiperOrigin-RevId: 372218727
Change-Id: I6b9ed4266aa7286c02f1f230d7bea922c1be547e",pooling_ops_3d.cc,"@@ -698,6 +698,19 @@ class MaxPooling3dGradGradOp : public OpKernel {
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                 {2}, 0, tensor_out.shape(), &output));
 
+    // Given access patterns in LaunchMaxPooling3dGradGradOp, these tensors must
+    // have elements.
+    OP_REQUIRES(context, tensor_in.NumElements() > 0,
+                errors::InvalidArgument(""received empty tensor tensor_in: "",
+                                        tensor_in.DebugString()));
+    OP_REQUIRES(context, tensor_out.NumElements() > 0,
+                errors::InvalidArgument(""received empty tensor tensor_out: "",
+                                        tensor_out.DebugString()));
+    OP_REQUIRES(
+        context, out_grad_backprop.NumElements() > 0,
+        errors::InvalidArgument(""received empty tensor out_grad_backprop: "",
+                                out_grad_backprop.DebugString()));
+
     LaunchMaxPooling3dGradGradOp<Device, T>::launch(
         context, params, tensor_in, tensor_out, out_grad_backprop, output);
   }
",1,test
ecf768cbe50cedc0a45ce1ee223146a3d3d26d23,tensorflow/tensorflow,"Add missing validations to reverse_sequence_op

PiperOrigin-RevId: 372178683
Change-Id: Iac97ebab5b342f1262c77a7d9bcb4267b305ce5b",reverse_sequence_op.cc,"@@ -115,6 +115,10 @@ class ReverseSequenceOp : public OpKernel {
       : OpKernel(context) {
     OP_REQUIRES_OK(context, context->GetAttr(""batch_dim"", &batch_dim_));
     OP_REQUIRES_OK(context, context->GetAttr(""seq_dim"", &seq_dim_));
+    OP_REQUIRES(context, batch_dim_ >= 0,
+                errors::InvalidArgument(""Invalid batch_dim "", batch_dim_));
+    OP_REQUIRES(context, seq_dim_ >= 0,
+                errors::InvalidArgument(""Invalid seq_dim "", seq_dim_));
   }
 
   void Compute(OpKernelContext* context) override {
",1,train
63c6a29d0f2d692b247f7bf81f8732d6442fad09,tensorflow/tensorflow,"Add missing validation, prevent heap OOB

PiperOrigin-RevId: 372246723
Change-Id: I1a454a643810e77d7d14821b342098c56a09fbbf",pooling_ops_3d.cc,"@@ -693,6 +693,7 @@ class MaxPooling3dGradGradOp : public OpKernel {
 
     Pool3dParameters params{context,  ksize_,       stride_,
                             padding_, data_format_, tensor_in.shape()};
+    if (!context->status().ok()) return;  // params is invalid
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
@@ -710,6 +711,17 @@ class MaxPooling3dGradGradOp : public OpKernel {
         context, out_grad_backprop.NumElements() > 0,
         errors::InvalidArgument(""received empty tensor out_grad_backprop: "",
                                 out_grad_backprop.DebugString()));
+    OP_REQUIRES(context,
+                tensor_in.NumElements() == out_grad_backprop.NumElements(),
+                errors::InvalidArgument(""tensor_in and out_grad_backprop must ""
+                                        ""have same number of elements, got <"",
+                                        tensor_in.DebugString(), ""> and <"",
+                                        out_grad_backprop.DebugString(), "">""));
+    OP_REQUIRES(
+        context, tensor_out.NumElements() == output->NumElements(),
+        errors::InvalidArgument(
+            ""tensor_out and output must have same number of elements, got <"",
+            tensor_out.DebugString(), ""> and <"", output->DebugString(), "">""));
 
     LaunchMaxPooling3dGradGradOp<Device, T>::launch(
         context, params, tensor_in, tensor_out, out_grad_backprop, output);
",1,train
6fc9141f42f6a72180ecd24021c3e6b36165fe0d,tensorflow/tensorflow,"Fix assertion failure in pooling_ops_3d

PiperOrigin-RevId: 372364504
Change-Id: Iecde4fe26b47a8fa935d6e2611b5585ed5777781",pooling_ops_3d.cc,"@@ -383,6 +383,19 @@ struct LaunchAvgPooling3dGradOp<CPUDevice, T> {
                      const std::array<int64, 3>& output_shape,
                      const std::array<int64, 3>& padding,
                      TensorFormat data_format, Tensor* output) {
+    OP_REQUIRES(
+        context, tensor_in_shape.dim_size(0) == out_backprop.dim_size(0),
+        errors::InvalidArgument(
+            ""Expected first dimension of tensor_in_shape and ""
+            ""out_backprop to match, got "",
+            tensor_in_shape.dim_size(0), "" and "", out_backprop.dim_size(0)));
+    OP_REQUIRES(
+        context, tensor_in_shape.dim_size(4) == out_backprop.dim_size(4),
+        errors::InvalidArgument(
+            ""Expected last dimension of tensor_in_shape and ""
+            ""out_backprop to match, got "",
+            tensor_in_shape.dim_size(4), "" and "", out_backprop.dim_size(4)));
+
     output->flat<T>().setZero();
     std::array<int64, 3> input_size = {{tensor_in_shape.dim_size(3),
                                         tensor_in_shape.dim_size(2),
",1,train
12c727cee857fa19be717f336943d95fca4ffe4f,tensorflow/tensorflow,"Validate inputs of `FractionalAvgPoolGrad`.

PiperOrigin-RevId: 372420640
Change-Id: Icc583928e6cdc3062e12498e4d2337a8fe3da016",fractional_avg_pool_op.cc,"@@ -250,6 +250,19 @@ class FractionalAvgPoolGradOp : public OpKernel {
     const int64 out_cols = out_backprop.dim_size(2);
     const int64 out_depth = out_backprop.dim_size(3);
 
+    OP_REQUIRES(context, row_seq_tensor.NumElements() > out_rows,
+                errors::InvalidArgument(""Given out_backprop shape "",
+                                        out_backprop.shape().DebugString(),
+                                        "", row_seq_tensor must have at least "",
+                                        out_rows + 1, "" elements, but got "",
+                                        row_seq_tensor.NumElements()));
+    OP_REQUIRES(context, col_seq_tensor.NumElements() > out_cols,
+                errors::InvalidArgument(""Given out_backprop shape "",
+                                        out_backprop.shape().DebugString(),
+                                        "", col_seq_tensor must have at least "",
+                                        out_cols + 1, "" elements, but got "",
+                                        col_seq_tensor.NumElements()));
+
     auto row_seq_tensor_flat = row_seq_tensor.flat<int64>();
     auto col_seq_tensor_flat = col_seq_tensor.flat<int64>();
     auto orig_input_tensor_shape_flat = orig_input_tensor_shape.flat<int64>();
",1,train
a74768f8e4efbda4def9f16ee7e13cf3922ac5f7,tensorflow/tensorflow,"Prevent heap OOB error in `MaxPoolGrad`

PiperOrigin-RevId: 372424854
Change-Id: Idac0f23867ad8b0601cafbaaa52d5e64269e63a7",maxpooling_op.cc,"@@ -199,7 +199,9 @@ static void SpatialMaxPoolWithArgMaxHelper(
         // CHECK(input_backprop_index >= in_start && input_backprop_index <
         // in_end)
         FastBoundsCheck(input_backprop_index - in_start, in_end - in_start);
-        input_backprop_flat(input_backprop_index) += out_backprop_flat(index);
+        if (index < out_backprop.NumElements()) {
+          input_backprop_flat(input_backprop_index) += out_backprop_flat(index);
+        }
       }
     }
   };
",1,train
32fdcbff9d06d010d908fcc4bd4b36eb3ce15925,tensorflow/tensorflow,"Validate arguments of `FractionalMaxPoolGrad`

PiperOrigin-RevId: 372274982
Change-Id: If46b0c442efa4eaef635ce6a476717060420122c",fractional_max_pool_op.cc,"@@ -235,6 +235,20 @@ class FractionalMaxPoolGradOp : public OpKernel {
 
     // Just to make it similar to FractionalMaxPoolOp.
     constexpr int tensor_in_and_out_dims = 4;
+    OP_REQUIRES(
+        context, tensor_in.dims() == tensor_in_and_out_dims,
+        errors::InvalidArgument(""orig_input should be a tensor of rank 4, got "",
+                                tensor_in.DebugString()));
+    OP_REQUIRES(context, tensor_in.NumElements() > 0,
+                errors::InvalidArgument(""orig_input must not be empty, got "",
+                                        tensor_in.DebugString()));
+    OP_REQUIRES(context, tensor_out.dims() == tensor_in_and_out_dims,
+                errors::InvalidArgument(
+                    ""orig_output should be a tensor of rank 4, got "",
+                    tensor_out.DebugString()));
+    OP_REQUIRES(context, tensor_out.NumElements() > 0,
+                errors::InvalidArgument(""orig_output must not be empty, got "",
+                                        tensor_out.DebugString()));
     std::vector<int64> input_size(tensor_in_and_out_dims);
     std::vector<int64> output_size(tensor_in_and_out_dims);
     for (int i = 0; i < tensor_in_and_out_dims; ++i) {
",1,train
b1b323042264740c398140da32e93fb9c2c9f33e,tensorflow/tensorflow,"Fix SEGV in CTC ops

PiperOrigin-RevId: 372430279
Change-Id: I7ec2ad9d6f4d0980c33de45d27c6b17df5c6e26f",ctc_decoder_ops.cc,"@@ -70,6 +70,9 @@ class CTCDecodeHelper {
     if (inputs_shape.dims() != 3) {
       return errors::InvalidArgument(""inputs is not a 3-Tensor"");
     }
+    if (inputs_shape.num_elements() == 0) {
+      return errors::InvalidArgument(""inputs must not be empty"");
+    }
 
     const int64 max_time = inputs_shape.dim_size(0);
     const int64 batch_size = inputs_shape.dim_size(1);
",1,train
5899741d0421391ca878da47907b1452f06aaf1b,tensorflow/tensorflow,"Fix heap OOB read in dequantize op.

Also fixes SEGV in same op

PiperOrigin-RevId: 372437896
Change-Id: I135e94d360c2a1ce374c10f7e0fed1af603dbc02",dequantize_op.cc,"@@ -98,6 +98,18 @@ class DequantizeOp : public OpKernel {
     if (axis_ > -1) {
       num_slices = input.dim_size(axis_);
     }
+    OP_REQUIRES(ctx, input_min_tensor.NumElements() == num_slices,
+                errors::InvalidArgument(
+                    ""input_min_tensor must have as many elements as input on ""
+                    ""the dequantization axis ("",
+                    axis_, ""), got "", input_min_tensor.NumElements(),
+                    "", expected "", num_slices));
+    OP_REQUIRES(ctx, input_max_tensor.NumElements() == num_slices,
+                errors::InvalidArgument(
+                    ""input_max_tensor must have as many elements as input on ""
+                    ""the dequantization axis ("",
+                    axis_, ""), got "", input_max_tensor.NumElements(),
+                    "", expected "", num_slices));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, input.shape(), &output));
",1,train
6972f9dfe325636b3db4e0bc517ee22a159365c0,tensorflow/tensorflow,"Add missing validation to FusedBatchNorm.

PiperOrigin-RevId: 372460336
Change-Id: Ic8c4e4de67c58a741bd87f2e182bed07247d1126",fused_batch_norm_op.cc,"@@ -1282,6 +1282,32 @@ class FusedBatchNormOpBase : public OpKernel {
                   errors::InvalidArgument(""Error during tensor copy.""));
     }
 
+    const auto num_channels = GetTensorDim(x, tensor_format_, 'C');
+    OP_REQUIRES(
+        context, scale.NumElements() == num_channels,
+        errors::InvalidArgument(""scale must have the same number of elements ""
+                                ""as the channels of x, got "",
+                                scale.NumElements(), "" and "", num_channels));
+    OP_REQUIRES(
+        context, offset.NumElements() == num_channels,
+        errors::InvalidArgument(""offset must have the same number of elements ""
+                                ""as the channels of x, got "",
+                                offset.NumElements(), "" and "", num_channels));
+    if (estimated_mean.NumElements() != 0) {
+      OP_REQUIRES(context, estimated_mean.NumElements() == num_channels,
+                  errors::InvalidArgument(
+                      ""mean must be empty or have the same number of ""
+                      ""elements as the channels of x, got "",
+                      estimated_mean.NumElements(), "" and "", num_channels));
+    }
+    if (estimated_variance.NumElements() != 0) {
+      OP_REQUIRES(context, estimated_variance.NumElements() == num_channels,
+                  errors::InvalidArgument(
+                      ""variance must be empty or have the same number of ""
+                      ""elements as the channels of x, got "",
+                      estimated_variance.NumElements(), "" and "", num_channels));
+    }
+
     if (has_side_input_) {
       OP_REQUIRES(context, side_input->shape() == x.shape(),
                   errors::InvalidArgument(
@@ -1294,7 +1320,7 @@ class FusedBatchNormOpBase : public OpKernel {
       // NOTE(ezhulenev): This requirement is coming from implementation
       // details of cudnnBatchNormalizationForwardTrainingEx.
       OP_REQUIRES(
-          context, !is_training_ || x.dim_size(3) % 4 == 0,
+          context, !is_training_ || num_channels % 4 == 0,
           errors::InvalidArgument(""FusedBatchNorm with activation requires ""
                                   ""channel dimension to be a multiple of 4.""));
     }
",1,train
4c0ee937c0f61c4fc5f5d32d9bb4c67428012a60,tensorflow/tensorflow,"Prevent overflow in sparse op

PiperOrigin-RevId: 372442006
Change-Id: I60fe31cd7e56fb3501e97c63500caf902ddeee96",sparse_split_op.cc,"@@ -63,11 +63,18 @@ class SparseSplitOp : public OpKernel {
                                         input_shape.vec<int64>()(axis),
                                         ""), got "", num_split_));
 
+    // Prevent overflow by constructing the dense shape separately
+    TensorShape dense_shape;
+    const auto input_shape_flat = input_shape.flat<int64>();
+    for (int i = 0; i < input_shape.NumElements(); i++) {
+      OP_REQUIRES_OK(context,
+                     dense_shape.AddDimWithStatus(input_shape_flat(i)));
+    }
+
     sparse::SparseTensor sparse_tensor;
     OP_REQUIRES_OK(context,
-                   sparse::SparseTensor::Create(
-                       input_indices, input_values,
-                       TensorShape(input_shape.vec<int64>()), &sparse_tensor));
+                   sparse::SparseTensor::Create(input_indices, input_values,
+                                                dense_shape, &sparse_tensor));
 
     std::vector<sparse::SparseTensor> outputs;
     OP_REQUIRES_OK(context, sparse::SparseTensor::Split<T>(
",1,train
49847ae69a4e1a97ae7f2db5e217c77721e37948,tensorflow/tensorflow,"Fix division by zero in TFLite padding.

PiperOrigin-RevId: 370777494
Change-Id: Ic1331e4a1603b9e4c8aa183012a6c8237410aa0f",padding.h,"@@ -44,6 +44,11 @@ inline int ComputePaddingWithOffset(int stride, int dilation_rate, int in_size,
 inline int ComputeOutSize(TfLitePadding padding, int image_size,
                           int filter_size, int stride, int dilation_rate = 1) {
   int effective_filter_size = (filter_size - 1) * dilation_rate + 1;
+
+  // TODO(b/186448822): This uses 0 since the function has no other way to
+  // report error case
+  if (stride == 0) return 0;
+
   switch (padding) {
     case kTfLitePaddingSame:
       return (image_size + stride - 1) / stride;
",1,train
5f7975d09eac0f10ed8a17dbb6f5964977725adc,tensorflow/tensorflow,"Prevent another div by 0 in optimized pooling implementations in TFLite

PiperOrigin-RevId: 370800091
Change-Id: I2119352f57fb5ca4f2051e0e2d749403304a979b",pooling.cc,"@@ -87,6 +87,10 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
   auto padding = params->padding;
   int out_width, out_height;
 
+  // Prevent division by 0 in optimized pooling implementations
+  TF_LITE_ENSURE(context, params->stride_height > 0);
+  TF_LITE_ENSURE(context, params->stride_width > 0);
+
   data->padding = ComputePaddingHeightWidth(
       params->stride_height, params->stride_width, 1, 1, height, width,
       params->filter_height, params->filter_width, padding, &out_height,
",1,train
5f7975d09eac0f10ed8a17dbb6f5964977725adc,tensorflow/tensorflow,"Prevent another div by 0 in optimized pooling implementations in TFLite

PiperOrigin-RevId: 370800091
Change-Id: I2119352f57fb5ca4f2051e0e2d749403304a979b",pooling_test.cc,"@@ -1151,5 +1151,18 @@ TEST(FloatPoolingOpTest, L2PoolPaddingValidSlide1) {
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({3.5, 6.0, 6.5}));
 }
 
+#ifdef GTEST_HAS_DEATH_TEST
+TEST(FloatPoolingOpTest, MaxPoolWithZeroStride) {
+  EXPECT_DEATH(
+      FloatPoolingOpModel m(BuiltinOperator_MAX_POOL_2D,
+                            /*input=*/{TensorType_FLOAT32, {1, 2, 4, 1}},
+                            /*filter_width=*/2, /*filter_height=*/2,
+                            /*output=*/{TensorType_FLOAT32, {}},
+                            /*padding=*/Padding_VALID,
+                            /*stride_w=*/0, /*stride_h=*/0),
+      ""Cannot allocate tensors"");
+}
+#endif
+
 }  // namespace
 }  // namespace tflite
",1,train
0d45ea1ca641b21b73bcf9c00e0179cda284e7e7,tensorflow/tensorflow,"Prevent one more div by 0 in TFLite

PiperOrigin-RevId: 370800114
Change-Id: I6b956aeb8c458cc6f514408d2e89ffacfe249e57",space_to_depth.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
   const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
   const int input_height = input->dims->data[1];
   const int input_width = input->dims->data[2];
   int output_height = input_height / block_size;
",1,train
801c1c6be5324219689c98e1bd3e0ca365ee834d,tensorflow/tensorflow,"Fix another division by 0 in TFLite

PiperOrigin-RevId: 370800181
Change-Id: I924809166a6131f5075e6d45c455106538d755f9",transpose_conv.cc,"@@ -591,6 +591,10 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const auto* params =
       reinterpret_cast<TfLiteTransposeConvParams*>(node->builtin_data);
 
+  // Prevent divisions by 0
+  TF_LITE_ENSURE(context, params->stride_height > 0);
+  TF_LITE_ENSURE(context, params->stride_width > 0);
+
   // Resize any deferred dynamic tensors
   if (IsDynamicTensor(output)) {
     TF_LITE_ENSURE_OK(context, ResizeTensor(context, output_shape, output));
",1,train
8e45822aa0b9f5df4b4c64f221e64dc930a70a9d,tensorflow/tensorflow,"Handle one more division by 0 in TFLite.

PiperOrigin-RevId: 370800140
Change-Id: I9ab42e5aaccf02f226d1282611490a54cf7d273e",gather_nd.cc,"@@ -155,6 +155,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context,
                     GetOutputSafe(context, node, kOutputTensor, &output));
 
+  // Prevent division by 0 in the helper
+  TF_LITE_ENSURE(context, NumElements(params) > 0);
+
   switch (indices->type) {
     case kTfLiteInt32:
       return EvalGatherNd<int32_t>(context, params, indices, output);
",1,test
953f28dca13c92839ba389c055587cfe6c723578,tensorflow/tensorflow,"Prevent a null pointer exception in TFLite

PiperOrigin-RevId: 370800206
Change-Id: Idd437ebce4ff224120d8eefc1c14c062173b71d6",maximum_minimum.cc,"@@ -157,35 +157,37 @@ template <KernelType kernel_type, typename OpType>
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   OpContext op_context(context, node);
 
-    switch (op_context.output->type) {
-      case kTfLiteFloat32:
-        TFLiteOperation<kernel_type, float, OpType>(context, node, op_context);
-        break;
-      case kTfLiteUInt8:
-        TFLiteOperation<kernel_type, uint8_t, OpType>(context, node,
-                                                      op_context);
-        break;
-      case kTfLiteInt8:
-        TFLiteOperation<kernel_type, int8_t, OpType>(context, node, op_context);
-        break;
-      case kTfLiteInt32:
-        TFLiteOperation<kernel_type, int32_t, OpType>(context, node,
-                                                      op_context);
-        break;
-      case kTfLiteInt64:
-        TFLiteOperation<kernel_type, int64_t, OpType>(context, node,
-                                                      op_context);
-        break;
-      case kTfLiteInt16:
-        TFLiteOperation<kernel_type, int16_t, OpType>(context, node,
-                                                      op_context);
-        break;
-      default:
-        context->ReportError(context,
-                             ""Type %d is currently not supported by Maximum."",
-                             op_context.output->type);
-        return kTfLiteError;
-    }
+  // If inputs have no element, shortcircuit.
+  if (NumElements(op_context.input1) == 0 ||
+      NumElements(op_context.input2) == 0) {
+    return kTfLiteOk;
+  }
+
+  switch (op_context.output->type) {
+    case kTfLiteFloat32:
+      TFLiteOperation<kernel_type, float, OpType>(context, node, op_context);
+      break;
+    case kTfLiteUInt8:
+      TFLiteOperation<kernel_type, uint8_t, OpType>(context, node, op_context);
+      break;
+    case kTfLiteInt8:
+      TFLiteOperation<kernel_type, int8_t, OpType>(context, node, op_context);
+      break;
+    case kTfLiteInt32:
+      TFLiteOperation<kernel_type, int32_t, OpType>(context, node, op_context);
+      break;
+    case kTfLiteInt64:
+      TFLiteOperation<kernel_type, int64_t, OpType>(context, node, op_context);
+      break;
+    case kTfLiteInt16:
+      TFLiteOperation<kernel_type, int16_t, OpType>(context, node, op_context);
+      break;
+    default:
+      context->ReportError(context,
+                           ""Type %d is currently not supported by Maximum."",
+                           op_context.output->type);
+      return kTfLiteError;
+  }
   return kTfLiteOk;
 }
 
",1,train
9c1dc920d8ffb4893d6c9d27d1f039607b326743,tensorflow/tensorflow,"Prevent infinite loop/stack overflow in TFLite `while` op.

PiperOrigin-RevId: 370800333
Change-Id: I6a2e4ff849da339545c449db2af7e11ce6ff02c3",while.cc,"@@ -138,6 +138,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   auto* subgraphs = this_subgraph->GetSubgraphs();
   TF_LITE_ENSURE(context, op_data->cond_subgraph_index < subgraphs->size());
   TF_LITE_ENSURE(context, op_data->body_subgraph_index < subgraphs->size());
+  TF_LITE_ENSURE(context,
+                 op_data->cond_subgraph_index != op_data->body_subgraph_index);
 
   Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get();
   Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get();
",1,train
c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recursion.

Recursion is currently unsupported.

PiperOrigin-RevId: 371708957
Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",subgraph.cc,"@@ -156,6 +156,42 @@ const char* GetTFLiteOpName(const TfLiteRegistration& op_reg) {
   return tflite::EnumNamesBuiltinOperator()[op_reg.builtin_code];
 }
 
+// An utility test to detect if the subgraph is abused:
+// 1. Detects if recursion exists in the graph (recursion is not currently
+//    supported.
+// 2. Detects if the interpreter / subgraph is used in multiple subgraphs.
+//    Note: It's clearly documented that the interpreter / subgraph are not
+//    thread-safe. This serves as a check with possible false negatives
+//    unless we switch to atomic boolean flags.
+class SubgraphGuard {
+ public:
+  SubgraphGuard(TfLiteContext* context, bool* is_subgraph_in_use)
+      : is_subgraph_in_use_(is_subgraph_in_use) {
+    if (*is_subgraph_in_use_) {
+      TF_LITE_KERNEL_LOG(
+          context,
+          ""Subgraph is already in use. Using an interpreter or a subgraph in ""
+          ""multiple threads is not supported. Recursion in the graph is not ""
+          ""supported."");
+      status_ = kTfLiteError;
+    } else {
+      *is_subgraph_in_use_ = true;
+    }
+  }
+  ~SubgraphGuard() {
+    // If tht original status was OK, recover the boolean flag.
+    if (status_ == kTfLiteOk) {
+      *is_subgraph_in_use_ = false;
+    }
+  }
+
+  TfLiteStatus status() const { return status_; }
+
+ private:
+  TfLiteStatus status_ = kTfLiteOk;
+  bool* is_subgraph_in_use_;
+};
+
 }  // namespace
 
 // A trivial implementation of GraphInfo around the Interpreter.
@@ -655,6 +691,7 @@ TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
 
 TfLiteStatus Subgraph::AllocateTensors() {
   TFLITE_SCOPED_TAGGED_DEFAULT_PROFILE(profiler_.get(), ""AllocateTensors"");
+
   if (!consistent_) {
     ReportError(""AllocateTensors() called on inconsistent model."");
     return kTfLiteError;
@@ -678,6 +715,12 @@ TfLiteStatus Subgraph::AllocateTensors() {
     return kTfLiteOk;
   }
 
+  // Note `AllocateTensors` sometimes calls itself recursively above
+  // for delegates. Therefore only the logic below need to be guarded
+  // by `SubgraphGuard`.
+  SubgraphGuard guard(&context_, &is_subgraph_in_use_);
+  TF_LITE_ENSURE_OK(&context_, guard.status());
+
   next_execution_plan_index_to_prepare_ = 0;
   next_execution_plan_index_to_plan_allocation_ = 0;
   next_original_execution_plan_index_to_prepare_ = 0;
@@ -1014,6 +1057,9 @@ TfLiteStatus Subgraph::PrepareOpsAndTensors() {
 }
 
 TfLiteStatus Subgraph::Invoke() {
+  SubgraphGuard guard(&context_, &is_subgraph_in_use_);
+  TF_LITE_ENSURE_OK(&context_, guard.status());
+
   if (!consistent_) {
     ReportError(""Invoke called on model that is not consistent."");
     return kTfLiteError;
",1,train
c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recursion.

Recursion is currently unsupported.

PiperOrigin-RevId: 371708957
Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",subgraph.h,"@@ -759,6 +759,10 @@ class Subgraph {
   // Whether memory planner should be instantiated to retain intermediates for
   // debugging.
   bool preserve_all_tensors_ = false;
+
+  // Whether the subgraph is currently in use (e.g. running the `Invoke`
+  // or `AllocateTensors` functions).
+  bool is_subgraph_in_use_ = false;
 };
 
 }  // namespace tflite
",1,train
c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recursion.

Recursion is currently unsupported.

PiperOrigin-RevId: 371708957
Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",while.cc,"@@ -138,8 +138,6 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   auto* subgraphs = this_subgraph->GetSubgraphs();
   TF_LITE_ENSURE(context, op_data->cond_subgraph_index < subgraphs->size());
   TF_LITE_ENSURE(context, op_data->body_subgraph_index < subgraphs->size());
-  TF_LITE_ENSURE(context,
-                 op_data->cond_subgraph_index != op_data->body_subgraph_index);
 
   Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get();
   Subgraph* body_subgraph = (*subgraphs)[op_data->body_subgraph_index].get();
",1,train
c6173f5fe66cdbab74f4f869311fe6aae2ba35f4,tensorflow/tensorflow,"TFLite: Error out when the graph has a recursion.

Recursion is currently unsupported.

PiperOrigin-RevId: 371708957
Change-Id: I8dfad0d85cbfe08e39ae8ea7bad21254ddee5003",model_test.cc,"@@ -600,6 +600,25 @@ TEST(BasicFlatBufferModel, TestHandleMalformedModelReuseTensor) {
   ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk);
 }
 
+// Recursion & reentrant are not supported in TFLite.
+// The test ensures it fails gracefullly instead of crashing with
+// a stack overflow.
+TEST(BasicFlatBufferModel, TestUnsupportedRecursion) {
+  const auto model_path =
+      ""tensorflow/lite/testdata/unsupported_recursion.bin"";
+
+  std::unique_ptr<tflite::FlatBufferModel> model =
+      FlatBufferModel::BuildFromFile(model_path);
+  ASSERT_NE(model, nullptr);
+
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  InterpreterBuilder builder(*model, resolver);
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(builder(&interpreter), kTfLiteOk);
+  ASSERT_NE(interpreter, nullptr);
+  ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk);
+}
+
 // The models here have a buffer index for a tensor pointing to a null buffer.
 // This results in the tensor being interpreted as read-write, but the model
 // assumes the tensor is read-only. As such, `interpreter->Invoke()` would
",1,train
f8378920345f4f4604202d4ab15ef64b2aceaa16,tensorflow/tensorflow,"Prevent a null pointer dereference in TFLite.

PiperOrigin-RevId: 370800353
Change-Id: Ic9c9712ce5c6e384c954dcd640a5bd9ff05c9a05",subgraph.cc,"@@ -1060,10 +1060,17 @@ TfLiteStatus Subgraph::Invoke() {
         TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
       }
       if (tensor->data.raw == nullptr && tensor->bytes > 0) {
-        if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1) {
+        if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1 &&
+            tensor->dims->size != 1) {
           // In general, having a tensor here with no buffer will be an error.
-          // However, for the reshape operator, the second input tensor is only
-          // used for the shape, not for the data. Thus, null buffer is ok.
+          // However, for the reshape operator, the second input tensor is
+          // sometimes only used for the shape, not for the data. Thus, null
+          // buffer is ok in this situation.
+          // The situation where null buffer is not ok for reshape operator is
+          // only when there are 2 inputs given to the node and the one
+          // corresponding to the shape (i == 1) is a vector that contains all
+          // dimensions. See `GetOutputShape()` function in
+          // `tensorflow/lite/kernels/reshape.cc`
           continue;
         } else {
           // In all other cases, we need to return an error as otherwise we will
",1,test
2c74674348a4708ced58ad6eb1b23354df8ee044,tensorflow/tensorflow,"Prevent division by 0

PiperOrigin-RevId: 370979352
Change-Id: Ic79191c316d986fc6072ecaebfec9d5f2b924d00",batch_to_space_nd.cc,"@@ -78,6 +78,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context,
   int output_batch_size = input_size->data[0];
   for (int dim = 0; dim < spatial_dims_num; ++dim) {
     // Number of batch must be multiple of (block_shape[dim]).
+    TF_LITE_ENSURE(context, block_shape[dim] != 0);
     TF_LITE_ENSURE_EQ(context, output_batch_size % block_shape[dim], 0);
     output_batch_size = output_batch_size / block_shape[dim];
     output_size->data[dim + 1] = input_size->data[dim + 1] * block_shape[dim] -
",1,train
ff489d95a9006be080ad14feb378f2b4dac35552,tensorflow/tensorflow,"Prevent division by 0.

PiperOrigin-RevId: 370962554
Change-Id: I0b9b62f4d8e1046dd88f9433f8dfeaf61a901680",conv.cc,"@@ -545,6 +545,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
     // Only one scale factor per batch is typically necessary. See optimized
     // implementation for why we need to allocate for the height of the inputs
     // flattened to 2D.
+    TF_LITE_ENSURE(context, channels_in != 0);
     const int height = NumElements(input) / channels_in;
     int scaling_dims[1] = {height};
     if (!TfLiteIntArrayEqualsArray(scaling_factors->dims, 1, scaling_dims)) {
@@ -587,6 +588,7 @@ TfLiteStatus Prepare(KernelType kernel_type, TfLiteContext* context,
       input_offsets->type = kTfLiteInt32;
       input_offsets->allocation_type = kTfLiteArenaRw;
       // See above comment for the need to allocate for height of inputs.
+      TF_LITE_ENSURE(context, channels_in != 0);
       const int height = NumElements(input) / channels_in;
       const int input_offset_dims[1] = {height};
       if (!TfLiteIntArrayEqualsArray(input_offsets->dims, 1,
@@ -886,8 +888,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
 
-  const int input_size = NumElements(input) / SizeOfDimension(input, 0);
   const int batch_size = SizeOfDimension(input, 0);
+  TF_LITE_ENSURE(context, batch_size != 0);
+  const int input_size = NumElements(input) / batch_size;
   TfLiteTensor* quantized_input_tensor;
   TF_LITE_ENSURE_OK(context,
                     GetTemporarySafe(context, node, data->input_quantized_index,
@@ -989,8 +992,9 @@ TfLiteStatus EvalHybrid(TfLiteContext* context, TfLiteNode* node,
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
 
-  const int input_size = NumElements(input) / SizeOfDimension(input, 0);
   const int batch_size = SizeOfDimension(input, 0);
+  TF_LITE_ENSURE(context, batch_size != 0);
+  const int input_size = NumElements(input) / batch_size;
 
   const float* input_ptr = GetTensorData<float>(input);
   TfLiteTensor* quantized_input_tensor;
",1,train
106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite

PiperOrigin-RevId: 370800311
Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
   const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
   const int input_height = input->dims->data[1];
   const int input_width = input->dims->data[2];
   const int input_channels = input->dims->data[3];
",1,train
106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite

PiperOrigin-RevId: 370800311
Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space_test.cc,"@@ -60,6 +60,11 @@ TEST(DepthToSpaceOpModel, BadBlockSize) {
   EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 4),
                ""Cannot allocate tensors"");
 }
+
+TEST(DepthToSpaceOpModel, NoBlockSize) {
+  EXPECT_DEATH(DepthToSpaceOpModel({TensorType_FLOAT32, {1, 1, 1, 4}}, 0),
+               ""Cannot allocate tensors"");
+}
 #endif
 
 TEST(DepthToSpaceOpModel, Float32) {
",1,train
106d8f4fb89335a2c52d7c895b7a7485465ca8d9,tensorflow/tensorflow,"Prevent division by 0 in TFLite

PiperOrigin-RevId: 370800311
Change-Id: I21ccdbd31c30118acc67df8751807ee2e0b12f91",depth_to_space.cc,"@@ -54,6 +54,7 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
   const int block_size = params->block_size;
+  TF_LITE_ENSURE(context, block_size > 0);
   const int input_height = input->dims->data[kHeightRank];
   const int input_width = input->dims->data[kWidthRank];
   const int input_channels = input->dims->data[kDepthRank];
",1,train
f61c57bd425878be108ec787f4d96390579fb83e,tensorflow/tensorflow,"Prevent division by 0

PiperOrigin-RevId: 370966645
Change-Id: I831bfd96c7eb77b02d7ebb744335f59f6e5728cb",embedding_lookup.cc,"@@ -71,6 +71,10 @@ TfLiteStatus EvalSimple(TfLiteContext* context, TfLiteNode* node,
                         const TfLiteTensor* lookup, const TfLiteTensor* value,
                         TfLiteTensor* output) {
   const int row_size = SizeOfDimension(value, 0);
+  if (row_size == 0) {
+    // Propagate empty tensor if input is empty
+    return kTfLiteOk;
+  }
   const int row_bytes = value->bytes / row_size;
 
   char* output_raw = GetTensorData<char>(output);
",1,train
6d36ba65577006affb272335b7c1abd829010708,tensorflow/tensorflow,"Prevent division by 0

PiperOrigin-RevId: 370984990
Change-Id: Ib324955bbeb1cbd97c82fd5d61a00a2697c9a2de",space_to_batch_nd.cc,"@@ -79,6 +79,7 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context,
   for (int dim = 0; dim < spatial_dims_num; ++dim) {
     int final_dim_size = (input_size->data[dim + 1] + paddings_data[dim * 2] +
                           paddings_data[dim * 2 + 1]);
+    TF_LITE_ENSURE(context, block_shape[dim] != 0);
     TF_LITE_ENSURE_EQ(context, final_dim_size % block_shape[dim], 0);
     output_size->data[dim + 1] = final_dim_size / block_shape[dim];
     output_batch_size *= block_shape[dim];
",1,train
6841e522a3e7d48706a02e8819836e809f738682,tensorflow/tensorflow,"Prevent division by 0

PiperOrigin-RevId: 370995582
Change-Id: I670ffaf52d1ff8823ec31ea5f438f9125b402223",svdf.cc,"@@ -99,6 +99,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const int rank = params->rank;
   const int batch_size = input->dims->data[0];
   const int num_filters = weights_feature->dims->data[0];
+  TF_LITE_ENSURE(context, rank != 0);
   TF_LITE_ENSURE_EQ(context, num_filters % rank, 0);
   const int num_units = num_filters / rank;
   const int memory_size = weights_time->dims->data[1];
",1,train
b22786e7e9b7bdb6a56936ff29cc7e9968d7bc1d,tensorflow/tensorflow,"Prevent division by 0

PiperOrigin-RevId: 370998952
Change-Id: I6b1d49079624ee1447d2d9b53a8976fb356cc8f5",split.cc,"@@ -60,6 +60,7 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node,
   TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
 
   const int input_size = SizeOfDimension(input, axis_value);
+  TF_LITE_ENSURE(context, num_splits != 0);
   TF_LITE_ENSURE_MSG(context, input_size % num_splits == 0,
                      ""Not an even split"");
   const int slice_size = input_size / num_splits;
",1,train
3ebedd7e345453d68e279cfc3e4072648e5e12e5,tensorflow/tensorflow,"Prevent division by 0 in OneHot implementation

If the input indices tensor is degenerate, the implementation would divide by zero. See https://github.com/tensorflow/tensorflow/blob/745d57df6d5e9bc568666a2a48ed8dd629c27241/tensorflow/lite/kernels/one_hot.cc#L68-L72

PiperOrigin-RevId: 370966870
Change-Id: Ie018337811c8016b5a1d3a277d00d5f2e19a2058",one_hot.cc,"@@ -69,6 +69,11 @@ void OneHotComputeImpl(const OneHotContext& op_context) {
   for (int i = 0; i < op_context.axis; ++i) {
     prefix_dim_size *= op_context.indices->dims->data[i];
   }
+  if (prefix_dim_size == 0) {
+    // If indices tensor is degenerate, return a degenerate tensor, just like
+    // TensorFlow does.
+    return;
+  }
   const int suffix_dim_size = NumElements(op_context.indices) / prefix_dim_size;
   const int depth = *op_context.depth->data.i32;
 
",1,train
4253f96a58486ffe84b61c0415bb234a4632ee73,tensorflow/tensorflow,"Fix integer overflow in TFLite concat

PiperOrigin-RevId: 371013841
Change-Id: I6a4782ce7ca753e23ff31e7fb6aeb7f9d412cd29",concatenation.cc,"@@ -16,6 +16,8 @@ limitations under the License.
 
 #include <stdint.h>
 
+#include <limits>
+
 #include ""tensorflow/lite/c/builtin_op_data.h""
 #include ""tensorflow/lite/c/common.h""
 #include ""tensorflow/lite/kernels/internal/compatibility.h""
@@ -69,6 +71,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_EQ(context, t->type, input_type);
     for (int d = 0; d < t0->dims->size; ++d) {
       if (d == axis) {
+        // Avoid integer overflow in sum_axis below
+        TF_LITE_ENSURE(context, t->dims->data[axis] >= 0);
+        TF_LITE_ENSURE(context, t->dims->data[axis] <=
+                                    std::numeric_limits<int>::max() - sum_axis);
         sum_axis += t->dims->data[axis];
       } else {
         TF_LITE_ENSURE_EQ(context, t->dims->data[d], t0->dims->data[d]);
",1,train
cbda3c6b2dbbd3fbdc482ff8c0170a78ec2e97d0,tensorflow/tensorflow,"Prevent divisions by 0

PiperOrigin-RevId: 371003153
Change-Id: Idef56c95b9fcaeb97f87e18c7a674dbeb5173204",depthwise_conv.cc,"@@ -285,8 +285,8 @@ TfLiteStatus ComputeDepthMultiplier(TfLiteContext* context,
                                     int16* depth_multiplier) {
   int num_filter_channels = SizeOfDimension(filter, 3);
   int num_input_channels = SizeOfDimension(input, 3);
+  TF_LITE_ENSURE(context, num_input_channels != 0);
   TF_LITE_ENSURE_EQ(context, num_filter_channels % num_input_channels, 0);
-
   *depth_multiplier = num_filter_channels / num_input_channels;
   return kTfLiteOk;
 }
@@ -455,8 +455,9 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
   float output_activation_min, output_activation_max;
   CalculateActivationRange(params->activation, &output_activation_min,
                            &output_activation_max);
-  const int input_size = NumElements(input) / SizeOfDimension(input, 0);
   const int batch_size = SizeOfDimension(input, 0);
+  TF_LITE_ENSURE(context, batch_size != 0);
+  const int input_size = NumElements(input) / batch_size;
   TfLiteTensor* input_quantized;
   TF_LITE_ENSURE_OK(context,
                     GetTemporarySafe(context, node, data->input_quantized_index,
",1,train
c59c37e7b2d563967da813fa50fe20b21f4da683,tensorflow/tensorflow,"Prevent array write out-of-bounds.

If user passes an invalid axis, then we copy one too many dimensions to the output in the loop below these checks. Even if we didn't do that, there will be further issues with an invalid axis, so we check for that right now.

PiperOrigin-RevId: 371023299
Change-Id: I9eca37ffc2b29e8e48710f500701270ef0790224",arg_min_max.cc,"@@ -48,6 +48,9 @@ TfLiteStatus ResizeOutput(TfLiteContext* context, const TfLiteTensor* input,
     axis_value += NumDimensions(input);
   }
 
+  TF_LITE_ENSURE(context, axis_value >= 0);
+  TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
+
   // Copy the input dimensions to output except the axis dimension.
   TfLiteIntArray* output_dims = TfLiteIntArrayCreate(NumDimensions(input) - 1);
   int j = 0;
",1,train
5117e0851348065ed59c991562c0ec80d9193db2,tensorflow/tensorflow,"Prevent a division by 0

PiperOrigin-RevId: 371007407
Change-Id: Iecf2718de48d6bf5a69b02a9df9deda8ec1b19d3",hashtable_lookup.cc,"@@ -112,6 +112,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 2, &value));
 
   const int num_rows = SizeOfDimension(value, 0);
+  TF_LITE_ENSURE(context, num_rows != 0);
   const int row_bytes = value->bytes / num_rows;
   void* pointer = nullptr;
   DynamicBuffer buf;
",1,train
7c8cc4ec69cd348e44ad6a2699057ca88faad3e5,tensorflow/tensorflow,"Fix a dangerous integer overflow and a malloc of negative size.

PiperOrigin-RevId: 371254154
Change-Id: I250a98a3df26328770167025670235a963a72da0",common.c,"@@ -45,8 +45,10 @@ int TfLiteIntArrayEqualsArray(const TfLiteIntArray* a, int b_size,
 #ifndef TF_LITE_STATIC_MEMORY
 
 TfLiteIntArray* TfLiteIntArrayCreate(int size) {
-  TfLiteIntArray* ret =
-      (TfLiteIntArray*)malloc(TfLiteIntArrayGetSizeInBytes(size));
+  int alloc_size = TfLiteIntArrayGetSizeInBytes(size);
+  if (alloc_size <= 0) return NULL;
+  TfLiteIntArray* ret = (TfLiteIntArray*)malloc(alloc_size);
+  if (!ret) return ret;
   ret->size = size;
   return ret;
 }
",1,test
7c8cc4ec69cd348e44ad6a2699057ca88faad3e5,tensorflow/tensorflow,"Fix a dangerous integer overflow and a malloc of negative size.

PiperOrigin-RevId: 371254154
Change-Id: I250a98a3df26328770167025670235a963a72da0",embedding_lookup_sparse.cc,"@@ -173,6 +173,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   // Resize output tensor.
   TfLiteIntArray* output_shape = TfLiteIntArrayCreate(output_rank);
+  TF_LITE_ENSURE(context, output_shape != nullptr);
   int k = 0;
   int embedding_size = 1;
   int lookup_size = 1;
",1,test
ae2daeb45abfe2c6dda539cf8d0d6f653d3ef412,tensorflow/tensorflow,"Prevent array OOB read/write

PiperOrigin-RevId: 371026165
Change-Id: I26ac6372c87246e03c7eb8c94e84c84d86054b36",split_v.cc,"@@ -96,6 +96,8 @@ TfLiteStatus ResizeOutputTensors(TfLiteContext* context, TfLiteNode* node,
     }
   }
 
+  TF_LITE_ENSURE(context, axis_value >= 0);
+  TF_LITE_ENSURE(context, axis_value < NumDimensions(input));
   const int input_size = SizeOfDimension(input, axis_value);
 
   if (minus_one_index != -1) {
",1,train
ba6822bd7b7324ba201a28b2f278c29a98edbef2,tensorflow/tensorflow,"Fix OOB issue with `tf.raw_ops.SparseSparseMinimum`.

PiperOrigin-RevId: 371005787
Change-Id: Ib686ccc077836e8b980b8b5a03936d36a8ecaf71",sparse_sparse_binary_op_shared.cc,"@@ -180,6 +180,11 @@ class SparseSparseBinaryOpShared : public OpKernel {
                                           "" for dimension "", i));
     }
 
+    OP_REQUIRES(
+        ctx, a_indices_t->dim_size(1) == b_indices_t->dim_size(1),
+        errors::InvalidArgument(
+            ""Indices' dimensions do not match: got "", a_indices_t->dim_size(1),
+            "" and "", b_indices_t->dim_size(1), "" for the second dimension.""));
     const int num_dims = a_indices_t->dim_size(1);
     const auto a_indices_mat = a_indices_t->matrix<int64>();
     const auto b_indices_mat = b_indices_t->matrix<int64>();
",1,train
f6fde895ef9c77d848061c0517f19d0ec2682f3a,tensorflow/tensorflow,"Validate that a and b are proper sparse tensors

PiperOrigin-RevId: 373274848
Change-Id: I3a665ac3a29dee9fb69bdf408a939330cb93ea75",sparse_sparse_binary_op_shared.cc,"@@ -150,6 +150,7 @@ class SparseSparseBinaryOpShared : public OpKernel {
 
     const int64 a_nnz = a_indices_t->dim_size(0);
     const int64 b_nnz = b_indices_t->dim_size(0);
+
     const auto a_values = a_values_t->vec<T>();
     const auto b_values = b_values_t->vec<T>();
 
@@ -166,6 +167,14 @@ class SparseSparseBinaryOpShared : public OpKernel {
                     ""Input shapes should be a vector but received shapes "",
                     a_shape_t->shape().DebugString(), "" and "",
                     b_shape_t->shape().DebugString()));
+    const int num_dims = a_indices_t->dim_size(1);
+    OP_REQUIRES(
+        ctx, a_shape_t->NumElements() == num_dims,
+        errors::InvalidArgument(""Second dimension of a_indices and length of ""
+                                ""a_shape must match, got "",
+                                num_dims, "" and "", a_shape_t->NumElements()));
+    OP_REQUIRES(ctx, num_dims > 0,
+                errors::InvalidArgument(""Tensors must not be empty""));
     OP_REQUIRES(ctx, a_shape_t->IsSameSize(*b_shape_t),
                 errors::InvalidArgument(
                     ""Operands do not have the same ranks; got shapes: "",
@@ -180,12 +189,6 @@ class SparseSparseBinaryOpShared : public OpKernel {
                                           "" for dimension "", i));
     }
 
-    OP_REQUIRES(
-        ctx, a_indices_t->dim_size(1) == b_indices_t->dim_size(1),
-        errors::InvalidArgument(
-            ""Indices' dimensions do not match: got "", a_indices_t->dim_size(1),
-            "" and "", b_indices_t->dim_size(1), "" for the second dimension.""));
-    const int num_dims = a_indices_t->dim_size(1);
     const auto a_indices_mat = a_indices_t->matrix<int64>();
     const auto b_indices_mat = b_indices_t->matrix<int64>();
     std::vector<T> a_augmented_values, b_augmented_values;
",1,test
b761c9b652af2107cfbc33efd19be0ce41daa33e,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToTensor` failing CHECK.

PiperOrigin-RevId: 368706628
Change-Id: I5c9ea4833f38835ee183ca50d63251dc89c9f3bc",ragged_tensor_to_tensor_op.cc,"@@ -208,7 +208,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
   }
 
   void CalculateOutputIndexRowSplit(
-      const RowPartitionTensor& row_split,
+      OpKernelContext* context, const RowPartitionTensor& row_split,
       const vector<INDEX_TYPE>& parent_output_index,
       INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
       vector<INDEX_TYPE>* result) {
@@ -233,7 +233,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
       }
     }
     if (row_split_size > 0) {
-      DCHECK_EQ(result->size(), row_split(row_split_size - 1));
+      OP_REQUIRES(context, result->size() == row_split(row_split_size - 1),
+                  errors::InvalidArgument(""Invalid row split size.""));
     }
   }
 
@@ -259,7 +260,7 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
   // result[7] = -1 because parent_output_index[value_rowids[6]] == -1
   // result[8] = parent_output_index[value_rowids[7]]
   void CalculateOutputIndexValueRowID(
-      const RowPartitionTensor& value_rowids,
+      OpKernelContext* context, const RowPartitionTensor& value_rowids,
       const vector<INDEX_TYPE>& parent_output_index,
       INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
       vector<INDEX_TYPE>* result) {
@@ -293,7 +294,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
       }
       result->push_back(current_output_index);
     }
-    DCHECK_EQ(result->size(), value_rowids.size());
+    OP_REQUIRES(context, result->size() == value_rowids.size(),
+                errors::InvalidArgument(""Invalid row ids.""));
   }
 
   Status CalculateOutputIndex(OpKernelContext* context, int dimension,
@@ -307,13 +309,13 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
     switch (partition_type) {
       case RowPartitionType::VALUE_ROWIDS:
         CalculateOutputIndexValueRowID(
-            row_partition_tensor, parent_output_index, output_index_multiplier,
-            output_size, result);
+            context, row_partition_tensor, parent_output_index,
+            output_index_multiplier, output_size, result);
         return tensorflow::Status::OK();
       case RowPartitionType::ROW_SPLITS:
-        CalculateOutputIndexRowSplit(row_partition_tensor, parent_output_index,
-                                     output_index_multiplier, output_size,
-                                     result);
+        CalculateOutputIndexRowSplit(
+            context, row_partition_tensor, parent_output_index,
+            output_index_multiplier, output_size, result);
         return tensorflow::Status::OK();
       default:
         return errors::InvalidArgument(
",1,test
c4d7afb6a5986b04505aca4466ae1951686c80f6,tensorflow/tensorflow,"Fix heap OOB / undefined behavior in `RaggedTensorToTensor`

PiperOrigin-RevId: 373244623
Change-Id: I2d6cbbc8c67b238a8815bf58097f7586d87c54f2",ragged_tensor_to_tensor_op.cc,"@@ -207,8 +207,8 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
     DCHECK_EQ(result->size(), first_dimension);
   }
 
-  void CalculateOutputIndexRowSplit(
-      OpKernelContext* context, const RowPartitionTensor& row_split,
+  Status CalculateOutputIndexRowSplit(
+      const RowPartitionTensor& row_split,
       const vector<INDEX_TYPE>& parent_output_index,
       INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
       vector<INDEX_TYPE>* result) {
@@ -232,10 +232,11 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
         result->push_back(-1);
       }
     }
-    if (row_split_size > 0) {
-      OP_REQUIRES(context, result->size() == row_split(row_split_size - 1),
-                  errors::InvalidArgument(""Invalid row split size.""));
+    if (row_split_size > 0 && result->size() != row_split(row_split_size - 1)) {
+      return errors::InvalidArgument(""Invalid row split size."");
     }
+
+    return Status::OK();
   }
 
   // Calculate the output index of the first element of a list.
@@ -259,20 +260,26 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
   // result[6] = -1 because parent_output_index[value_rowids[6]] == -1
   // result[7] = -1 because parent_output_index[value_rowids[6]] == -1
   // result[8] = parent_output_index[value_rowids[7]]
-  void CalculateOutputIndexValueRowID(
-      OpKernelContext* context, const RowPartitionTensor& value_rowids,
+  Status CalculateOutputIndexValueRowID(
+      const RowPartitionTensor& value_rowids,
       const vector<INDEX_TYPE>& parent_output_index,
       INDEX_TYPE output_index_multiplier, INDEX_TYPE output_size,
       vector<INDEX_TYPE>* result) {
     const INDEX_TYPE index_size = value_rowids.size();
     result->reserve(index_size);
     if (index_size == 0) {
-      return;
+      return Status::OK();
     }
 
     INDEX_TYPE current_output_column = 0;
     INDEX_TYPE current_value_rowid = value_rowids(0);
-    DCHECK_LT(current_value_rowid, parent_output_index.size());
+
+    if (current_value_rowid >= parent_output_index.size()) {
+      return errors::InvalidArgument(
+          ""Got current_value_rowid="", current_value_rowid,
+          "" which is not less than "", parent_output_index.size());
+    }
+
     INDEX_TYPE current_output_index = parent_output_index[current_value_rowid];
     result->push_back(current_output_index);
     for (INDEX_TYPE i = 1; i < index_size; ++i) {
@@ -289,13 +296,23 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
       } else {
         current_output_column = 0;
         current_value_rowid = next_value_rowid;
-        DCHECK_LT(next_value_rowid, parent_output_index.size());
+
+        if (next_value_rowid >= parent_output_index.size()) {
+          return errors::InvalidArgument(
+              ""Got next_value_rowid="", next_value_rowid,
+              "" which is not less than "", parent_output_index.size());
+        }
+
         current_output_index = parent_output_index[next_value_rowid];
       }
       result->push_back(current_output_index);
     }
-    OP_REQUIRES(context, result->size() == value_rowids.size(),
-                errors::InvalidArgument(""Invalid row ids.""));
+
+    if (result->size() != value_rowids.size()) {
+      return errors::InvalidArgument(""Invalid row ids."");
+    }
+
+    return Status::OK();
   }
 
   Status CalculateOutputIndex(OpKernelContext* context, int dimension,
@@ -308,10 +325,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
     auto partition_type = GetRowPartitionTypeByDimension(dimension);
     switch (partition_type) {
       case RowPartitionType::VALUE_ROWIDS:
-        CalculateOutputIndexValueRowID(
-            context, row_partition_tensor, parent_output_index,
-            output_index_multiplier, output_size, result);
-        return tensorflow::Status::OK();
+        return CalculateOutputIndexValueRowID(
+            row_partition_tensor, parent_output_index, output_index_multiplier,
+            output_size, result);
       case RowPartitionType::ROW_SPLITS:
         if (row_partition_tensor.size() - 1 > parent_output_index.size()) {
           return errors::InvalidArgument(
@@ -319,10 +335,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
               row_partition_tensor.size() - 1, "" > "",
               parent_output_index.size());
         }
-        CalculateOutputIndexRowSplit(
-            context, row_partition_tensor, parent_output_index,
-            output_index_multiplier, output_size, result);
-        return tensorflow::Status::OK();
+        return CalculateOutputIndexRowSplit(
+            row_partition_tensor, parent_output_index, output_index_multiplier,
+            output_size, result);
       default:
         return errors::InvalidArgument(
             ""Unsupported partition type:"",
",1,train
f94ef358bb3e91d517446454edff6535bcfe8e4a,tensorflow/tensorflow,"Fix `tf.raw_ops.RaggedTensorToTensor` failing CHECK in `tensor.cc`.

PiperOrigin-RevId: 368300502
Change-Id: I91255d23c4bfd3aa3c029aac773937c09daf3c64",ragged_tensor_to_tensor_op.cc,"@@ -345,6 +345,11 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     INDEX_TYPE first_dimension;
+    const Tensor first_partition_tensor =
+        context->input(kFirstPartitionInputIndex);
+    OP_REQUIRES(context, first_partition_tensor.NumElements() > 0,
+                errors::InvalidArgument(""Invalid first partition input. Tensor ""
+                                        ""requires at least one element.""));
     OP_REQUIRES_OK(context, GetFirstDimensionSize(context, &first_dimension));
     vector<INDEX_TYPE> output_size;
     OP_REQUIRES_OK(context,
",1,train
41727ff06111117bdf86b37db198217fd7a143cc,tensorflow/tensorflow,"Validate that a and b are proper sparse tensors

PiperOrigin-RevId: 373248068
Change-Id: I0a2041a0747901b3f00387a6a3bce9bca6b0b3b1",sparse_add_op.cc,"@@ -44,6 +44,11 @@ class SparseAddOp : public OpKernel {
                     b_indices->shape().DebugString()));
     const int64 a_nnz = a_indices->dim_size(0);
     const int64 b_nnz = b_indices->dim_size(0);
+    const int num_dims = a_indices->dim_size(1);
+    OP_REQUIRES(ctx, b_indices->dim_size(1) == num_dims,
+                errors::InvalidArgument(
+                    ""Input indices must have the same dimension, got "",
+                    num_dims, "" and "", b_indices->dim_size(1)));
 
     OP_REQUIRES_OK(ctx, ctx->input(""a_values"", &a_values_t));
     OP_REQUIRES_OK(ctx, ctx->input(""b_values"", &b_values_t));
@@ -72,6 +77,13 @@ class SparseAddOp : public OpKernel {
                     ""Input shapes should be a vector but received shapes "",
                     a_shape->shape().DebugString(), "" and "",
                     b_shape->shape().DebugString()));
+    OP_REQUIRES(
+        ctx, a_shape->NumElements() == num_dims,
+        errors::InvalidArgument(""Second dimension of a_indices and length of ""
+                                ""a_shape must match, got "",
+                                num_dims, "" and "", a_shape->NumElements()));
+    OP_REQUIRES(ctx, num_dims > 0,
+                errors::InvalidArgument(""Tesors must not be empty""));
     OP_REQUIRES(
         ctx, a_shape->IsSameSize(*b_shape),
         errors::InvalidArgument(
@@ -100,11 +112,6 @@ class SparseAddOp : public OpKernel {
     std::vector<std::pair<bool, int64>> entries_to_copy;  // from_a?, idx
     entries_to_copy.reserve(a_nnz + b_nnz);
     std::vector<T> out_values;
-    const int num_dims = a_shape->dim_size(0);
-
-    OP_REQUIRES(ctx, num_dims > 0,
-                errors::InvalidArgument(""Invalid input_a shape. Received: "",
-                                        a_shape->DebugString()));
 
     // The input and output sparse tensors are assumed to be ordered along
     // increasing dimension number.
",1,train
6fd02f44810754ae7481838b6a67c5df7f909ca3,tensorflow/tensorflow,"Fix `tf.raw_ops.SparseAdd` invalid memory access failure.

PiperOrigin-RevId: 370568774
Change-Id: I5f73b31c865f2948a1c8dfb7ebd22b3cfb6405bf",sparse_add_op.cc,"@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/op_requires.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_util.h""
@@ -101,6 +102,10 @@ class SparseAddOp : public OpKernel {
     std::vector<T> out_values;
     const int num_dims = a_shape->dim_size(0);
 
+    OP_REQUIRES(ctx, num_dims > 0,
+                errors::InvalidArgument(""Invalid input_a shape. Received: "",
+                                        a_shape->DebugString()));
+
     // The input and output sparse tensors are assumed to be ordered along
     // increasing dimension number.
     int64 i = 0, j = 0;
",1,test
c5b0d5f8ac19888e46ca14b0e27562e7fbbee9a9,tensorflow/tensorflow,"Fix the CHECK failure in tf.raw_ops.QuantizeAndDequantizeV2.

PiperOrigin-RevId: 371361603
Change-Id: Ia70e34d41adaadddf928e95e5e5c5c97d5bc60d0",quantize_and_dequantize_op.cc,"@@ -72,6 +72,9 @@ class QuantizeAndDequantizeV2Op : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(
+        ctx, axis_ >= -1,
+        errors::InvalidArgument(""Axis must be at least -1. Found "", axis_));
     OP_REQUIRES(
         ctx, (axis_ == -1 || axis_ < input.shape().dims()),
         errors::InvalidArgument(""Shape must be at least rank "", axis_ + 1,
",1,train
1d04d7d93f4ed3854abf75d6b712d72c3f70d6b6,tensorflow/tensorflow,"Fix heap-buffer-overflow issue with `tf.raw_ops.SparseReshape`.

PiperOrigin-RevId: 371218558
Change-Id: I6a6dc5bf15b50a1d05bdd95e9ba347cb39f40f45",sparse_reshape_op.cc,"@@ -26,6 +26,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/kernels/reshape_util.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 
@@ -38,6 +39,17 @@ class SparseReshapeOp : public OpKernel {
   explicit SparseReshapeOp(OpKernelConstruction* context) : OpKernel(context) {}
 
   void Compute(OpKernelContext* context) override {
+    const Tensor& input_indices_in = context->input(0);
+    const Tensor& input_shape_in = context->input(1);
+
+    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(input_indices_in.shape()),
+                errors::InvalidArgument(""Input must be a matrix.""));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(input_shape_in.shape()),
+                errors::InvalidArgument(""Input shape must be a vector.""));
+    OP_REQUIRES(context,
+                input_indices_in.dim_size(1) == input_shape_in.dim_size(0),
+                errors::InvalidArgument(
+                    ""Input tensor rank must match input shape length.""));
     ReshapeSparseTensor<Device>(context, context->input(0), context->input(1),
                                 context->input(2), 0 /* output indices index */,
                                 1 /* output shape index */);
",1,train
0ab290774f91a23bebe30a358fde4e53ab4876a0,tensorflow/tensorflow,"Ensure validation sticks in banded_triangular_solve_op

PiperOrigin-RevId: 373275480
Change-Id: Id7717cf275b2d6fdb9441fbbe166d555182d2e79",banded_triangular_solve_op.cc,"@@ -217,6 +217,7 @@ class BandedTriangularSolveOpCpu : public OpKernel {
     const Tensor& in1 = ctx->input(1);
 
     ValidateInputTensors(ctx, in0, in1);
+    if (!ctx->status().ok()) return;
 
     MatMulBCast bcast(in0.shape().dim_sizes(), in1.shape().dim_sizes());
     OP_REQUIRES(
",1,train
14607c0707040d775e06b6817325640cb4b5864c,tensorflow/tensorflow,"Fix nullptr deref in `tf.raw_ops.CTCLoss`.

PiperOrigin-RevId: 372266334
Change-Id: Ic52c3e9f13a38f54482d670907eda1688450862b",ctc_loss_op.cc,"@@ -109,6 +109,9 @@ class CTCLossOp : public OpKernel {
 
     const TensorShape& inputs_shape = inputs->shape();
     const int64 max_time = inputs_shape.dim_size(0);
+    OP_REQUIRES(ctx, max_time != 0,
+                errors::InvalidArgument(
+                    ""Max time or first dimension of input cannot be 0.""));
     const int64 batch_size = inputs_shape.dim_size(1);
     const int64 num_classes_raw = inputs_shape.dim_size(2);
     OP_REQUIRES(
",1,train
4504a081af71514bb1828048363e6540f797005b,tensorflow/tensorflow,"Fix OOB read issue with `tf.raw_ops.CTCLoss`.

PiperOrigin-RevId: 372242187
Change-Id: I347228ed8c04e1d2eb9d2479ae52f51d1b512c6e",ctc_loss_op.cc,"@@ -100,6 +100,10 @@ class CTCLossOp : public OpKernel {
                 errors::InvalidArgument(""sequence_length is not a vector""));
     OP_REQUIRES(ctx, TensorShapeUtils::IsMatrix(labels_indices->shape()),
                 errors::InvalidArgument(""labels_indices is not a matrix""));
+    OP_REQUIRES(ctx, labels_indices->dim_size(1) > 1,
+                errors::InvalidArgument(
+                    ""labels_indices second dimension must be >= 1. Received "",
+                    labels_indices->dim_size(1)));
     OP_REQUIRES(ctx, TensorShapeUtils::IsVector(labels_values->shape()),
                 errors::InvalidArgument(""labels_values is not a vector""));
 
",1,train
698e01511f62a3c185754db78ebce0eee1f0184d,tensorflow/tensorflow,"Fix `tf.io.decode_raw` bugs and update documentation.

Fixes cases where specifying `fixed_length` resulted in data loss and even segfault and corruption of the Python interpreter. The fix is subtle but needed due to pointer arithmetic rules.

Makes sure that `fixed_length` does not change the output when present but not needed.

Eliminates needless copy and cast in the main codepath.

PiperOrigin-RevId: 371322725
Change-Id: I514ef67a2961c86422f69d05122d31615e87896c",decode_padded_raw_op.cc,"@@ -19,6 +19,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/common_shape_fns.h""
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/op_requires.h""
 #include ""tensorflow/core/framework/shape_inference.h""
 
 namespace tensorflow {
@@ -83,14 +84,13 @@ class DecodePaddedRawOp : public OpKernel {
     // can copy the memory directly.
     if (!convert_data_endianness_ || sizeof(T) == 1) {
       for (int64 i = 0; i < flat_in.size(); ++i) {
-        const T* in_data = reinterpret_cast<const T*>(flat_in(i).data());
-
-        if (flat_in(i).size() > fixed_length) {
-          memcpy(out_data, in_data, fixed_length);
-        } else {
-          memcpy(out_data, in_data, flat_in(i).size());
-        }
-        out_data += fixed_length;
+        const auto to_copy =
+            std::min(flat_in(i).size(), static_cast<size_t>(fixed_length));
+        memcpy(out_data, flat_in(i).data(), to_copy);
+        // Note: increase out_data by width since it's already of type T* so
+        // each shift amount is implicitly multiplied by sizeof(T) according to
+        // pointer arithmetic rules.
+        out_data += width;
       }
     } else {
       // Otherwise, the data is not in the host's byte order, and rather than a
@@ -105,7 +105,10 @@ class DecodePaddedRawOp : public OpKernel {
              p_in += sizeof(T), p_out += sizeof(T)) {
           std::reverse_copy(p_in, p_in + sizeof(T), p_out);
         }
-        out_data += fixed_length;
+        // Note: increase out_data by width since it's already of type T* so
+        // each shift amount is implicitly multiplied by sizeof(T) according to
+        // pointer arithmetic rules.
+        out_data += width;
       }
     }
   }
",1,train
698e01511f62a3c185754db78ebce0eee1f0184d,tensorflow/tensorflow,"Fix `tf.io.decode_raw` bugs and update documentation.

Fixes cases where specifying `fixed_length` resulted in data loss and even segfault and corruption of the Python interpreter. The fix is subtle but needed due to pointer arithmetic rules.

Makes sure that `fixed_length` does not change the output when present but not needed.

Eliminates needless copy and cast in the main codepath.

PiperOrigin-RevId: 371322725
Change-Id: I514ef67a2961c86422f69d05122d31615e87896c",parsing_ops.py,"@@ -850,8 +850,8 @@ def decode_raw(input_bytes,
                name=None):
   r""""""Convert raw bytes from input tensor into numeric tensors.
 
-  The input tensor is interpreted as a sequence of bytes. These bytes are then
-  decoded as numbers in the format specified by `out_type`.
+  Every component of the input tensor is interpreted as a sequence of bytes.
+  These bytes are then decoded as numbers in the format specified by `out_type`.
 
   >>> tf.io.decode_raw(tf.constant(""1""), tf.uint8)
   <tf.Tensor: shape=(1,), dtype=uint8, numpy=array([49], dtype=uint8)>
@@ -909,22 +909,35 @@ def decode_raw(input_bytes,
   >>> tf.io.decode_raw(tf.constant([""1212""]), tf.uint16, fixed_length=4)
   <tf.Tensor: shape=(1, 2), dtype=uint16, numpy=array([[12849, 12849]], ...
 
-  Note: There is currently a bug in `fixed_length` that can result in data loss:
-
-  >>> # truncated to length of type as it matches fixed_length
-  >>> tf.io.decode_raw(tf.constant([""1212""]), tf.uint16, fixed_length=2)
-  <tf.Tensor: shape=(1, 1), dtype=uint16, numpy=array([[12849]], dtype=uint16)>
-  >>> # ignores the second component
-  >>> tf.io.decode_raw(tf.constant([""12"",""34""]), tf.uint16, fixed_length=2)
-  <tf.Tensor: shape=(2, 1), dtype=uint16, numpy=
-  array([[12849],
-         [    0]], dtype=uint16)>
-  >>> tf.io.decode_raw(tf.constant([""12"",""34""]), tf.uint16, fixed_length=4)
-  <tf.Tensor: shape=(2, 2), dtype=uint16, numpy=
-  array([[12849,     0],
-         [    0,     0]], dtype=uint16)>
-
-  This will be fixed on a future release of TensorFlow.
+  If the input value is larger than `fixed_length`, it is truncated:
+
+  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
+  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2)
+  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([513], dtype=uint16)>
+  >>> hex(513)
+  '0x201'
+
+  If `little_endian` and `fixed_length` are specified, truncation to the fixed
+  length occurs before endianness conversion:
+
+  >>> x=''.join([chr(1), chr(2), chr(3), chr(4)])
+  >>> tf.io.decode_raw(x, tf.uint16, fixed_length=2, little_endian=False)
+  <tf.Tensor: shape=(1,), dtype=uint16, numpy=array([258], dtype=uint16)>
+  >>> hex(258)
+  '0x102'
+
+  If input values all have the same length, then specifying `fixed_length`
+  equal to the size of the strings should not change output:
+
+  >>> x = [""12345678"", ""87654321""]
+  >>> tf.io.decode_raw(x, tf.int16)
+  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
+  array([[12849, 13363, 13877, 14391],
+         [14136, 13622, 13108, 12594]], dtype=int16)>
+  >>> tf.io.decode_raw(x, tf.int16, fixed_length=len(x[0]))
+  <tf.Tensor: shape=(2, 4), dtype=int16, numpy=
+  array([[12849, 13363, 13877, 14391],
+         [14136, 13622, 13108, 12594]], dtype=int16)>
 
   Args:
     input_bytes:
",1,train
e07e1c3d26492c06f078c7e5bf2d138043e199c1,tensorflow/tensorflow,"Prevent memory overflow in ParseAttrValue from nested tensors.

PiperOrigin-RevId: 370108442
Change-Id: I84d64a5e8895a6aeffbf4749841b4c54d51b5889",attr_value_util.cc,"@@ -38,6 +38,9 @@ namespace {
 // Do not construct large tensors to compute their hash or compare for equality.
 constexpr int kMaxAttrValueTensorByteSize = 32 * 1024 * 1024;  // 32mb
 
+// Limit nesting of tensors to 100 deep to prevent memory overflow.
+constexpr int kMaxTensorNestDepth = 100;
+
 // Return the size of the tensor represented by this TensorProto. If shape is
 // not fully defined return -1.
 int64 TensorByteSize(const TensorProto& t) {
@@ -224,6 +227,54 @@ string SummarizeFunc(const NameAttrList& func) {
   return strings::StrCat(func.name(), ""["", absl::StrJoin(entries, "", ""), ""]"");
 }
 
+bool ParseAttrValueHelper_TensorNestsUnderLimit(int limit, string to_parse) {
+  int nests = 0;
+  int maxed_out = to_parse.length();
+  int open_curly = to_parse.find('{');
+  int open_bracket = to_parse.find('<');
+  int close_curly = to_parse.find('}');
+  int close_bracket = to_parse.find('>');
+  if (open_curly == -1) {
+    open_curly = maxed_out;
+  }
+  if (open_bracket == -1) {
+    open_bracket = maxed_out;
+  }
+  int min = std::min(open_curly, open_bracket);
+  do {
+    if (open_curly == maxed_out && open_bracket == maxed_out) {
+      return true;
+    }
+    if (min == open_curly) {
+      nests += 1;
+      open_curly = to_parse.find('{', open_curly + 1);
+      if (open_curly == -1) {
+        open_curly = maxed_out;
+      }
+    } else if (min == open_bracket) {
+      nests += 1;
+      open_bracket = to_parse.find('<', open_bracket + 1);
+      if (open_bracket == -1) {
+        open_bracket = maxed_out;
+      }
+    } else if (min == close_curly) {
+      nests -= 1;
+      close_curly = to_parse.find('}', close_curly + 1);
+      if (close_curly == -1) {
+        close_curly = maxed_out;
+      }
+    } else if (min == close_bracket) {
+      nests -= 1;
+      close_bracket = to_parse.find('>', close_bracket + 1);
+      if (close_bracket == -1) {
+        close_bracket = maxed_out;
+      }
+    }
+    min = std::min({open_curly, open_bracket, close_curly, close_bracket});
+  } while (nests < 100);
+  return false;
+}
+
 }  // namespace
 
 string SummarizeAttrValue(const AttrValue& attr_value) {
@@ -448,7 +499,12 @@ bool ParseAttrValue(StringPiece type, StringPiece text, AttrValue* out) {
   } else {
     to_parse = strings::StrCat(field_name, "": "", text);
   }
-
+  if (field_name == ""tensor"") {
+    if (!ParseAttrValueHelper_TensorNestsUnderLimit(kMaxTensorNestDepth,
+                                                    to_parse)) {
+      return false;
+    }
+  }
   return ProtoParseFromString(to_parse, out);
 }
 
",1,train
e6340f0665d53716ef3197ada88936c2a5f7a2d3,tensorflow/tensorflow,"Handle a special grappler case resulting in crash.

It might happen that a malformed input could be used to trick Grappler into trying to optimize a node with no inputs. This, in turn, would produce a null pointer dereference and a segfault.

PiperOrigin-RevId: 369242852
Change-Id: I2e5cbe7aec243d34a6d60220ac8ac9b16f136f6b",arithmetic_optimizer.cc,"@@ -2047,6 +2047,12 @@ class ReorderCastLikeAndValuePreserving : public ArithmeticOptimizerStage {
 
   Status TrySimplify(NodeDef* consumer, string* simplified_node_name) override {
     NodeDef* producer;
+
+    if (consumer->input_size() < 1) {
+      return errors::FailedPrecondition(""Node "", simplified_node_name,
+                                        "" lacks inputs"");
+    }
+
     TF_RETURN_IF_ERROR(GetInputNode(consumer->input(0), &producer));
     const bool producer_is_cast = IsCastLike(*producer);
     const bool can_optimize =
@@ -2538,6 +2544,11 @@ class ReplaceMulWithSquare : public ArithmeticOptimizerStage {
   ~ReplaceMulWithSquare() override = default;
 
   bool IsSupported(const NodeDef* node) const override {
+    if (!node || node->input_size() < 2) {
+      // Invalid node
+      return false;
+    }
+
     return IsAnyMul(*node) && node->input(0) == node->input(1);
   }
 
",1,train
e6340f0665d53716ef3197ada88936c2a5f7a2d3,tensorflow/tensorflow,"Handle a special grappler case resulting in crash.

It might happen that a malformed input could be used to trick Grappler into trying to optimize a node with no inputs. This, in turn, would produce a null pointer dereference and a segfault.

PiperOrigin-RevId: 369242852
Change-Id: I2e5cbe7aec243d34a6d60220ac8ac9b16f136f6b",dependency_optimizer.cc,"@@ -68,6 +68,12 @@ bool DependencyOptimizer::SafeToRemoveIdentity(const NodeDef& node) const {
     // The output values of this node may be needed.
     return false;
   }
+
+  if (node.input_size() < 1) {
+    // Node lacks input, is invalid
+    return false;
+  }
+
   const NodeDef* input = node_map_->GetNode(NodeName(node.input(0)));
   CHECK(input != nullptr) << ""node = "" << node.name()
                           << "" input = "" << node.input(0);
",1,train
82e6203221865de4008445b13c69b6826d2b28d9,tensorflow/tensorflow,"Fix segfaults in `tf.raw_ops.SparseCountSparseOutput`.

PiperOrigin-RevId: 360547563
Change-Id: I781c7af4b54a63d867c6e18d43a44d64a5c4e7c9",count_ops.cc,"@@ -192,6 +192,10 @@ class SparseCount : public OpKernel {
               ""; values shape: "", values.shape().DebugString()));
     }
 
+    OP_REQUIRES(context, shape.NumElements() != 0,
+                errors::InvalidArgument(
+                    ""The shape argument requires at least one element.""));
+
     bool is_1d = shape.NumElements() == 1;
     int num_batches = is_1d ? 1 : shape.flat<int64>()(0);
     int num_values = values.NumElements();
@@ -212,6 +216,14 @@ class SparseCount : public OpKernel {
 
     for (int idx = 0; idx < num_values; ++idx) {
       int batch = is_1d ? 0 : indices_values(idx, 0);
+      if (batch >= num_batches) {
+        OP_REQUIRES(context, batch < num_batches,
+                    errors::InvalidArgument(
+                        ""Indices value along the first dimension must be "",
+                        ""lower than the first index of the shape."", ""Got "",
+                        batch, "" as batch and "", num_batches,
+                        "" as the first dimension of the shape.""));
+      }
       const auto& value = values_values(idx);
       if (value >= 0 && (maxlength_ <= 0 || value < maxlength_)) {
         if (binary_output_) {
",1,train
87158f43f05f2720a374f3e6d22a7aaa3a33f750,tensorflow/tensorflow,"Prevent heap OOB in sparse reduction ops.

PiperOrigin-RevId: 387934524
Change-Id: I894aa30f1e454f09b471d565b4a325da49322c1a",sparse_reduce_op.cc,"@@ -219,7 +219,20 @@ class SparseReduceOp : public OpKernel {
     sp.Reorder<T>(reduction.reorder_dims);
     for (const auto &g : sp.group(reduction.group_by_dims)) {
       Op::template Run<T>(ctx, reduced_val, g.template values<T>());
+      OP_REQUIRES(ctx,
+                  output_strides.empty() ||
+                  (g.group().size() == output_strides.size()),
+                  errors::Internal(
+                      ""Expected group size and output_strides size to match"",
+                      "", but got "", g.group().size(), "" and "",
+                      output_strides.size()));
       const int64_t idx = CoordinatesToFlatIndex(g.group(), output_strides);
+      OP_REQUIRES(ctx,
+                  idx >= 0 && idx < out_flat.size(),
+                  errors::Internal(
+                      ""Obtained a write index of "", idx,
+                      "" which is outside of bounds of [0, "",
+                      out_flat.size(), "")""));
       out_flat(idx) = reduced_val();
       VLOG(2) << ""coords: "" << absl::StrJoin(g.group(), "","")
               << ""; idx: "" << idx << ""; group "" << Op::Name() << "": ""
",1,train
d9204be9f49520cdaaeb2541d1dc5187b23f31d9,tensorflow/tensorflow,"Disallow division by zero FPE in tf.raw_ops.SparseDenseCwiseDiv

PiperOrigin-RevId: 383959809
Change-Id: Ibe88458bdf66a686c93e354b8255dec94285c560",sparse_dense_binary_op_shared.cc,"@@ -114,7 +114,10 @@ class SparseDenseBinaryOpShared : public OpKernel {
     OP_REQUIRES_OK(
         ctx, ctx->allocate_temp(DataTypeToEnum<T>::value, TensorShape({nnz}),
                                 &dense_gathered));
-
+    bool op_is_div = false;
+    if (absl::StrContains(ctx->op_kernel().type_string_view(), ""Div"")) {
+      op_is_div = true;
+    }
     // Pulls relevant entries from the dense side, with reshape and broadcasting
     // *of the dense side* taken into account.  Use a TensorRef to avoid blowing
     // up memory.
@@ -143,6 +146,12 @@ class SparseDenseBinaryOpShared : public OpKernel {
           errors::InvalidArgument(""Provided indices are out-of-bounds w.r.t. "" \
                                   ""dense side with broadcasted shape""));       \
       dense_gathered_flat(i) = rhs_ref.coeff(idx);                             \
+      if (op_is_div) {                                                         \
+        OP_REQUIRES(ctx, dense_gathered_flat(i) != 0,                          \
+                    errors::InvalidArgument(                                   \
+                        ""SparseDenseCwiseDiv cannot divide by zero,""           \
+                        ""but input dense tensor contains zero ""));             \
+      }                                                                        \
     }                                                                          \
     break;                                                                     \
   }
",1,train
5dc7f6981fdaf74c8c5be41f393df705841fb7c5,tensorflow/tensorflow,"Fix accessing possible nullptr in tensorflow::data::CompressElement and UncompressElement which are used in tf.data.service.

PiperOrigin-RevId: 373920841
Change-Id: Ia88d78aee09fa19bb53a0f163fd19620d0c68743",compression_utils.cc,"@@ -29,9 +29,10 @@ Status CompressElement(const std::vector<Tensor>& element,
   int64 total_size = 0;
   for (auto& component : element) {
     if (DataTypeCanUseMemcpy(component.dtype())) {
-      // Some datatypes can be memcopied, allowing us to save two copies
-      // (AsProtoTensorContent and SerializeToArray).
-      total_size += DMAHelper::buffer(&component)->size();
+      const TensorBuffer* buffer = DMAHelper::buffer(&component);
+      if (buffer) {
+        total_size += buffer->size();
+      }
     } else {
       non_memcpy_components.emplace_back();
       component.AsProtoTensorContent(&non_memcpy_components.back());
@@ -53,8 +54,10 @@ Status CompressElement(const std::vector<Tensor>& element,
     component.shape().AsProto(metadata->mutable_tensor_shape());
     if (DataTypeCanUseMemcpy(component.dtype())) {
       const TensorBuffer* buffer = DMAHelper::buffer(&component);
-      memcpy(position, buffer->data(), buffer->size());
-      metadata->set_tensor_size_bytes(buffer->size());
+      if (buffer) {
+        memcpy(position, buffer->data(), buffer->size());
+        metadata->set_tensor_size_bytes(buffer->size());
+      }
     } else {
       TensorProto& proto = non_memcpy_components[non_memcpy_component_index++];
       proto.SerializeToArray(position, proto.ByteSizeLong());
@@ -94,8 +97,13 @@ Status UncompressElement(const CompressedElement& compressed,
     if (DataTypeCanUseMemcpy(metadata.dtype())) {
       out->emplace_back(metadata.dtype(), metadata.tensor_shape());
       TensorBuffer* buffer = DMAHelper::buffer(&out->back());
-      iov[i].iov_base = buffer->data();
-      iov[i].iov_len = buffer->size();
+      if (buffer) {
+        iov[i].iov_base = buffer->data();
+        iov[i].iov_len = buffer->size();
+      } else {
+        iov[i].iov_base = nullptr;
+        iov[i].iov_len = 0;
+      }
     } else {
       // Allocate an empty Tensor. We will fill it out later after
       // uncompressing into the tensor_proto_str.
",1,train
301ae88b331d37a2a16159b65b255f4f9eb39314,tensorflow/tensorflow,"Fix null ptr deref in tf.raw_ops.RaggedTensorToTensor

PiperOrigin-RevId: 384257511
Change-Id: I0484ad285039d132d6c41b284a7fcdd2b774a38e",ragged_tensor_to_tensor_op.cc,"@@ -348,6 +348,9 @@ class RaggedTensorToTensorBaseOp : public OpKernel {
   Status GetFirstDimensionSize(OpKernelContext* context, INDEX_TYPE* result) {
     const Tensor first_partition_tensor =
         context->input(kFirstPartitionInputIndex);
+    if (row_partition_types_.empty()) {
+      return errors::InvalidArgument(""No row_partition_types given."");
+    }
     const RowPartitionType first_partition_type = row_partition_types_[0];
     switch (first_partition_type) {
       case RowPartitionType::FIRST_DIM_SIZE:
",1,train
9e82dce6e6bd1f36a57e08fa85af213e2b2f2622,tensorflow/tensorflow,"Fix NPE in restoring code.

PiperOrigin-RevId: 388303253
Change-Id: Ia8c68568cb854bca538909a182b31a618d68ce55",save_restore_tensor.cc,"@@ -151,11 +151,18 @@ void RestoreTensor(OpKernelContext* context,
         context, size == 1,
         errors::InvalidArgument(
             ""Input 0 (file_pattern) must be a string scalar; got a tensor of "",
-            size, ""elements""));
+            size, "" elements""));
   }
   const string& file_pattern = file_pattern_t.flat<tstring>()(0);
 
   const Tensor& tensor_name_t = context->input(1);
+  {
+    const int64_t size = tensor_name_t.NumElements();
+    OP_REQUIRES(context, size > restore_index,
+                errors::InvalidArgument(
+                    ""Input 1 (file_pattern) must be a have at least "",
+                    restore_index + 1, "" elements""));
+  }
   const string& tensor_name = tensor_name_t.flat<tstring>()(restore_index);
 
   // If we cannot find a cached reader we will allocate our own.
",1,train
4923de56ec94fff7770df259ab7f2288a74feb41,tensorflow/tensorflow,"Don't do any work when reshaping 0 elements sparse tensor.

If reshaping to 0 elements tensor, check that input has no elements.
If reshaping no elements input, check that output has no elements.

PiperOrigin-RevId: 388296986
Change-Id: Iadc9fe7252e14313ca987e69bf0d7042fd10232a",reshape_util.cc,"@@ -174,6 +174,12 @@ void ReshapeSparseTensor(OpKernelContext *context,
                                           TensorShape({nnz, output_rank}),
                                           &result_indices));
   if (nnz > 0) {
+    OP_REQUIRES(
+        context, dense_size > 0 && product > 0,
+        errors::InvalidArgument(
+            ""Input tensor has "", nnz, "" non zero elements but input shape ("",
+            input_shape.DebugString(), "") or output shape ("",
+            output_shape.DebugString(), "") is empty""));
     OP_REQUIRES_OK(context, functor::ReshapeSparseTensorFunctor<Device>()(
                                 context, input_shape, output_shape,
                                 input_indices_in.matrix<int64>(),
",1,test
a2b743f6017d7b97af1fe49087ae15f0ac634373,tensorflow/tensorflow,"Fix heap OOB in `tf.raw_ops.RaggedGather`

PiperOrigin-RevId: 388355464
Change-Id: If14d96231d1cd7aad7c4d1c22c1bab1576b75717",ragged_gather_op.cc,"@@ -58,15 +58,21 @@ class RaggedGatherOpBase : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     // Get the input Tensors.
+
     OpInputList params_nested_splits_in;
     OP_REQUIRES_OK(context, context->input_list(""params_nested_splits"",
                                                 &params_nested_splits_in));
+    OP_REQUIRES(
+        context, params_nested_splits_in.size() > 0,
+        errors::InvalidArgument(""params_nested_splits must be non empty""));
+
     const Tensor& params_dense_values_in =
         context->input(params_nested_splits_in.size());
     const Tensor& indices_in =
         context->input(params_nested_splits_in.size() + 1);
 
-    DCHECK_GT(params_nested_splits_in.size(), 0);  // Enforced by REGISTER_OP.
+    OP_REQUIRES(context, params_nested_splits_in[0].dims() > 0,
+                errors::InvalidArgument(""Split tensors must not be scalars""));
     SPLITS_TYPE num_params = params_nested_splits_in[0].dim_size(0) - 1;
     OP_REQUIRES_OK(context, ValidateIndices(indices_in, num_params));
 
",1,train
4aacb30888638da75023e6601149415b39763d76,tensorflow/tensorflow,"Disallow division by zero FPE in `tf.raw_ops.ResourceScatterDiv`

Had to update a test that was broken.

PiperOrigin-RevId: 388516976
Change-Id: Ic358e6bf0559e011539974d453fc7aa18b427e9c",resource_variable_ops.cc,"@@ -873,6 +873,35 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_GATHER_ND_GPU);
 #undef REGISTER_GATHER_ND_ALL_INDICES
 #undef REGISTER_GATHER_ND_FULL
 
+namespace {
+
+template <typename Device>
+bool isCPUDevice() {
+  return false;
+}
+
+template <>
+bool isCPUDevice<CPUDevice>() {
+  return true;
+}
+
+template <typename T>
+bool ValidateInput(const Tensor& updates) {
+  const auto updates_flat = updates.flat<T>();
+  const T zero(0);
+  for (int i = 0; i < updates.NumElements(); i++) {
+    if (updates_flat(i) == zero) return false;
+  }
+  return true;
+}
+
+template <>
+bool ValidateInput<Variant>(const Tensor& updates) {
+  return true;
+}
+
+}  // namespace
+
 template <typename Device, typename T, typename Index, scatter_op::UpdateOp op>
 class ResourceScatterUpdateOp : public OpKernel {
  public:
@@ -939,6 +968,12 @@ class ResourceScatterUpdateOp : public OpKernel {
                                 "" indexing: "", params->dim_size(0), "" > "",
                                 std::numeric_limits<Index>::max()));
 
+    // Prevent division by 0
+    if (isCPUDevice<Device>() && op == tensorflow::scatter_op::UpdateOp::DIV) {
+      OP_REQUIRES(c, ValidateInput<T>(updates),
+                  errors::InvalidArgument(""updates must not contain 0""));
+    }
+
     if (N > 0) {
       auto indices_flat = indices.flat<Index>();
       auto params_flat = params->flat_outer_dims<T>();
",1,train
4aacb30888638da75023e6601149415b39763d76,tensorflow/tensorflow,"Disallow division by zero FPE in `tf.raw_ops.ResourceScatterDiv`

Had to update a test that was broken.

PiperOrigin-RevId: 388516976
Change-Id: Ic358e6bf0559e011539974d453fc7aa18b427e9c",sharded_variable_test.py,"@@ -175,8 +175,9 @@ class ShardedVariableTest(test.TestCase, parameterized.TestCase):
                             'scatter_update')
   def test_scatter_ops_even_partition(self, op):
     v = variables_lib.Variable(array_ops.zeros((30, 1)))
+    # Make sure values does not contain 0 due to testing `scatter_div`!
     sparse_delta = ops.IndexedSlices(
-        values=constant_op.constant([[0.], [1.], [2.], [3.], [4.]]),
+        values=constant_op.constant([[1.], [2.], [3.], [4.], [5.]]),
         indices=constant_op.constant([0, 10, 12, 21, 22]))
 
     v0 = variables_lib.Variable(array_ops.zeros((10, 1)))
",1,train
482da92095c4d48f8784b1f00dda4f81c28d2988,tensorflow/tensorflow,"Ensure non-empty padding_value input to tf.raw_ops.MatrixDiagPartV2, if a padding_value is input

PiperOrigin-RevId: 388314614
Change-Id: If0b51ad58d5d8543a6be6ce8f42ae4755c80d55f",matrix_diag_op.cc,"@@ -89,7 +89,10 @@ class MatrixDiagPartOp : public OpKernel {
           upper_diag_index = diag_index.flat<int32>()(1);
         }
       }
-      padding_value = context->input(2).flat<T>()(0);
+      const Tensor& padding_in = context->input(2);
+      OP_REQUIRES(context, padding_in.NumElements() == 1,
+                  errors::InvalidArgument(""Padding must be scalar.""));
+      padding_value = padding_in.flat<T>()(0);
     }
     const TensorShape& input_shape = input.shape();
 
",1,train
8a6e874437670045e6c7dc6154c7412b4a2135e2,tensorflow/tensorflow,"Validate num_elements input in tf.raw_ops.TensorListReserve

PiperOrigin-RevId: 383954564
Change-Id: I454bd78eff85bc4f16ddb7e608596971cca47f8f",list_kernels.cc,"@@ -302,6 +302,10 @@ class TensorListReserve : public OpKernel {
     PartialTensorShape element_shape;
     OP_REQUIRES_OK(c, TensorShapeFromTensor(c->input(0), &element_shape));
     int32 num_elements = c->input(1).scalar<int32>()();
+    OP_REQUIRES(c, num_elements >= 0,
+                errors::InvalidArgument(""The num_elements to reserve must be a ""
+                                        ""non negative number, but got "",
+                                        num_elements));
     TensorList output;
     output.element_shape = element_shape;
     output.element_dtype = element_dtype_;
",1,train
96f364a1ca3009f98980021c4b32be5fdcca33a1,tensorflow/tensorflow,"Validate axis input in tf.raw_ops.QuantizeAndDequantizeV4Grad

PiperOrigin-RevId: 388291385
Change-Id: I3bab68dc61d935afa96c0da021a7b722c6dc8dc8",quantize_and_dequantize_op.cc,"@@ -158,6 +158,13 @@ class QuantizeAndDequantizeV4GradientOp : public OpKernel {
     Tensor* input_backprop = nullptr;
     OP_REQUIRES_OK(ctx,
                    ctx->allocate_output(0, input.shape(), &input_backprop));
+    OP_REQUIRES(
+        ctx, axis_ >= -1,
+        errors::InvalidArgument(""Axis must be at least -1. Found "", axis_));
+    OP_REQUIRES(ctx, (axis_ == -1 || axis_ < input.shape().dims()),
+                errors::InvalidArgument(
+                    ""Axis should be -1 or 0 or a positive value less than "",
+                    input.shape().dims(), ""but given axis value was "", axis_));
 
     OP_REQUIRES(
         ctx, input.IsSameSize(gradient),
",1,train
c283e542a3f422420cfdb332414543b62fc4e4a5,tensorflow/tensorflow,"Disallow negative ngram_widths values in tf.raw_ops.StringNGrams

PiperOrigin-RevId: 387148179
Change-Id: I641395a09a208be72ef9b3ceb128cf8a83a0775b",string_ngrams_op.cc,"@@ -53,6 +53,12 @@ class StringNGramsOp : public tensorflow::OpKernel {
   }
 
   void Compute(tensorflow::OpKernelContext* context) override {
+    for (int ngram_width : ngram_widths_) {
+      OP_REQUIRES(
+          context, ngram_width > 0,
+          errors::InvalidArgument(""ngram_widths must contain positive values""));
+    }
+
     const tensorflow::Tensor* data;
     OP_REQUIRES_OK(context, context->input(""data"", &data));
     const auto& input_data = data->flat<tstring>().data();
",1,train
02cc160e29d20631de3859c6653184e3f876b9d7,tensorflow/tensorflow,"Prevent nullptr deref in SparseTensorSliceDataset

The arguments must determine a valid sparse tensor. This means that when indices are empty then the values must be empty too (and the reverse).

Also added test, by modifying existing test with empty sparse tensor to now run with an invalid sparse tensor input.

PiperOrigin-RevId: 388562757
Change-Id: Id8b54cd7c2316025b4f9a77292c8fb5344d17609",sparse_tensor_slice_dataset_op.cc,"@@ -241,6 +241,17 @@ class SparseTensorSliceDatasetOp : public DatasetOpKernel {
                 errors::InvalidArgument(
                     ""Input indices should be a matrix but received shape "",
                     indices->shape().DebugString()));
+
+    const auto num_indices = indices->NumElements();
+    const auto num_values = values->NumElements();
+    if (num_indices == 0 || num_values == 0) {
+      OP_REQUIRES(ctx, num_indices == num_values,
+                  errors::InvalidArgument(
+                      ""If indices or values are empty, the other one must also ""
+                      ""be. Got indices of shape "",
+                      indices->shape().DebugString(), "" and values of shape "",
+                      values->shape().DebugString()));
+    }
     OP_REQUIRES(ctx, TensorShapeUtils::IsVector(values->shape()),
                 errors::InvalidArgument(
                     ""Input values should be a vector but received shape "",
",1,train
02cc160e29d20631de3859c6653184e3f876b9d7,tensorflow/tensorflow,"Prevent nullptr deref in SparseTensorSliceDataset

The arguments must determine a valid sparse tensor. This means that when indices are empty then the values must be empty too (and the reverse).

Also added test, by modifying existing test with empty sparse tensor to now run with an invalid sparse tensor input.

PiperOrigin-RevId: 388562757
Change-Id: Id8b54cd7c2316025b4f9a77292c8fb5344d17609",from_sparse_tensor_slices_test.py,"@@ -118,6 +118,26 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase,
       with self.assertRaises(errors.OutOfRangeError):
         sess.run(get_next)
 
+  @combinations.generate(combinations.combine(tf_api_version=1, mode=[""graph""]))
+  def testEmptySparseTensorSlicesInvalid(self):
+    """"""Test a dataset based on invalid `tf.sparse.SparseTensor`.""""""
+    st = array_ops.sparse_placeholder(dtypes.float64)
+    iterator = dataset_ops.make_initializable_iterator(
+        dataset_ops.Dataset.from_sparse_tensor_slices(st))
+    init_op = iterator.initializer
+
+    with self.cached_session() as sess:
+      # Test with an empty sparse tensor but with non empty values.
+      empty_indices = np.empty((0, 4), dtype=np.int64)
+      non_empty_values = [1, 2, 3, 4]
+      empty_dense_shape = [0, 4, 37, 9]
+      sparse_feed = sparse_tensor.SparseTensorValue(empty_indices,
+                                                    non_empty_values,
+                                                    empty_dense_shape)
+      # Here, we expect the test to fail when running the feed.
+      with self.assertRaises(errors.InvalidArgumentError):
+        sess.run(init_op, feed_dict={st: sparse_feed})
+
   @combinations.generate(combinations.combine(tf_api_version=2, mode=[""eager""]))
   def testFromSparseTensorSlicesError(self):
     with self.assertRaises(AttributeError):
",1,train
9728c60e136912a12d99ca56e106b7cce7af5986,tensorflow/tensorflow,"Ensure validation sticks in `save_restore_v2_ops.cc`

PiperOrigin-RevId: 387924206
Change-Id: I6156842eb3230076b5812c0815f3e66bd5241454",save_restore_v2_ops.cc,"@@ -98,6 +98,7 @@ class SaveV2 : public OpKernel {
     const Tensor& shape_and_slices = context->input(2);
     ValidateInputs(true /* is save op */, context, prefix, tensor_names,
                    shape_and_slices);
+    if (!context->status().ok()) return;
 
     const int kFixedInputs = 3;  // Prefix, tensor names, shape_and_slices.
     const int num_tensors = static_cast<int>(tensor_names.NumElements());
@@ -177,6 +178,7 @@ class RestoreV2 : public OpKernel {
                                         "" expected dtypes.""));
     ValidateInputs(false /* not save op */, context, prefix, tensor_names,
                    shape_and_slices);
+    if (!context->status().ok()) return;
 
     const string& prefix_string = prefix.scalar<tstring>()();
 
",1,test
7bdf50bb4f5c54a4997c379092888546c97c3ebd,tensorflow/tensorflow,"Ensure non-empty compressed input in tf.raw_ops.UncompressElement

PiperOrigin-RevId: 383955815
Change-Id: I072a84fd02738dd2f51b3f42836ed80067dba4a8",compression_ops.cc,"@@ -48,6 +48,11 @@ void UncompressElementOp::Compute(OpKernelContext* ctx) {
   Tensor tensor = ctx->input(0);
   const Variant& variant = tensor.scalar<Variant>()();
   const CompressedElement* compressed = variant.get<CompressedElement>();
+  OP_REQUIRES(
+      ctx, compressed != nullptr,
+      errors::InvalidArgument(
+          ""Input does not contain a compressed element. Instead got tensor "",
+          tensor.DebugString()));
 
   std::vector<Tensor> components;
   OP_REQUIRES_OK(ctx, UncompressElement(*compressed, &components));
",1,train
e0b6e58c328059829c3eb968136f17aa72b6c876,tensorflow/tensorflow,"Fix segfault/heap buffer overflow in `{Experimental,}DatasetToTFRecord` where dataset is numeric.

Code assumes only strings inputs and then interprets numbers as valid `tstring`s. Then, when trying to compute the CRC of the record this results in heap buffer overflow.

PiperOrigin-RevId: 387675909
Change-Id: I7396b9b8afc1ac744112af7c0b1cd7bb41e0f556",to_tf_record_op.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/function_handle_cache.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/resource_mgr.h""
+#include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/kernels/ops_util.h""
 #include ""tensorflow/core/lib/core/threadpool.h""
 #include ""tensorflow/core/lib/io/record_writer.h""
@@ -91,8 +92,20 @@ class ToTFRecordOp : public AsyncOpKernel {
     TF_RETURN_IF_ERROR(finalized_dataset->MakeIterator(
         &iter_ctx, /*parent=*/nullptr, ""ToTFRecordOpIterator"", &iterator));
 
+    const int num_output_dtypes = finalized_dataset->output_dtypes().size();
+    if (num_output_dtypes != 1) {
+      return errors::InvalidArgument(
+          ""ToTFRecordOp currently only support datasets of 1 single column, "",
+          ""but got "", num_output_dtypes);
+    }
+    const DataType dt = finalized_dataset->output_dtypes()[0];
+    if (dt != DT_STRING) {
+      return errors::InvalidArgument(
+          ""ToTFRecordOp currently only supports DT_STRING dataypes, but got "",
+          DataTypeString(dt));
+    }
     std::vector<Tensor> components;
-    components.reserve(finalized_dataset->output_dtypes().size());
+    components.reserve(num_output_dtypes);
     bool end_of_sequence;
     do {
       TF_RETURN_IF_ERROR(
",1,train
0f931751fb20f565c4e94aa6df58d54a003cdb30,tensorflow/tensorflow,"Validate dimensions of input tensor in `FractionalAvgPoolGrad`

PiperOrigin-RevId: 388286227
Change-Id: Ieb7566155e92acc8993a2212c76deacadc0edc8a",fractional_avg_pool_op.cc,"@@ -271,6 +271,18 @@ class FractionalAvgPoolGradOp : public OpKernel {
     const int64_t in_rows = orig_input_tensor_shape_flat(1);
     const int64_t in_cols = orig_input_tensor_shape_flat(2);
     const int64_t in_depth = orig_input_tensor_shape_flat(3);
+    OP_REQUIRES(
+        context, in_batch != 0,
+        errors::InvalidArgument(""Batch dimension of input must not be 0""));
+    OP_REQUIRES(
+        context, in_rows != 0,
+        errors::InvalidArgument(""Rows dimension of input must not be 0""));
+    OP_REQUIRES(
+        context, in_cols != 0,
+        errors::InvalidArgument(""Columns dimension of input must not be 0""));
+    OP_REQUIRES(
+        context, in_depth != 0,
+        errors::InvalidArgument(""Depth dimension of input must not be 0""));
 
     constexpr int tensor_in_and_out_dims = 4;
     // Transform orig_input_tensor_shape into TensorShape
",1,test
5ecec9c6fbdbc6be03295685190a45e7eee726ab,tensorflow/tensorflow,"Prevent use after free.

A very old version of the code used `result` as a simple pointer to a resource. Two years later, the pointer was changed to a `unique_ptr`, but the author forgot to remove the call to `Unref`. Three years after that, we finally uncovered the UAF.

PiperOrigin-RevId: 387924872
Change-Id: I70fb6f199164de49fac20c168132a07b84903f9b",resource_ops.cc,"@@ -53,6 +53,7 @@ class BoostedTreesCreateEnsembleOp : public OpKernel {
     if (!result->InitFromSerialized(
             tree_ensemble_serialized_t->scalar<tstring>()(), stamp_token)) {
       result->Unref();
+      result.release();  // Needed due to the `->Unref` above, to prevent UAF
       OP_REQUIRES(
           context, false,
           errors::InvalidArgument(""Unable to parse tree ensemble proto.""));
",1,train
ac117ee8a8ea57b73d34665cdf00ef3303bc0b11,tensorflow/tensorflow,"Prevent division by 0 in `resource_variable_ops.cc`

PiperOrigin-RevId: 387939939
Change-Id: Ib04902d63756633999959a70613f2eaa30c2c151",resource_variable_ops.cc,"@@ -710,7 +710,8 @@ class ResourceGatherOp : public OpKernel {
         copy_functor(c->eigen_device<Device>(), tmp_indices.flat<Index>(),
                      indices.flat<Index>());
 
-        AddBatchOffsets(&tmp_indices, params);
+        AddBatchOffsets(c, &tmp_indices, params);
+        if (!c->status().ok()) return;
         op_indices = &tmp_indices;
       }
 
@@ -742,11 +743,17 @@ class ResourceGatherOp : public OpKernel {
   // Example: batch_dims = 1, indices = [[0, 1, 2], [0, 1, 2]]
   // If indexing into a params dimension of size 4, then the indices will become
   // [0, 1, 2, 4, 5, 6]
-  void AddBatchOffsets(Tensor* indices, const Tensor& params) {
+  void AddBatchOffsets(OpKernelContext* ctx, Tensor* indices,
+                       const Tensor& params) {
     int64_t batch_size = 1;  // The size of all batch dimensions.
     for (int idx = 0; idx < batch_dims_; ++idx) {
       batch_size *= params.dim_size(idx);
     }
+    OP_REQUIRES(
+        ctx, batch_size != 0,
+        errors::InvalidArgument(
+            ""Inner size of indices would result in batch_size of 0 and a "",
+            ""division by 0 in the implementation. This is illegal""));
 
     auto indices_flat = indices->flat<Index>();
     int64_t const index_inner_size = indices->NumElements() / batch_size;
",1,train
bc9c546ce7015c57c2f15c168b3d9201de679a1d,tensorflow/tensorflow,"Prevent heap oob access in `resource_variable_ops.cc`

PiperOrigin-RevId: 387936433
Change-Id: I9e71ddaa8dbd51ec6afbf163a6b3b591f193b4f6",resource_variable_ops.cc,"@@ -660,6 +660,11 @@ class ResourceGatherOp : public OpKernel {
     OP_REQUIRES(
         c, TensorShapeUtils::IsVectorOrHigher(params.shape()),
         errors::InvalidArgument(""params must be at least 1 dimensional""));
+    OP_REQUIRES(
+        c, params.shape().dims() >= batch_dims_,
+        errors::InvalidArgument(""params must have at least "", batch_dims_,
+                                "" (batch_dims) dimensions but it has shape "",
+                                params.shape().DebugString()));
 
     // Check that we have enough index space
     const int64_t N = indices.NumElements();
",1,train
01cff3f986259d661103412a20745928c727326f,tensorflow/tensorflow,"Fix heap OOB due to dimension mismatch in `ResourceScatterUpdate`

PiperOrigin-RevId: 388292801
Change-Id: Id9bd7244d98d41b1517d4771850b32782c0cc949",resource_variable_ops.cc,"@@ -955,11 +955,12 @@ class ResourceScatterUpdateOp : public OpKernel {
                         params->dim_size(0), "")""));
       } else {
         int64_t num_updates = updates.NumElements();
-        OP_REQUIRES(c, num_updates % N == 0,
-                    errors::InvalidArgument(
-                        ""shape of indices ("", indices.shape().DebugString(),
-                        "") is not compatible with the shape of updates ("",
-                        updates.shape().DebugString(), "")""));
+        OP_REQUIRES(
+            c, TensorShapeUtils::StartsWith(updates.shape(), indices.shape()),
+            errors::InvalidArgument(
+                ""The shape of indices ("", indices.shape().DebugString(),
+                "") must be a prefix of the shape of updates ("",
+                updates.shape().DebugString(), "")""));
         auto updates_flat = updates.shaped<T, 2>({N, num_updates / N});
 
         functor::ScatterFunctor<Device, T, Index, op> functor;
",1,test
1071f554dbd09f7e101324d366eec5f4fe5a3ece,tensorflow/tensorflow,"Add missing validation to `RaggedTensorToSparse`.

There needs to be a check that the splits allow for valid ragged tensors.

PiperOrigin-RevId: 387712169
Change-Id: I2499175324b82b65d159a260c7f83b98ceb5cc7d",ragged_tensor_to_sparse_kernel.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 
@@ -38,7 +39,8 @@ class RaggedTensorToSparseOp : public OpKernel {
     OP_REQUIRES_OK(
         context, context->input_list(""rt_nested_splits"", &rt_nested_splits_in));
     const int rt_nested_splits_len = rt_nested_splits_in.size();
-    DCHECK_GT(rt_nested_splits_len, 0);  // Enforced by REGISTER_OP.
+    OP_REQUIRES(context, rt_nested_splits_len > 0,
+                errors::InvalidArgument(""rt_nested_splits must be non empty""));
     std::vector<ConstFlatSplits> rt_nested_splits;
     rt_nested_splits.reserve(rt_nested_splits_len);
     for (int i = 0; i < rt_nested_splits_len; ++i) {
@@ -162,6 +164,14 @@ class RaggedTensorToSparseOp : public OpKernel {
       if (rt_nested_splits[i](0) != 0) {
         return InvalidArgument(""First value of ragged splits must be 0."");
       }
+      for (int j = 1; j < rt_nested_splits[i].size(); ++j) {
+        if (rt_nested_splits[i](j) < rt_nested_splits[i](j - 1)) {
+          return InvalidArgument(
+              ""Ragged splits should be non decreasing, but we got "",
+              rt_nested_splits[i](j - 1), "" followed by "",
+              rt_nested_splits[i](j));
+        }
+      }
       if (i > 0) {
         SPLITS_TYPE last_split =
             rt_nested_splits[i - 1](rt_nested_splits[i - 1].size() - 1);
",1,test
f2a673bd34f0d64b8e40a551ac78989d16daad09,tensorflow/tensorflow,"Add missing validation to `matrix_diag_op.cc`

PiperOrigin-RevId: 387923533
Change-Id: Idfffeb328d5f9c6748d992d28a56d6e9e45103a0",matrix_diag_op.cc,"@@ -73,6 +73,9 @@ class MatrixDiagPartOp : public OpKernel {
                   errors::InvalidArgument(
                       ""diag_index must be a scalar or vector, received shape: "",
                       diag_index.shape().DebugString()));
+      OP_REQUIRES(context, diag_index.NumElements() > 0,
+                  errors::InvalidArgument(
+                      ""Expected diag_index to have at least 1 element""));
       lower_diag_index = diag_index.flat<int32>()(0);
       upper_diag_index = lower_diag_index;
       if (TensorShapeUtils::IsVector(diag_index.shape())) {
@@ -179,6 +182,9 @@ class MatrixDiagOp : public OpKernel {
                   errors::InvalidArgument(
                       ""diag_index must be a scalar or vector, received shape: "",
                       diag_index.shape().DebugString()));
+      OP_REQUIRES(context, diag_index.NumElements() > 0,
+                  errors::InvalidArgument(
+                      ""Expected diag_index to have at least 1 element""));
       lower_diag_index = diag_index.flat<int32>()(0);
       upper_diag_index = lower_diag_index;
       if (TensorShapeUtils::IsVector(diag_index.shape())) {
",1,test
ff8894044dfae5568ecbf2ed514c1a37dc394f1b,tensorflow/tensorflow,"Add one missing validation to `matrix_set_diag_op.cc`

PiperOrigin-RevId: 387923408
Change-Id: If6a97b9098c13879400f56c22f91555cdf0ce5d7",matrix_set_diag_op.cc,"@@ -70,6 +70,9 @@ class MatrixSetDiagOp : public OpKernel {
                   errors::InvalidArgument(
                       ""diag_index must be a scalar or vector, received shape: "",
                       diag_index.shape().DebugString()));
+      OP_REQUIRES(
+          context, diag_index.NumElements() > 0,
+          errors::InvalidArgument(""diag_index must have at least one element""));
       lower_diag_index = diag_index.flat<int32>()(0);
       upper_diag_index = lower_diag_index;
       if (TensorShapeUtils::IsVector(diag_index.shape())) {
",1,train
93f428fd1768df147171ed674fee1fc5ab8309ec,tensorflow/tensorflow,"Fix nullptr deref and heap OOB access in binary cwise ops.

PiperOrigin-RevId: 387936777
Change-Id: I608b8074cec36a982cca622b7144cb2c43e6e19f",cwise_ops_common.h,"@@ -265,6 +265,11 @@ class SimpleBinaryOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     const Tensor& in0 = ctx->input(0);
     const Tensor& in1 = ctx->input(1);
+    OP_REQUIRES(
+        ctx, in0.NumElements() == in1.NumElements(),
+        errors::InvalidArgument(""The two arguments to a cwise op must have ""
+                                ""same number of elements, got "",
+                                in0.NumElements(), "" and "", in1.NumElements()));
     auto in0_flat = in0.flat<Tin>();
     auto in1_flat = in1.flat<Tin>();
     const Device& eigen_device = ctx->eigen_device<Device>();
",1,train
e86605c0a336c088b638da02135ea6f9f6753618,tensorflow/tensorflow,"Fix FPE in inplace update ops.

PiperOrigin-RevId: 388303197
Change-Id: Ib48309b6213ffe53eba81004b00e889d653e4b83",inplace_ops.cc,"@@ -225,7 +225,7 @@ class InplaceOpBase : public OpKernel {
 
     Tensor y = x;  // This creates an alias intentionally.
     // Skip processing if tensors are empty.
-    if (x.NumElements() > 0 || v.NumElements() > 0) {
+    if (x.NumElements() > 0 && v.NumElements() > 0) {
       OP_REQUIRES_OK(ctx, DoCompute(ctx, i, v, &y));
     }
     ctx->set_output(0, y);
",1,train
8a84f7a2b5a2b27ecf88d25bad9ac777cd2f7992,tensorflow/tensorflow,"Ensure num_streams >= 0 in tf.raw_ops.BoostedTreesCreateQuantileStreamResource

PiperOrigin-RevId: 387452765
Change-Id: I9990c760e177fabca6a3b9b4612ceeaeeba51495",quantile_ops.cc,"@@ -116,6 +116,9 @@ class BoostedTreesCreateQuantileStreamResourceOp : public OpKernel {
     const Tensor* num_streams_t;
     OP_REQUIRES_OK(context, context->input(kNumStreamsName, &num_streams_t));
     int64_t num_streams = num_streams_t->scalar<int64>()();
+    OP_REQUIRES(context, num_streams >= 0,
+                errors::InvalidArgument(
+                    ""Num_streams input cannot be a negative integer""));
 
     auto result =
         new QuantileStreamResource(epsilon, max_elements_, num_streams);
",1,train
429f009d2b2c09028647dd4bb7b3f6f414bbaad7,tensorflow/tensorflow,"Add remaining missing validation to `BoostedTreesCalculateBestFeatureSplit`

PiperOrigin-RevId: 387423006
Change-Id: I8eaf30efb223011519e60707bfa751b275d3a443",stats_ops.cc,"@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 
 #include <limits>
+#include <string>
 #include <vector>
 
 #include ""third_party/eigen3/Eigen/Core""
@@ -22,6 +23,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/kernels/boosted_trees/boosted_trees.pb.h""
 #include ""tensorflow/core/kernels/boosted_trees/tree_helper.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/logging.h""
 
 namespace tensorflow {
@@ -254,12 +256,18 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
     // node_id_range
     const Tensor* node_id_range_t;
     OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t));
+    OP_REQUIRES(
+        context, node_id_range_t->NumElements() == 2,
+        errors::InvalidArgument(""node_id_range argument must have shape [2]""));
     const auto node_id_range = node_id_range_t->vec<int32>();
     const int32_t node_id_first = node_id_range(0);  // inclusive
     const int32_t node_id_last = node_id_range(1);   // exclusive
 
     const Tensor* stats_summary_t;
     OP_REQUIRES_OK(context, context->input(""stats_summary"", &stats_summary_t));
+    OP_REQUIRES(
+        context, stats_summary_t->shape().dims() == 4,
+        errors::InvalidArgument(""stats_summary argument must have rank 4""));
     TTypes<float, 4>::ConstTensor stats_summary =
         stats_summary_t->tensor<float, 4>();
     const int32_t feature_dims = stats_summary_t->dim_size(1);
@@ -272,6 +280,8 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
 
     const Tensor* l1_t;
     OP_REQUIRES_OK(context, context->input(""l1"", &l1_t));
+    OP_REQUIRES(context, l1_t->NumElements() == 1,
+                errors::InvalidArgument(""l1 argument must be a scalar""));
     const auto l1 = l1_t->scalar<float>()();
     DCHECK_GE(l1, 0);
     if (logits_dim_ > 1) {
@@ -281,17 +291,25 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
 
     const Tensor* l2_t;
     OP_REQUIRES_OK(context, context->input(""l2"", &l2_t));
+    OP_REQUIRES(context, l2_t->NumElements() == 1,
+                errors::InvalidArgument(""l2 argument must be a scalar""));
     const auto l2 = l2_t->scalar<float>()();
     DCHECK_GE(l2, 0);
 
     const Tensor* tree_complexity_t;
     OP_REQUIRES_OK(context,
                    context->input(""tree_complexity"", &tree_complexity_t));
+    OP_REQUIRES(
+        context, tree_complexity_t->NumElements() == 1,
+        errors::InvalidArgument(""tree_complexity argument must be a scalar""));
     const auto tree_complexity = tree_complexity_t->scalar<float>()();
 
     const Tensor* min_node_weight_t;
     OP_REQUIRES_OK(context,
                    context->input(""min_node_weight"", &min_node_weight_t));
+    OP_REQUIRES(
+        context, min_node_weight_t->NumElements() == 1,
+        errors::InvalidArgument(""min_node_weight argument must be a scalar""));
     const auto min_node_weight = min_node_weight_t->scalar<float>()();
 
     std::vector<int32> output_node_ids;
@@ -300,7 +318,7 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
     std::vector<int32> output_thresholds;
     std::vector<Eigen::VectorXf> output_left_node_contribs;
     std::vector<Eigen::VectorXf> output_right_node_contribs;
-    std::vector<string> output_split_types;
+    std::vector<std::string> output_split_types;
 
     // TODO(tanzheny) parallelize the computation.
     // Iterate each node and find the best gain per node.
",1,train
9c87c32c710d0b5b53dc6fd3bfde4046e1f7a5ad,tensorflow/tensorflow,"Disallow empty node_id_range in tf.raw_ops.BoostedTreesCalculateBestFeatureSplitV2 and tf.raw_ops.BoostedTreesCalculateBestGainsPerFeature

PiperOrigin-RevId: 387165936
Change-Id: I2f70341af96236b2776c2a592c917d549c1fc1e2",stats_ops.cc,"@@ -51,6 +51,16 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
     // node_id_range
     const Tensor* node_id_range_t;
     OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t));
+    OP_REQUIRES(
+        context, node_id_range_t->dims() == 1,
+        errors::InvalidArgument(""node_id_range must be a rank 1 tensor, but ""
+                                ""given node_id_range has dims of "",
+                                node_id_range_t->dims()));
+    OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2,
+                errors::InvalidArgument(
+                    ""node_id_range must be a rank 1 tensor with shape=[2], but ""
+                    ""given node_id_range has shape "",
+                    node_id_range_t->dim_size(0), "" on its first dim""));
     const auto node_id_range = node_id_range_t->vec<int32>();
     const int32_t node_id_first = node_id_range(0);  // inclusive
     const int32_t node_id_last = node_id_range(1);   // exclusive
@@ -570,6 +580,16 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
     const Tensor* node_id_range_t;
     OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t));
     const auto node_id_range = node_id_range_t->vec<int32>();
+    OP_REQUIRES(
+        context, node_id_range_t->dims() == 1,
+        errors::InvalidArgument(""node_id_range must be a rank 1 tensor, but ""
+                                ""given node_id_range has dims of "",
+                                node_id_range_t->dims()));
+    OP_REQUIRES(context, node_id_range_t->dim_size(0) == 2,
+                errors::InvalidArgument(
+                    ""node_id_range must be a rank 1 tensor with shape=[2], but ""
+                    ""given node_id_range has shape "",
+                    node_id_range_t->dim_size(0), "" on its first dim""));
     const int32_t node_id_first = node_id_range(0);  // Inclusive.
     const int32_t node_id_last = node_id_range(1);   // Exclusive.
 
",1,train
6da6620efad397c85493b8f8667b821403516708,tensorflow/tensorflow,"Secure tf.raw_ops.QuantizeV2

Validate size and shape of min_range and max_range
Ensure axis is within input dims limits

PiperOrigin-RevId: 387232799
Change-Id: I36975281f7b5758e9e31a8dcc73fe610ef456318",quantize_op.cc,"@@ -113,7 +113,50 @@ class QuantizeV2Op : public OpKernel {
 
     int num_slices = 1;
     if (axis_ > -1) {
+      OP_REQUIRES(
+          ctx, input.dims() > axis_,
+          errors::InvalidArgument(
+              ""Axis is on a zero-based index, so its value must always be less ""
+              ""than number of input's dims, but given axis value was "",
+              axis_, "" and input's dims was "", input.dims()));
       num_slices = input.dim_size(axis_);
+      OP_REQUIRES(ctx, input_min_range.dims() == 1,
+                  errors::InvalidArgument(
+                      ""If axis is specified, min_range must be a 1-D tensor ""
+                      ""whose size matches the axis dimension of the input and ""
+                      ""output tensors, but min_range dims are "",
+                      input_min_range.dims()));
+      OP_REQUIRES(ctx, input_min_range.dim_size(0) == num_slices,
+                  errors::InvalidArgument(
+                      ""If axis is specified, min_range must be a 1-D tensor ""
+                      ""whose size matches the axis dimension of the input and ""
+                      ""output tensors, but min_range is a 1-D tensor of size "",
+                      input_min_range.dim_size(0),
+                      "" and input's axis dimension is of size "", num_slices));
+      OP_REQUIRES(ctx, input_max_range.dims() == 1,
+                  errors::InvalidArgument(
+                      ""If axis is specified, max_range must be a 1-D tensor ""
+                      ""whose size matches the axis dimension of the input and ""
+                      ""output tensors, but max_range dims are "",
+                      input_max_range.dims()));
+      OP_REQUIRES(ctx, input_max_range.dim_size(0) == num_slices,
+                  errors::InvalidArgument(
+                      ""If axis is specified, max_range must be a 1-D tensor ""
+                      ""whose size matches the axis dimension of the input and ""
+                      ""output tensors, but max_range is a 1-D tensor of size "",
+                      input_max_range.dim_size(0),
+                      "" and input's axis dimension is of size "", num_slices));
+    } else {
+      OP_REQUIRES(ctx, input_min_range.NumElements() == 1,
+                  errors::InvalidArgument(
+                      ""If axis is not specified, min_range must contain a ""
+                      ""single float element, but it contains "",
+                      input_min_range.NumElements(), "" elements""));
+      OP_REQUIRES(ctx, input_max_range.NumElements() == 1,
+                  errors::InvalidArgument(
+                      ""If axis is not specified, max_range must contain a ""
+                      ""single float element, but it contains "",
+                      input_max_range.NumElements(), "" elements""));
     }
 
     const TensorShape& minmax_shape = ctx->input(1).shape();
",1,test
e84c975313e8e8e38bb2ea118196369c45c51378,tensorflow/tensorflow,"In tf.raw_ops.BoostedTreesSparseCalculateBestFeatureSplit, limit stat_dim in stats_summary_indices to under stats_dims in stats_summary_shape

PiperOrigin-RevId: 387171191
Change-Id: I83ca8a75b22aa78c037e8b98779da6cced16bfaa",stats_ops.cc,"@@ -1050,6 +1050,13 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
       const int32_t feature_dim = stats_summary_indices(idx, 1);
       const int32_t bucket_id = stats_summary_indices(idx, 2);
       const int32_t stat_dim = stats_summary_indices(idx, 3);
+      OP_REQUIRES(context, stat_dim < stats_dims,
+                  errors::InvalidArgument(
+                      ""Stat dim, the sum of logits dim and hessian dim in ""
+                      ""stats_summary_indices, cannot be greater than stats ""
+                      ""dims, the last value in stats_summary_shape, which was "",
+                      stats_dims, "". At index ("", idx,
+                      "", 4), stats_summary_indices contains value "", stat_dim));
       std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
           FeatureMapIterator::value_type(feature_dim, BucketMap()));
       auto& b_map = f_insert_result.first->second;
",1,test
203214568f5bc237603dbab6e1fd389f1572f5c9,tensorflow/tensorflow,"Reorganize and add more validation to MKL requantization

PiperOrigin-RevId: 387901341
Change-Id: I2515b9034c64e113db0bcec8337d30643ab0a0f1",mkl_requantize_per_channel_op.cc,"@@ -49,35 +49,45 @@ class MklRequantizePerChannelOp : public OpKernel {
   void Compute(OpKernelContext* ctx) override {
     try {
       const Tensor& input = ctx->input(kInputTensorIndex);
+      OP_REQUIRES(
+          ctx, input.dims() == 4,
+          errors::InvalidArgument(""Current RequantizePerChannel operator""
+                                  ""supports 4D tensors only.""));
+
       const Tensor& input_min_vec = ctx->input(kInputMinVecIndex);
+      size_t depth = input_min_vec.NumElements();
       float* input_min_vec_data = (float*)const_cast<void*>(
           static_cast<const void*>(input_min_vec.flat<float>().data()));
+
       const Tensor& input_max_vec = ctx->input(kInputMaxVecIndex);
+      OP_REQUIRES(
+          ctx, input_max_vec.NumElements() == depth,
+          errors::InvalidArgument(""input_max has incorrect size, expected "",
+                                  depth, "" was "", input_max_vec.NumElements()));
       float* input_max_vec_data = (float*)const_cast<void*>(
           static_cast<const void*>(input_max_vec.flat<float>().data()));
 
       const Tensor& input_requested_min = ctx->input(this->kRequestMinIndex);
+      OP_REQUIRES(
+          ctx, input_requested_min.NumElements() == 1,
+          errors::InvalidArgument(""requested_output_min must be a scalar""));
       const float input_requested_min_float =
           input_requested_min.flat<float>()(0);
+
       const Tensor& input_requested_max = ctx->input(this->kRequestMaxIndex);
+      OP_REQUIRES(
+          ctx, input_requested_min.NumElements() == 1,
+          errors::InvalidArgument(""requested_output_max must be a scalar""));
       const float input_requested_max_float =
           input_requested_max.flat<float>()(0);
 
-      size_t depth = input_min_vec.NumElements();
-      OP_REQUIRES(
-          ctx, input.dims() == 4,
-          errors::InvalidArgument(""Current RequantizePerChannel operator""
-                                  ""supports 4D tensors only.""));
-      OP_REQUIRES(
-          ctx, input_min_vec.dim_size(0) == depth,
-          errors::InvalidArgument(""input_min has incorrect size, expected "",
-                                  depth, "" was "", input_min_vec.dim_size(0)));
-      OP_REQUIRES(
-          ctx, input_max_vec.dim_size(0) == depth,
-          errors::InvalidArgument(""input_max has incorrect size, expected "",
-                                  depth, "" was "", input_max_vec.dim_size(0)));
-
-      if (out_type_ == DT_QINT8) DCHECK(input_requested_min_float < 0.0f);
+      if (out_type_ == DT_QINT8) {
+        OP_REQUIRES(ctx, input_requested_min_float < 0.0f,
+                    errors::InvalidArgument(
+                        ""If out_type is QINT8, requested_output_max must be ""
+                        ""non negative, got "",
+                        input_requested_min_float));
+      }
 
       const float factor = (out_type_ == DT_QINT8) ? 127.0f : 255.0f;
       const float requested_min_max =
",1,train
9e62869465573cb2d9b5053f1fa02a81fce21d69,tensorflow/tensorflow,"Add more validation to `RequantizationRangePerChannel`.

PiperOrigin-RevId: 387693946
Change-Id: Ife8dcbdb021bec4787eef6a4361dd08f17c14bd6",mkl_requantization_range_per_channel_op.cc,"@@ -57,6 +57,20 @@ class MklRequantizationRangePerChannelOp : public OpKernel {
         ctx, input_max.dim_size(0) == depth,
         errors::InvalidArgument(""input_max has incorrect size, expected "",
                                 depth, "" was "", input_max.dim_size(0)));
+    OP_REQUIRES(
+        ctx, input_min.NumElements() == depth,
+        errors::InvalidArgument(""input_min must have the same number of ""
+                                ""elements as input_max, got "",
+                                input_min.NumElements(), "" and "", depth));
+    OP_REQUIRES(ctx, input.NumElements() > 0,
+                errors::InvalidArgument(""input must not be empty""));
+    OP_REQUIRES(ctx, input.dims() == 4,
+                errors::InvalidArgument(""input must be in NHWC format""));
+    OP_REQUIRES(
+        ctx, input.dim_size(3) == depth,
+        errors::InvalidArgument(
+            ""input must have same number of channels as length of input_min: "",
+            input.dim_size(3), "" vs "", depth));
 
     const float* input_min_data = input_min.flat<float>().data();
     const float* input_max_data = input_max.flat<float>().data();
",1,test
be7a4de6adfbd303ce08be4332554dff70362612,tensorflow/tensorflow,"Ensure non-empty rt_nested_splits in tf.raw_ops.RaggedTensorToVariant

PiperOrigin-RevId: 387664237
Change-Id: Ia1700c34b5610873d63561abc86e23b46ead93b3",ragged_tensor_to_variant_op.cc,"@@ -157,6 +157,12 @@ class RaggedTensorToVariantOp : public OpKernel {
       return;
     }
 
+    // Checked here instead of at input in case batched_input_ is false
+    OP_REQUIRES(context, ragged_nested_splits_len > 0,
+                errors::InvalidArgument(
+                    ""rt_nested_splits must be a list of one or more, but ""
+                    ""received rt_nested_splits of length 0.""));
+
     // Unbatch the Ragged Tensor and encode the components.
     std::vector<RaggedTensorVariant> unbatched_ragged_input;
     auto batched_splits_top_vec =
",1,train
2e0ee46f1a47675152d3d865797a18358881d7a6,tensorflow/tensorflow,"Ensure non-empty input_splits in tf.raw_ops.UnicodeEncode

PiperOrigin-RevId: 387170080
Change-Id: I3b489acc51c5cb4124c535b9df7cc6e62ef21766",unicode_ops.cc,"@@ -533,6 +533,10 @@ class UnicodeEncodeOp : public OpKernel {
     const Tensor& input_splits = context->input(1);
     const auto input_splits_flat = input_splits.flat<SPLITS_TYPE>();
 
+    OP_REQUIRES(
+        context, input_splits.NumElements() > 0,
+        errors::InvalidArgument(""Input_splits should contain elements, but ""
+                                ""given input_values has 0 elements""));
     // Operation will treat first argument in input_splits as if it were zero
     // regardless of its actual value since splits should begin with zero and
     // end with the length of the input values vector.
",1,test
a776040a5e7ebf76eeb7eb923bf1ae417dd4d233,tensorflow/tensorflow,"Disallow dims input of 0 in tf.raw_ops.UnravelIndex

PiperOrigin-RevId: 384284198
Change-Id: Ia1804ef1aec57b4d857ea507e6891bcccde18e9b",unravel_index_op.cc,"@@ -53,6 +53,14 @@ class UnravelIndexOp : public OpKernel {
                                 dims_tensor.shape().DebugString(), ""\""""));
 
     auto dims = dims_tensor.vec<Tidx>();
+    // Make sure dims does not contain a zero
+    for (int i = 0; i < dims.size(); i++) {
+      OP_REQUIRES(
+          ctx, dims(i) != 0,
+          errors::InvalidArgument(""Input dims cannot contain a dim of zero, ""
+                                  ""but dims contains zero at index "",
+                                  i));
+    }
 
     // Chek to make sure indices is not out of boundary
     Eigen::Tensor<Tidx, 0, Eigen::RowMajor> dims_prod_eigen = dims.prod();
",1,train
a776040a5e7ebf76eeb7eb923bf1ae417dd4d233,tensorflow/tensorflow,"Disallow dims input of 0 in tf.raw_ops.UnravelIndex

PiperOrigin-RevId: 384284198
Change-Id: Ia1804ef1aec57b4d857ea507e6891bcccde18e9b",array_ops_test.py,"@@ -1575,7 +1575,7 @@ class UnravelIndexTest(test_util.TensorFlowTestCase):
     with self.cached_session():
       for dtype in [dtypes.int32, dtypes.int64]:
         with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                    ""index is out of bound as with dims""):
+                                    ""dims cannot contain a dim of zero""):
           indices = constant_op.constant([2, 5, 7], dtype=dtype)
           dims = constant_op.constant([3, 0], dtype=dtype)
           self.evaluate(array_ops.unravel_index(indices=indices, dims=dims))
",1,train
3a7362750d5c372420aa8f0caf7bf5b5c3d0f52d,tensorflow/tensorflow,"Prevent crash/heap OOB due to integer conversion to unsigned in NMS kernels

PiperOrigin-RevId: 387938262
Change-Id: Id361a715307e7179977cf5c64391c199a966f2ad",non_max_suppression_op.cc,"@@ -169,6 +169,8 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores,
                            bool pad_to_max_output_size = false,
                            int* ptr_num_valid_outputs = nullptr) {
   const int output_size = max_output_size.scalar<int>()();
+  OP_REQUIRES(context, output_size >= 0,
+              errors::InvalidArgument(""output size must be non-negative""));
 
   std::vector<T> scores_data(num_boxes);
   std::copy_n(scores.flat<T>().data(), num_boxes, scores_data.begin());
@@ -768,6 +770,9 @@ class NonMaxSuppressionV4Op : public OpKernel {
         context, scores, num_boxes, max_output_size, iou_threshold_val,
         score_threshold_val, dummy_soft_nms_sigma, similarity_fn,
         return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
+    if (!context->status().ok()) {
+      return;
+    }
 
     // Allocate scalar output tensor for number of indices computed.
     Tensor* num_outputs_t = nullptr;
@@ -845,6 +850,9 @@ class NonMaxSuppressionV5Op : public OpKernel {
         context, scores, num_boxes, max_output_size, iou_threshold_val,
         score_threshold_val, soft_nms_sigma_val, similarity_fn,
         return_scores_tensor_, pad_to_max_output_size_, &num_valid_outputs);
+    if (!context->status().ok()) {
+      return;
+    }
 
     // Allocate scalar output tensor for number of indices computed.
     Tensor* num_outputs_t = nullptr;
",1,train
b5cdbf12ffcaaffecf98f22a6be5a64bb96e4f58,tensorflow/tensorflow,"Prevent overflow due to integer conversion to unsigned.

PiperOrigin-RevId: 387738045
Change-Id: Id7e95bc07e02df1c66b72bd09f389608c87bdebe",non_max_suppression_op.cc,"@@ -930,6 +930,8 @@ class CombinedNonMaxSuppressionOp : public OpKernel {
         errors::InvalidArgument(""max_size_per_class must be 0-D, got shape "",
                                 max_output_size.shape().DebugString()));
     const int max_size_per_class = max_output_size.scalar<int>()();
+    OP_REQUIRES(context, max_size_per_class > 0,
+                errors::InvalidArgument(""max_size_per_class must be positive""));
     // max_total_size: scalar
     const Tensor& max_total_size = context->input(3);
     OP_REQUIRES(
",1,train
42459e4273c2e47a3232cc16c4f4fff3b3a35c38,tensorflow/tensorflow,"Prevent CHECK-fail/heap OOB in UpperBound and LowerBound

PiperOrigin-RevId: 387738073
Change-Id: Iee74de95ddad18440d052a75a5a1cb67544f490a",searchsorted_op.cc,"@@ -86,6 +86,10 @@ class UpperBoundOp : public OpKernel {
     const Tensor& sorted_inputs_t = ctx->input(0);
     const Tensor& values_t = ctx->input(1);
 
+    // inputs must be at least a matrix
+    OP_REQUIRES(
+        ctx, sorted_inputs_t.shape().dims() >= 2,
+        errors::InvalidArgument(""sorted input argument must be a matrix""));
     // must have same batch dim_size for both
     OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0),
                 Status(error::INVALID_ARGUMENT,
@@ -127,6 +131,10 @@ class LowerBoundOp : public OpKernel {
     const Tensor& sorted_inputs_t = ctx->input(0);
     const Tensor& values_t = ctx->input(1);
 
+    // inputs must be at least a matrix
+    OP_REQUIRES(
+        ctx, sorted_inputs_t.shape().dims() >= 2,
+        errors::InvalidArgument(""sorted input argument must be a matrix""));
     // must have same batch dim_size for both
     OP_REQUIRES(ctx, sorted_inputs_t.dim_size(0) == values_t.dim_size(0),
                 Status(error::INVALID_ARGUMENT,
",1,train
532f5c5a547126c634fefd43bbad1dc6417678ac,tensorflow/tensorflow,"Prevent nullptr deref in validation of indexes in map ops.

PiperOrigin-RevId: 387738023
Change-Id: I83d18d36a7b82ffd2a40b5124a4e5b4c72238f27",map_stage_op.cc,"@@ -210,9 +210,9 @@ class StagingMap : public ResourceBase {
                                    const OptionalTuple& tuple)
       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     if (tuple[index].has_value()) {
-      return Status(errors::InvalidArgument(
+      return errors::InvalidArgument(
           ""The tensor for index '"", index, ""' for key '"", key.scalar<int64>()(),
-          ""' was already initialized '"", dtypes_.size(), ""'.""));
+          ""' was already initialized '"", dtypes_.size(), ""'."");
     }
 
     return Status::OK();
@@ -220,6 +220,10 @@ class StagingMap : public ResourceBase {
 
   // Check that the indices are strictly ordered
   Status check_index_ordering(const Tensor& indices) {
+    if (indices.NumElements() == 0) {
+      return errors::InvalidArgument(""Indices are empty"");
+    }
+
     auto findices = indices.flat<int>();
 
     for (std::size_t i = 0; i < findices.dimension(0) - 1; ++i) {
@@ -227,8 +231,7 @@ class StagingMap : public ResourceBase {
         continue;
       }
 
-      return Status(
-          errors::InvalidArgument(""Indices are not strictly ordered""));
+      return errors::InvalidArgument(""Indices are not strictly ordered"");
     }
 
     return Status::OK();
@@ -238,10 +241,10 @@ class StagingMap : public ResourceBase {
   Status check_memory_limit(std::size_t bytes)
       TF_EXCLUSIVE_LOCKS_REQUIRED(mu_) {
     if (has_memory_limit() && bytes > memory_limit_) {
-      return Status(errors::ResourceExhausted(
+      return errors::ResourceExhausted(
           ""Attempted to insert tensors with combined size of '"", bytes,
           ""' bytes into Staging Area with a memory limit of '"", memory_limit_,
-          ""'.""));
+          ""'."");
     }
 
     return Status::OK();
",1,train
a4e138660270e7599793fa438cd7b2fc2ce215a6,tensorflow/tensorflow,"Add remaining validation to `sdca_internal.cc`

PiperOrigin-RevId: 387738010
Change-Id: I28eedcfd87a53aaf34deb075acea1f8c95470808",sdca_internal.cc,"@@ -380,6 +380,11 @@ Status Examples::Initialize(OpKernelContext* const context,
   const Tensor* example_labels_t;
   TF_RETURN_IF_ERROR(context->input(""example_labels"", &example_labels_t));
   auto example_labels = example_labels_t->flat<float>();
+  if (example_labels.size() != num_examples) {
+    return errors::InvalidArgument(""Expected "", num_examples,
+                                   "" example labels but got "",
+                                   example_labels.size());
+  }
 
   OpInputList dense_features_inputs;
   TF_RETURN_IF_ERROR(
",1,test
d7de67733925de196ec8863a33445b73f9562d1d,tensorflow/tensorflow,"Prevent a CHECK-fail due to empty tensor input in `map_stage_op.cc`

PiperOrigin-RevId: 387737906
Change-Id: Idc52df0c71c7ed6e2dd633b651a581932f277c8a",map_stage_op.cc,"@@ -527,6 +527,8 @@ class MapStageOp : public OpKernel {
     OP_REQUIRES_OK(ctx, ctx->input(""key"", &key_tensor));
     OP_REQUIRES_OK(ctx, ctx->input(""indices"", &indices_tensor));
     OP_REQUIRES_OK(ctx, ctx->input_list(""values"", &values_tensor));
+    OP_REQUIRES(ctx, key_tensor->NumElements() > 0,
+                errors::InvalidArgument(""key must not be empty""));
 
     // Create copy for insertion into Staging Area
     Tensor key(*key_tensor);
",1,train
136b51f10903e044308cf77117c0ed9871350475,tensorflow/tensorflow,"Add missing validation to `maxpooling_op.cc`

PiperOrigin-RevId: 387932441
Change-Id: I43a0b24e6a12cc965611144ba035accd384594b9",maxpooling_op.cc,"@@ -74,6 +74,7 @@ static void SpatialMaxPoolWithArgMaxHelper(
         errors::Internal(""SpatialMaxPoolWithArgMaxHelper requires Targmax ""
                          ""to be int64 when input_backprop != nullptr""));
   }
+  if (tensor_in.NumElements() == 0 || output->NumElements() == 0) return;
 
   typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
       ConstEigenMatrixMap;
@@ -949,6 +950,10 @@ class MaxPoolingWithArgmaxOp : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     const Tensor& tensor_in = context->input(0);
+    OP_REQUIRES(context, tensor_in.dims() == 4,
+                errors::InvalidArgument(""tensor_in must be 4-dimensional (2)""));
+    OP_REQUIRES(context, tensor_in.NumElements() > 0,
+                errors::InvalidArgument(""tensor_in must not be empty (2)""));
 
     PoolParameters params{context,
                           ksize_,
",1,train
136b51f10903e044308cf77117c0ed9871350475,tensorflow/tensorflow,"Add missing validation to `maxpooling_op.cc`

PiperOrigin-RevId: 387932441
Change-Id: I43a0b24e6a12cc965611144ba035accd384594b9",pooling_ops_common.cc,"@@ -171,6 +171,8 @@ PoolParameters::PoolParameters(OpKernelContext* context,
     pad_depth = 0;
     out_depth = depth;
   } else {
+    OP_REQUIRES(context, depth_window > 0,
+                errors::InvalidArgument(""depth_window must not be 0""));
     // Our current version of depthwise max pooling does not support
     // any padding, and expects the depth_window to equal the
     // depth_stride (no overlapping).
",1,train
8a793b5d7f59e37ac7f3cd0954a750a2fe76bad4,tensorflow/tensorflow,"Prevent division by 0 in common shape functions.

PiperOrigin-RevId: 387712197
Change-Id: Id25c7460e35b68aeeeac23b9a88e455b443ee149",common_shape_fns.cc,"@@ -672,6 +672,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c,
   if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) {
     int64_t input_depth_value = c->Value(input_depth_dim),
             filter_input_depth_value = c->Value(filter_input_depth_dim);
+    if (filter_input_depth_value == 0)
+      return errors::InvalidArgument(""Depth of filter must not be 0"");
     if (input_depth_value % filter_input_depth_value != 0)
       return errors::InvalidArgument(
           ""Depth of input ("", input_depth_value,
@@ -681,6 +683,8 @@ Status Conv2DShapeImpl(shape_inference::InferenceContext* c,
       int64_t num_groups = input_depth_value / filter_input_depth_value;
       if (c->ValueKnown(output_depth_dim)) {
         int64_t output_depth_value = c->Value(output_depth_dim);
+        if (num_groups == 0)
+          return errors::InvalidArgument(""Number of groups must not be 0"");
         if (output_depth_value % num_groups != 0)
           return errors::InvalidArgument(
               ""Depth of output ("", output_depth_value,
@@ -816,6 +820,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) {
   if (c->ValueKnown(input_depth_dim) && c->ValueKnown(filter_input_depth_dim)) {
     int64_t input_depth_value = c->Value(input_depth_dim),
             filter_input_depth_value = c->Value(filter_input_depth_dim);
+    if (filter_input_depth_value == 0)
+      return errors::InvalidArgument(""Depth of filter must not be 0"");
     if (input_depth_value % filter_input_depth_value != 0)
       return errors::InvalidArgument(
           ""Depth of input ("", input_depth_value,
@@ -825,6 +831,8 @@ Status Conv3DShape(shape_inference::InferenceContext* c) {
       int64_t num_groups = input_depth_value / filter_input_depth_value;
       if (c->ValueKnown(output_depth_dim)) {
         int64_t output_depth_value = c->Value(output_depth_dim);
+        if (num_groups == 0)
+          return errors::InvalidArgument(""Number of groups must not be 0"");
         if (output_depth_value % num_groups != 0)
           return errors::InvalidArgument(
               ""Depth of output ("", output_depth_value,
@@ -2456,6 +2464,9 @@ Status SparseReduceShapeFn(InferenceContext* c) {
 
     int64_t ndims = shape_vec.size();
     absl::flat_hash_set<int64> axes;
+    if (ndims == 0)
+      return errors::InvalidArgument(
+          ""Number of dims in shape tensor must not be 0"");
     for (int i = 0; i < axes_vec.size(); i++) {
       axes.insert((axes_vec(i) + ndims) % ndims);
     }
",1,test
578e634b4f1c1c684d4b4294f9e5281b2133b3ed,tensorflow/tensorflow,"Prevent a segfault in shape inference due to bad inputs.

PiperOrigin-RevId: 387737970
Change-Id: Ibd1cf3dbdce1dd2ab47fd633d5c5a57f7d8fb6e9",sparse_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/common_shape_fns.h""
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/shape_inference.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 
@@ -619,6 +620,8 @@ REGISTER_OP(""SparseFillEmptyRows"")
       DimensionHandle unused_dim;
       TF_RETURN_IF_ERROR(c->Merge(c->Dim(input_indices, 1),
                                   c->Dim(input_shape, 0), &unused_dim));
+      if (c->Value(c->NumElements(input_shape)) == 0)
+        return errors::InvalidArgument(""dense_shape must not be empty"");
       ShapeHandle output_indices =
           c->Matrix(InferenceContext::kUnknownDim, c->NumElements(input_shape));
       ShapeHandle output_values = c->Vector(InferenceContext::kUnknownDim);
",1,train
da857cfa0fde8f79ad0afdbc94e88b5d4bbec764,tensorflow/tensorflow,"Fix a shape inference issue leading to nullptr deref.

PiperOrigin-RevId: 387712259
Change-Id: I7e670772b259c068a501a187cd89f18773bb95a1",array_ops.cc,"@@ -2990,6 +2990,10 @@ REGISTER_OP(""Dequantize"")
       if (!s.ok() && s.code() != error::NOT_FOUND) {
         return s;
       }
+      if (axis < -1) {
+        return errors::InvalidArgument(""axis should be at least -1, got "",
+                                       axis);
+      }
       const int minmax_rank = (axis == -1) ? 0 : 1;
       TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
       ShapeHandle minmax;
",1,train
23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible

There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety.

PiperOrigin-RevId: 388501098
Change-Id: I3434318a5e07a798490533b554f46752397837e5",functional.py,"@@ -53,7 +53,7 @@ class Functional(training_lib.Model):
   than with subclassed `Model`s, specifically:
 
   - Model cloning (`keras.models.clone`)
-  - Serialization (`model.get_config()/from_config`, `model.to_json()/to_yaml()`
+  - Serialization (`model.get_config()/from_config`, `model.to_json()`
   - Whole-model saving (`model.save()`)
 
   A `Functional` model can be instantiated by passing two arguments to
",1,train
23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible

There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety.

PiperOrigin-RevId: 388501098
Change-Id: I3434318a5e07a798490533b554f46752397837e5",functional_test.py,"@@ -47,11 +47,6 @@ from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.platform import test
 from tensorflow.python.training.tracking.util import Checkpoint
 
-try:
-  import yaml  # pylint:disable=g-import-not-at-top
-except ImportError:
-  yaml = None
-
 
 class NetworkConstructionTest(keras_parameterized.TestCase):
 
@@ -627,10 +622,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase):
       json_str = model.to_json()
       models.model_from_json(json_str)
 
-      if yaml is not None:
-        yaml_str = model.to_yaml()
-        models.model_from_yaml(yaml_str)
-
   @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def test_invalid_graphs(self):
     a = layers.Input(shape=(32,), name='input_a')
@@ -1361,10 +1352,6 @@ class NetworkConstructionTest(keras_parameterized.TestCase):
     json_str = model.to_json()
     models.model_from_json(json_str)
 
-    if yaml is not None:
-      yaml_str = model.to_yaml()
-      models.model_from_yaml(yaml_str)
-
   def test_subclassed_error_if_init_not_called(self):
 
     class MyNetwork(training_lib.Model):
",1,train
23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible

There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety.

PiperOrigin-RevId: 388501098
Change-Id: I3434318a5e07a798490533b554f46752397837e5",training.py,"@@ -87,11 +87,6 @@ try:
   import h5py
 except ImportError:
   h5py = None
-
-try:
-  import yaml
-except ImportError:
-  yaml = None
 # pylint: enable=g-import-not-at-top
 
 
@@ -2416,6 +2411,9 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
   def to_yaml(self, **kwargs):
     """"""Returns a yaml string containing the network configuration.
 
+    Note: Since TF 2.6, this method is no longer supported and will raise a
+    RuntimeError.
+
     To load a network from a yaml save file, use
     `keras.models.model_from_yaml(yaml_string, custom_objects={})`.
 
@@ -2431,12 +2429,12 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
         A YAML string.
 
     Raises:
-        ImportError: if yaml module is not found.
+        RuntimeError: announces that the method poses a security risk
     """"""
-    if yaml is None:
-      raise ImportError(
-          'Requires yaml module installed (`pip install pyyaml`).')
-    return yaml.dump(self._updated_config(), **kwargs)
+    raise RuntimeError(
+        'Method `model.to_yaml()` has been removed due to security risk of '
+        'arbitrary code execution. Please use `model.to_json()` instead.'
+    )
 
   def reset_states(self):
     for layer in self.layers:
",1,train
23d6383eb6c14084a8fc3bdf164043b974818012,tensorflow/tensorflow,"Use the safer `safe_load` function instead of `unsafe_load` when possible

There is no need to open ourselves up to arbitrary code execution, especially since this is not in a performance critical loop, so we can take the slowdown due to safety.

PiperOrigin-RevId: 388501098
Change-Id: I3434318a5e07a798490533b554f46752397837e5",model_config.py,"@@ -18,18 +18,11 @@
 from tensorflow.python.keras.saving.saved_model import json_utils
 from tensorflow.python.util.tf_export import keras_export
 
-# pylint: disable=g-import-not-at-top
-try:
-  import yaml
-except ImportError:
-  yaml = None
-# pylint: enable=g-import-not-at-top
-
 
 @keras_export('keras.models.model_from_config')
 def model_from_config(config, custom_objects=None):
   """"""Instantiates a Keras model from its config.
- 
+
   Usage:
   ```
   # for a Functional API model
@@ -63,17 +56,8 @@ def model_from_config(config, custom_objects=None):
 def model_from_yaml(yaml_string, custom_objects=None):
   """"""Parses a yaml model configuration file and returns a model instance.
 
-  Usage:
-
-  >>> model = tf.keras.Sequential([
-  ...     tf.keras.layers.Dense(5, input_shape=(3,)),
-  ...     tf.keras.layers.Softmax()])
-  >>> try:
-  ...   import yaml
-  ...   config = model.to_yaml()
-  ...   loaded_model = tf.keras.models.model_from_yaml(config)
-  ... except ImportError:
-  ...   pass
+  Note: Since TF 2.6, this method is no longer supported and will raise a
+  RuntimeError.
 
   Args:
       yaml_string: YAML string or open file encoding a model configuration.
@@ -85,19 +69,13 @@ def model_from_yaml(yaml_string, custom_objects=None):
       A Keras model instance (uncompiled).
 
   Raises:
-      ImportError: if yaml module is not found.
+      RuntimeError: announces that the method poses a security risk
   """"""
-  if yaml is None:
-    raise ImportError('Requires yaml module installed (`pip install pyyaml`).')
-  # The method unsafe_load only exists in PyYAML 5.x+, so which branch of the
-  # try block is covered by tests depends on the installed version of PyYAML.
-  try:
-    # PyYAML 5.x+
-    config = yaml.unsafe_load(yaml_string)
-  except AttributeError:
-    config = yaml.load(yaml_string)
-  from tensorflow.python.keras.layers import deserialize  # pylint: disable=g-import-not-at-top
-  return deserialize(config, custom_objects=custom_objects)
+  raise RuntimeError(
+      'Method `model_from_yaml()` has been removed due to security risk of '
+      'arbitrary code execution. Please use `Model.to_json()` and '
+      '`model_from_json()` instead.'
+  )
 
 
 @keras_export('keras.models.model_from_json')
",1,train
4e2565483d0ffcadc719bd44893fb7f609bb5f12,tensorflow/tensorflow,"Fix bug that could cause map_fn to produce incorrect results (rather than an error)
when mapping over a ragged tensor with an inappropriate fn_output_signature.  (Note: there are cases where the default value for fn_output_signature is not appropriate, so the user needs to explicitly specify the correct output signature.)

PiperOrigin-RevId: 387606546
Change-Id: Ib4ea27b9634e6ab413f211cfe809a69a90f0e2cd",ragged_tensor_from_variant_op.cc,"@@ -174,7 +174,23 @@ Status NestedStackRaggedTensors(
   auto output_values_flat =
       output_ragged->mutable_values()->flat_outer_dims<VALUE_TYPE, 2>();
   int values_index = 0;
+
+  TensorShape expected_value_shape = component_values_shape;
+  expected_value_shape.RemoveDim(0);
+
   for (int i = 0; i < ragged_components.size(); i++) {
+    // Check that the flat_values tensor shape is compatible.
+    TensorShape value_shape = ragged_components[i].values().shape();
+    value_shape.RemoveDim(0);
+    if (value_shape != expected_value_shape) {
+      return errors::InvalidArgument(
+          ""All flat_values must have compatible shapes.  Shape at index 0: "",
+          expected_value_shape, "".  Shape at index "", i, "": "", value_shape,
+          "".  If you are using tf.map_fn, then you may need to specify an ""
+          ""explicit fn_output_signature with appropriate ragged_rank, and/or ""
+          ""convert output tensors to RaggedTensors."");
+    }
+
     auto component_values_flat =
         ragged_components[i].values().flat_outer_dims<VALUE_TYPE, 2>();
     int num_inner_elements = ragged_components[i].values().NumElements();
",1,train
4e2565483d0ffcadc719bd44893fb7f609bb5f12,tensorflow/tensorflow,"Fix bug that could cause map_fn to produce incorrect results (rather than an error)
when mapping over a ragged tensor with an inappropriate fn_output_signature.  (Note: there are cases where the default value for fn_output_signature is not appropriate, so the user needs to explicitly specify the correct output signature.)

PiperOrigin-RevId: 387606546
Change-Id: Ib4ea27b9634e6ab413f211cfe809a69a90f0e2cd",ragged_map_fn_op_test.py,"@@ -21,9 +21,11 @@ from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import map_fn as map_fn_lib
 from tensorflow.python.ops import math_ops as mo
 from tensorflow.python.ops import string_ops
 from tensorflow.python.ops.ragged import ragged_factory_ops
@@ -309,6 +311,27 @@ class RaggedMapOpTest(test_util.TensorFlowTestCase,
     )
     self.assertAllEqual(id_t2, [[0, 5], [0, 4]])
 
+  def testRaggedMapWithIncorrectFnOutputSignature(self):
+    x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]])
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                'All flat_values must have compatible shapes'):
+      y = map_fn_lib.map_fn(lambda r: map_fn_lib.map_fn(lambda y: r, r), x)
+      self.evaluate(y)
+
+  def testNestedRaggedMapWithFnOutputSignature(self):
+    ragged1d = ragged_tensor.RaggedTensorSpec([None], dtypes.int32)
+    ragged2d = ragged_tensor.RaggedTensorSpec([None, None], dtypes.int32)
+
+    x = ragged_factory_ops.constant([[1, 2, 3, 4], [1]])
+    # pylint: disable=g-long-lambda
+    y = map_fn_lib.map_fn(
+        lambda r: map_fn_lib.map_fn(
+            lambda y: r, r, fn_output_signature=ragged1d),
+        x,
+        fn_output_signature=ragged2d)
+    expected = [[[1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4], [1, 2, 3, 4]], [[1]]]
+    self.assertAllEqual(y, expected)
+
 
 if __name__ == '__main__':
   googletest.main()
",1,train
718721986aa137691ee23f03638867151f74935f,tensorflow/tensorflow,"Prevent division by 0 in `fully_connected.cc`

PiperOrigin-RevId: 385137282
Change-Id: If201e69b6e0048f0be001330b4b977e2b46db2cb",fully_connected.cc,"@@ -223,6 +223,7 @@ TfLiteStatus PrepareImpl(TfLiteContext* context, TfLiteNode* node) {
   }
 
   TF_LITE_ENSURE_EQ(context, NumDimensions(filter), 2);
+  TF_LITE_ENSURE(context, filter->dims->data[1] != 0);
   const int batch_size = input_size / filter->dims->data[1];
   const int num_units = filter->dims->data[0];
 
",1,train
5b048e87e4e55990dae6b547add4dae59f4e1c76,tensorflow/tensorflow,"Fix a null pointer exception in SVDF

This is due to not checking that `GetVariableInput` returns a non-null tensor.

Also fix a potential null pointer exception in `GetVariableInput`.

PiperOrigin-RevId: 385160147
Change-Id: Iadf3f0705b036a9014d27caa5a8bbd91f4c4c401",kernel_util.cc,"@@ -119,6 +119,7 @@ TfLiteStatus GetInputSafe(const TfLiteContext* context, const TfLiteNode* node,
 TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
                                int index) {
   TfLiteTensor* tensor = GetMutableInput(context, node, index);
+  if (tensor == nullptr) return nullptr;
   return tensor->is_variable ? tensor : nullptr;
 }
 
",1,train
5b048e87e4e55990dae6b547add4dae59f4e1c76,tensorflow/tensorflow,"Fix a null pointer exception in SVDF

This is due to not checking that `GetVariableInput` returns a non-null tensor.

Also fix a potential null pointer exception in `GetVariableInput`.

PiperOrigin-RevId: 385160147
Change-Id: Iadf3f0705b036a9014d27caa5a8bbd91f4c4c401",svdf.cc,"@@ -299,6 +299,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                     GetTemporarySafe(context, node, /*index=*/0, &scratch));
 
   TfLiteTensor* state = GetVariableInput(context, node, kStateTensor);
+  TF_LITE_ENSURE(context, state != nullptr);
   TfLiteTensor* output;
   TF_LITE_ENSURE_OK(context,
                     GetOutputSafe(context, node, kOutputTensor, &output));
",1,train
4a91f2069f7145aab6ba2d8cfe41be8a110c18a5,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data.

This is due to not checking that the params for the quantization exist. If there is no quantization, we should not access the `.params` field.

PiperOrigin-RevId: 385168337
Change-Id: I28661e4f12ba1c92cfeae23d22a3fb2df2a2c6a4",unidirectional_sequence_lstm.cc,"@@ -62,8 +62,12 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16(
       context,
       GetOutputSafe(context, node, lstm::full::kOutputTensor, &output_tensor));
 
+  TF_LITE_ENSURE(context,
+                 cell_state->quantization.type != kTfLiteNoQuantization);
   auto* cell_state_params =
       static_cast<TfLiteAffineQuantization*>(cell_state->quantization.params);
+  TF_LITE_ENSURE(context,
+                 output_tensor->quantization.type != kTfLiteNoQuantization);
   auto* proj_params = static_cast<TfLiteAffineQuantization*>(
       output_tensor->quantization.params);
   if (cell_clip > 0.0) {
@@ -160,6 +164,8 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16(
       TfLiteTensor* intermediate;
       TF_LITE_ENSURE_OK(context,
                         GetIntermediatesSafe(context, node, i, &intermediate));
+      TF_LITE_ENSURE(context,
+                     intermediate->quantization.type != kTfLiteNoQuantization);
       auto* params = static_cast<TfLiteAffineQuantization*>(
           intermediate->quantization.params);
       intermediate_scale.push_back(params->scale->data[0]);
@@ -174,6 +180,7 @@ TfLiteStatus PopulateQuantizedLstmParams8x8_16(
   // is ignored.
   TfLiteTensor* hidden;
   TF_LITE_ENSURE_OK(context, GetIntermediatesSafe(context, node, 4, &hidden));
+  TF_LITE_ENSURE(context, hidden->quantization.type != kTfLiteNoQuantization);
   auto* hidden_params =
       static_cast<TfLiteAffineQuantization*>(hidden->quantization.params);
   intermediate_scale.push_back(hidden_params->scale->data[0]);
@@ -760,6 +767,8 @@ TfLiteStatus PopulatePrecomputedZPTimesWeightsWithBias(TfLiteContext* context,
 
   const TfLiteTensor* intermediate =
       &context->tensors[node->intermediates->data[4]];
+  TF_LITE_ENSURE(context,
+                 intermediate->quantization.type != kTfLiteNoQuantization);
   const auto* params =
       static_cast<TfLiteAffineQuantization*>(intermediate->quantization.params);
   const int32_t hidden_zp = params->zero_point->data[0];
",1,train
537bc7c723439b9194a358f64d871dd326c18887,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data.

This is due to not checking that the params for the quantization exist. If there is no quantization, we should not access the `.params` field.

PiperOrigin-RevId: 385163909
Change-Id: I2beb8d50649b6542db224c163033fbcbaa49314f",svdf.cc,"@@ -256,14 +256,21 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                                                      output_temp_size_array));
 
     // Calculate effective scales.
+    TF_LITE_ENSURE(context, input->quantization.type != kTfLiteNoQuantization);
     auto* input_params =
         reinterpret_cast<TfLiteAffineQuantization*>(input->quantization.params);
+    TF_LITE_ENSURE(context,
+                   weights_feature->quantization.type != kTfLiteNoQuantization);
     auto* weights_feature_params = reinterpret_cast<TfLiteAffineQuantization*>(
         weights_feature->quantization.params);
+    TF_LITE_ENSURE(context, state->quantization.type != kTfLiteNoQuantization);
     auto* state_params =
         reinterpret_cast<TfLiteAffineQuantization*>(state->quantization.params);
+    TF_LITE_ENSURE(context,
+                   weights_time->quantization.type != kTfLiteNoQuantization);
     auto* weight_time_params = reinterpret_cast<TfLiteAffineQuantization*>(
         weights_time->quantization.params);
+    TF_LITE_ENSURE(context, output->quantization.type != kTfLiteNoQuantization);
     auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
         output->quantization.params);
     const double effective_scale_1 = input_params->scale->data[0] *
",1,train
8933b8a21280696ab119b63263babdb54c298538,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data.

This is due to not checking that the params for the quantization exist. If there is no quantization, we should not access the `.params` field.

PiperOrigin-RevId: 385173491
Change-Id: I8fc476c4b274fdb21ba741caa0fbc6d1b8840663",depthwise_conv.cc,"@@ -176,6 +176,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (data_type != kTfLiteFloat32) {
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
+    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
     const auto* affine_quantization =
         reinterpret_cast<TfLiteAffineQuantization*>(
             filter->quantization.params);
@@ -195,6 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   if (is_hybrid) {
+    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
     const auto* affine_quantization =
         reinterpret_cast<TfLiteAffineQuantization*>(
             filter->quantization.params);
@@ -495,6 +497,7 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.weights_offset = 0;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
+  TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
   const auto* affine_quantization =
       reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
   if (kernel_type == kReference) {
",1,test
1e206baedf8bef0334cca3eb92bab134ef525a28,tensorflow/tensorflow,"Prevent a division by 0 in division ops.

PiperOrigin-RevId: 385223169
Change-Id: Ia4228960b5d2aa44480385f74bdd70d21a3613c3",div.cc,"@@ -216,9 +216,23 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context,
                     GetOutputSafe(context, node, kOutputTensor, &output));
 
-  if (output->type == kTfLiteFloat32 || output->type == kTfLiteInt32) {
+  // TODO(b/193904910): This can be written with C++ templates
+#define TF_LITE_CHECK_DIV_NON_ZERO(data_type)                       \
+  const auto* input2_data = GetTensorData<data_type>(input2);       \
+  const size_t input2_elements = input2->bytes / sizeof(data_type); \
+  for (size_t i = 0; i < input2_elements; i++) {                    \
+    TF_LITE_ENSURE(context, input2_data[i] != 0);                   \
+  }
+
+  if (output->type == kTfLiteFloat32) {
+    // Div by zero seems ok in this case, just like in TF case infinities are
+    // returned. So we don't do a check at this point.
+    EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
+  } else if (output->type == kTfLiteInt32) {
+    TF_LITE_CHECK_DIV_NON_ZERO(int32_t);
     EvalDiv<kernel_type>(context, node, params, data, input1, input2, output);
   } else if (output->type == kTfLiteUInt8) {
+    TF_LITE_CHECK_DIV_NON_ZERO(uint8_t);
     TF_LITE_ENSURE_OK(
         context, EvalQuantized<kernel_type>(context, node, params, data, input1,
                                             input2, output));
@@ -229,6 +243,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         output->type);
     return kTfLiteError;
   }
+#undef TF_LITE_CHECK_DIV_NON_ZERO
 
   return kTfLiteOk;
 }
",1,test
d94ffe08a65400f898241c0374e9edc6fa8ed257,tensorflow/tensorflow,"Prevent an OOB read in `expand_dims.cc`

The for loop that follows this check assumes that `axis` is between `0` and `input_dims.size`. If the user-supplied `axis` is negative, the `if` block before this check is supposed to bring it back into the positive range (similar to how in Python one can write `l[-3]` to mean `l[-3 + len(l)]`).

PiperOrigin-RevId: 387200206
Change-Id: I162f4feba12d547c3a4340833ae682016a2ebfab",expand_dims.cc,"@@ -37,6 +37,7 @@ TfLiteStatus ExpandTensorDim(TfLiteContext* context, const TfLiteTensor& input,
     axis = input_dims.size + 1 + axis;
   }
   TF_LITE_ENSURE(context, axis <= input_dims.size);
+  TF_LITE_ENSURE(context, axis >= 0);
 
   TfLiteIntArray* output_dims = TfLiteIntArrayCreate(input_dims.size + 1);
   for (int i = 0; i < output_dims->size; ++i) {
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",averagepool_quantized_test.cc,"@@ -40,12 +40,14 @@ void RunOneAveragePoolTest(const PoolParams& params,
   std::vector<int8> optimized_averagePool_output(buffer_size);
   std::vector<int8> reference_averagePool_output(buffer_size);
 
-  reference_integer_ops::AveragePool(params, input_shape, input_data,
-                                     output_shape,
-                                     reference_averagePool_output.data());
-  optimized_integer_ops::AveragePool(params, input_shape, input_data,
-                                     output_shape,
-                                     optimized_averagePool_output.data());
+  bool reference_success = reference_integer_ops::AveragePool(
+      params, input_shape, input_data, output_shape,
+      reference_averagePool_output.data());
+  bool optimized_success = optimized_integer_ops::AveragePool(
+      params, input_shape, input_data, output_shape,
+      optimized_averagePool_output.data());
+  EXPECT_TRUE(reference_success);
+  EXPECT_TRUE(optimized_success);
 
   for (int i = 0; i < buffer_size; i++) {
     EXPECT_TRUE(reference_averagePool_output[i] ==
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -144,7 +144,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
   }
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape, const int8* input_data,
                         const RuntimeShape& output_shape, int8* output_data) {
   ruy::profiler::ScopeLabel label(""AveragePool/8bitWith32bitAccumulator"");
@@ -192,6 +192,7 @@ inline void AveragePool(const PoolParams& params,
               std::min(params.filter_height, input_height - in_y_origin);
           const int filter_count =
               (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+          if (filter_count == 0) return false;
           memset(acc, 0, tranche_depth * sizeof(acc[0]));
           const int8* input_ptr =
               input_data + depth_base +
@@ -267,6 +268,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 }  // namespace optimized_integer_ops
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",legacy_optimized_ops.h,"@@ -3761,7 +3761,7 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims,
                output_data, output_dims);
 }
 
-inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
+inline bool AveragePool(const float* input_data, const Dims<4>& input_dims,
                         int stride_width, int stride_height, int pad_width,
                         int pad_height, int kwidth, int kheight,
                         float output_activation_min,
@@ -3776,35 +3776,37 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
   params.padding_values.width = pad_width;
   params.float_activation_min = output_activation_min;
   params.float_activation_max = output_activation_max;
-  AveragePool(params, DimsToShape(input_dims), input_data,
-              DimsToShape(output_dims), output_data);
+  return AveragePool(params, DimsToShape(input_dims), input_data,
+                     DimsToShape(output_dims), output_data);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const float* input_data, const Dims<4>& input_dims,
+bool AveragePool(const float* input_data, const Dims<4>& input_dims,
                  int stride_width, int stride_height, int pad_width,
                  int pad_height, int kwidth, int kheight, float* output_data,
                  const Dims<4>& output_dims) {
   float output_activation_min, output_activation_max;
   GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
 
-  AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
-              pad_height, kwidth, kheight, output_activation_min,
-              output_activation_max, output_data, output_dims);
+  return AveragePool(input_data, input_dims, stride_width, stride_height,
+                     pad_width, pad_height, kwidth, kheight,
+                     output_activation_min, output_activation_max, output_data,
+                     output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
+bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
                  int pad_width, int pad_height, int filter_width,
                  int filter_height, float* output_data,
                  const Dims<4>& output_dims) {
-  AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
-                  filter_width, filter_height, output_data, output_dims);
+  return AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width,
+                         pad_height, filter_width, filter_height, output_data,
+                         output_dims);
 }
 
-inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
+inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims,
                         int stride_width, int stride_height, int pad_width,
                         int pad_height, int filter_width, int filter_height,
                         int32 output_activation_min,
@@ -3819,13 +3821,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
   params.padding_values.width = pad_width;
   params.quantized_activation_min = output_activation_min;
   params.quantized_activation_max = output_activation_max;
-  AveragePool(params, DimsToShape(input_dims), input_data,
-              DimsToShape(output_dims), output_data);
+  return AveragePool(params, DimsToShape(input_dims), input_data,
+                     DimsToShape(output_dims), output_data);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
+bool AveragePool(const uint8* input_data, const Dims<4>& input_dims,
                  int stride_width, int stride_height, int pad_width,
                  int pad_height, int filter_width, int filter_height,
                  int32 output_activation_min, int32 output_activation_max,
@@ -3839,21 +3841,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
     TFLITE_DCHECK_EQ(output_activation_min, 0);
     TFLITE_DCHECK_EQ(output_activation_max, 255);
   }
-  AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
-              pad_height, filter_width, filter_height, output_activation_min,
-              output_activation_max, output_data, output_dims);
+  return AveragePool(input_data, input_dims, stride_width, stride_height,
+                     pad_width, pad_height, filter_width, filter_height,
+                     output_activation_min, output_activation_max, output_data,
+                     output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
+bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
                  int pad_width, int pad_height, int filter_width,
                  int filter_height, int32 output_activation_min,
                  int32 output_activation_max, uint8* output_data,
                  const Dims<4>& output_dims) {
-  AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
-                  filter_width, filter_height, output_activation_min,
-                  output_activation_max, output_data, output_dims);
+  return AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width,
+                         pad_height, filter_width, filter_height,
+                         output_activation_min, output_activation_max,
+                         output_data, output_dims);
 }
 
 inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",optimized_ops.h,"@@ -3172,7 +3172,7 @@ inline int NodeOffset(int b, int h, int w, int height, int width) {
   return (b * height + h) * width + w;
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const float* input_data,
                         const RuntimeShape& output_shape, float* output_data) {
@@ -3187,6 +3187,9 @@ inline void AveragePool(const PoolParams& params,
   const int stride_height = params.stride_height;
   const int stride_width = params.stride_width;
 
+  if (stride_height == 0) return false;
+  if (stride_width == 0) return false;
+
   // TODO(benoitjacob) make this a proper reference impl without Eigen!
   const auto in_mat = MapAsMatrixWithLastDimAsRows(input_data, input_shape);
   auto out_mat = MapAsMatrixWithLastDimAsRows(output_data, output_shape);
@@ -3232,9 +3235,11 @@ inline void AveragePool(const PoolParams& params,
                                                   params.float_activation_min,
                                                   params.float_activation_max);
   }
+
+  return true;
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const uint8* input_data,
                         const RuntimeShape& output_shape, uint8* output_data) {
@@ -3283,6 +3288,7 @@ inline void AveragePool(const PoolParams& params,
               std::min(params.filter_height, input_height - in_y_origin);
           const int filter_count =
               (filter_x_end - filter_x_start) * (filter_y_end - filter_y_start);
+          if (filter_count == 0) return false;
           memset(acc, 0, tranche_depth * sizeof(acc[0]));
           const uint8* input_ptr =
               input_data + depth_base +
@@ -3369,6 +3375,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -21,7 +21,7 @@ limitations under the License.
 namespace tflite {
 namespace reference_integer_ops {
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const int8_t* input_data,
                         const RuntimeShape& output_shape, int8_t* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           // Round to the closest integer value.
           acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                         : (acc - filter_count / 2) / filter_count;
@@ -77,6 +78,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
@@ -136,7 +138,7 @@ inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
   }
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const int16_t* input_data,
                         const RuntimeShape& output_shape,
@@ -182,6 +184,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           // Round to the closest integer value.
           acc = acc > 0 ? (acc + filter_count / 2) / filter_count
                         : (acc - filter_count / 2) / filter_count;
@@ -193,6 +196,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void MaxPool(const PoolParams& params, const RuntimeShape& input_shape,
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",legacy_reference_ops.h,"@@ -1487,7 +1487,7 @@ void Sub(const T* input1_data, const Dims<4>& input1_dims, const T* input2_data,
       output_data);
 }
 
-inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
+inline bool AveragePool(const float* input_data, const Dims<4>& input_dims,
                         int stride_width, int stride_height, int pad_width,
                         int pad_height, int kwidth, int kheight,
                         float output_activation_min,
@@ -1502,8 +1502,8 @@ inline void AveragePool(const float* input_data, const Dims<4>& input_dims,
   params.padding_values.width = pad_width;
   params.float_activation_min = output_activation_min;
   params.float_activation_max = output_activation_max;
-  AveragePool(params, DimsToShape(input_dims), input_data,
-              DimsToShape(output_dims), output_data);
+  return AveragePool(params, DimsToShape(input_dims), input_data,
+                     DimsToShape(output_dims), output_data);
 }
 
 // Transitional version that will be moved shortly to legacy_reference_ops, as
@@ -1562,29 +1562,31 @@ inline void BroadcastMul(const uint8* input1_data, const Dims<4>& input1_dims,
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const float* input_data, const Dims<4>& input_dims,
+bool AveragePool(const float* input_data, const Dims<4>& input_dims,
                  int stride_width, int stride_height, int pad_width,
                  int pad_height, int kwidth, int kheight, float* output_data,
                  const Dims<4>& output_dims) {
   float output_activation_min, output_activation_max;
   GetActivationMinMax(Ac, &output_activation_min, &output_activation_max);
 
-  AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
-              pad_height, kwidth, kheight, output_activation_min,
-              output_activation_max, output_data, output_dims);
+  return AveragePool(input_data, input_dims, stride_width, stride_height,
+                     pad_width, pad_height, kwidth, kheight,
+                     output_activation_min, output_activation_max, output_data,
+                     output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
+bool AveragePool(const float* input_data, const Dims<4>& input_dims, int stride,
                  int pad_width, int pad_height, int filter_width,
                  int filter_height, float* output_data,
                  const Dims<4>& output_dims) {
-  AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
-                  filter_width, filter_height, output_data, output_dims);
+  return AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width,
+                         pad_height, filter_width, filter_height, output_data,
+                         output_dims);
 }
 
-inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
+inline bool AveragePool(const uint8* input_data, const Dims<4>& input_dims,
                         int stride_width, int stride_height, int pad_width,
                         int pad_height, int filter_width, int filter_height,
                         int32 output_activation_min,
@@ -1599,13 +1601,13 @@ inline void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
   params.padding_values.width = pad_width;
   params.quantized_activation_min = output_activation_min;
   params.quantized_activation_max = output_activation_max;
-  AveragePool(params, DimsToShape(input_dims), input_data,
-              DimsToShape(output_dims), output_data);
+  return AveragePool(params, DimsToShape(input_dims), input_data,
+                     DimsToShape(output_dims), output_data);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
+bool AveragePool(const uint8* input_data, const Dims<4>& input_dims,
                  int stride_width, int stride_height, int pad_width,
                  int pad_height, int filter_width, int filter_height,
                  int32 output_activation_min, int32 output_activation_max,
@@ -1619,21 +1621,23 @@ void AveragePool(const uint8* input_data, const Dims<4>& input_dims,
     TFLITE_DCHECK_EQ(output_activation_min, 0);
     TFLITE_DCHECK_EQ(output_activation_max, 255);
   }
-  AveragePool(input_data, input_dims, stride_width, stride_height, pad_width,
-              pad_height, filter_width, filter_height, output_activation_min,
-              output_activation_max, output_data, output_dims);
+  return AveragePool(input_data, input_dims, stride_width, stride_height,
+                     pad_width, pad_height, filter_width, filter_height,
+                     output_activation_min, output_activation_max, output_data,
+                     output_dims);
 }
 
 // legacy, for compatibility with old checked-in code
 template <FusedActivationFunctionType Ac>
-void AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
+bool AveragePool(const uint8* input_data, const Dims<4>& input_dims, int stride,
                  int pad_width, int pad_height, int filter_width,
                  int filter_height, int32 output_activation_min,
                  int32 output_activation_max, uint8* output_data,
                  const Dims<4>& output_dims) {
-  AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width, pad_height,
-                  filter_width, filter_height, output_activation_min,
-                  output_activation_max, output_data, output_dims);
+  return AveragePool<Ac>(input_data, input_dims, stride, stride, pad_width,
+                         pad_height, filter_width, filter_height,
+                         output_activation_min, output_activation_max,
+                         output_data, output_dims);
 }
 
 inline void MaxPool(const float* input_data, const Dims<4>& input_dims,
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.h,"@@ -23,7 +23,7 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const float* input_data,
                         const RuntimeShape& output_shape, float* output_data) {
@@ -66,6 +66,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           const float average = total / filter_count;
           output_data[Offset(output_shape, batch, out_y, out_x, channel)] =
               ActivationFunctionWithMinMax(average, params.float_activation_min,
@@ -74,9 +75,10 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
-inline void AveragePool(const PoolParams& params,
+inline bool AveragePool(const PoolParams& params,
                         const RuntimeShape& input_shape,
                         const uint8_t* input_data,
                         const RuntimeShape& output_shape,
@@ -122,6 +124,7 @@ inline void AveragePool(const PoolParams& params,
               filter_count++;
             }
           }
+          if (filter_count == 0) return false;
           acc = (acc + filter_count / 2) / filter_count;
           acc = std::max(acc, params.quantized_activation_min);
           acc = std::min(acc, params.quantized_activation_max);
@@ -131,6 +134,7 @@ inline void AveragePool(const PoolParams& params,
       }
     }
   }
+  return true;
 }
 
 inline void L2Pool(const PoolParams& params, const RuntimeShape& input_shape,
",1,train
dfa22b348b70bb89d6d6ec0ff53973bacb4f4695,tensorflow/tensorflow,"Prevent a division by 0 in average ops.

PiperOrigin-RevId: 385184660
Change-Id: I7affd4554f9b336fca29ac68f633232c094d0bd3",pooling.cc,"@@ -117,117 +117,126 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <KernelType kernel_type>
-void AverageEvalFloat(TfLiteContext* context, TfLiteNode* node,
-                      TfLitePoolParams* params, OpData* data,
-                      const TfLiteTensor* input, TfLiteTensor* output) {
+TfLiteStatus AverageEvalFloat(TfLiteContext* context, TfLiteNode* node,
+                              TfLitePoolParams* params, OpData* data,
+                              const TfLiteTensor* input, TfLiteTensor* output) {
   float activation_min, activation_max;
   CalculateActivationRange(params->activation, &activation_min,
                            &activation_max);
-#define TF_LITE_AVERAGE_POOL(type)                                       \
-  tflite::PoolParams op_params;                                          \
-  op_params.stride_height = params->stride_height;                       \
-  op_params.stride_width = params->stride_width;                         \
-  op_params.filter_height = params->filter_height;                       \
-  op_params.filter_width = params->filter_width;                         \
-  op_params.padding_values.height = data->padding.height;                \
-  op_params.padding_values.width = data->padding.width;                  \
-  op_params.float_activation_min = activation_min;                       \
-  op_params.float_activation_max = activation_max;                       \
-  type::AveragePool(op_params, GetTensorShape(input),                    \
-                    GetTensorData<float>(input), GetTensorShape(output), \
-                    GetTensorData<float>(output))
+#define TF_LITE_AVERAGE_POOL(type)                                            \
+  tflite::PoolParams op_params;                                               \
+  op_params.stride_height = params->stride_height;                            \
+  op_params.stride_width = params->stride_width;                              \
+  op_params.filter_height = params->filter_height;                            \
+  op_params.filter_width = params->filter_width;                              \
+  op_params.padding_values.height = data->padding.height;                     \
+  op_params.padding_values.width = data->padding.width;                       \
+  op_params.float_activation_min = activation_min;                            \
+  op_params.float_activation_max = activation_max;                            \
+  TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \
+                                            GetTensorData<float>(input),      \
+                                            GetTensorShape(output),           \
+                                            GetTensorData<float>(output)))
   if (kernel_type == kReference) {
     TF_LITE_AVERAGE_POOL(reference_ops);
   } else {
     TF_LITE_AVERAGE_POOL(optimized_ops);
   }
 #undef TF_LITE_AVERAGE_POOL
+  return kTfLiteOk;
 }
 
 template <KernelType kernel_type>
-void AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node,
-                               TfLitePoolParams* params, OpData* data,
-                               const TfLiteTensor* input,
-                               TfLiteTensor* output) {
+TfLiteStatus AverageEvalQuantizedUint8(TfLiteContext* context, TfLiteNode* node,
+                                       TfLitePoolParams* params, OpData* data,
+                                       const TfLiteTensor* input,
+                                       TfLiteTensor* output) {
   int32_t activation_min;
   int32_t activation_max;
   (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                           &activation_min, &activation_max);
-#define TF_LITE_AVERAGE_POOL(type)                                         \
-  tflite::PoolParams op_params;                                            \
-  op_params.stride_height = params->stride_height;                         \
-  op_params.stride_width = params->stride_width;                           \
-  op_params.filter_height = params->filter_height;                         \
-  op_params.filter_width = params->filter_width;                           \
-  op_params.padding_values.height = data->padding.height;                  \
-  op_params.padding_values.width = data->padding.width;                    \
-  op_params.quantized_activation_min = activation_min;                     \
-  op_params.quantized_activation_max = activation_max;                     \
-  type::AveragePool(op_params, GetTensorShape(input),                      \
-                    GetTensorData<uint8_t>(input), GetTensorShape(output), \
-                    GetTensorData<uint8_t>(output))
+#define TF_LITE_AVERAGE_POOL(type)                                            \
+  tflite::PoolParams op_params;                                               \
+  op_params.stride_height = params->stride_height;                            \
+  op_params.stride_width = params->stride_width;                              \
+  op_params.filter_height = params->filter_height;                            \
+  op_params.filter_width = params->filter_width;                              \
+  op_params.padding_values.height = data->padding.height;                     \
+  op_params.padding_values.width = data->padding.width;                       \
+  op_params.quantized_activation_min = activation_min;                        \
+  op_params.quantized_activation_max = activation_max;                        \
+  TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \
+                                            GetTensorData<uint8_t>(input),    \
+                                            GetTensorShape(output),           \
+                                            GetTensorData<uint8_t>(output)))
   if (kernel_type == kReference) {
     TF_LITE_AVERAGE_POOL(reference_ops);
   } else {
     TF_LITE_AVERAGE_POOL(optimized_ops);
   }
 #undef TF_LITE_AVERAGE_POOL
+  return kTfLiteOk;
 }
 
 template <KernelType kernel_type>
-void AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
-                              TfLitePoolParams* params, OpData* data,
-                              const TfLiteTensor* input, TfLiteTensor* output) {
+TfLiteStatus AverageEvalQuantizedInt8(TfLiteContext* context, TfLiteNode* node,
+                                      TfLitePoolParams* params, OpData* data,
+                                      const TfLiteTensor* input,
+                                      TfLiteTensor* output) {
   int32_t activation_min;
   int32_t activation_max;
 
   (void)CalculateActivationRangeQuantized(context, params->activation, output,
                                           &activation_min, &activation_max);
-#define TF_LITE_AVERAGE_POOL(type)                                        \
-  tflite::PoolParams op_params;                                           \
-  op_params.stride_height = params->stride_height;                        \
-  op_params.stride_width = params->stride_width;                          \
-  op_params.filter_height = params->filter_height;                        \
-  op_params.filter_width = params->filter_width;                          \
-  op_params.padding_values.height = data->padding.height;                 \
-  op_params.padding_values.width = data->padding.width;                   \
-  op_params.quantized_activation_min = activation_min;                    \
-  op_params.quantized_activation_max = activation_max;                    \
-  type::AveragePool(op_params, GetTensorShape(input),                     \
-                    GetTensorData<int8_t>(input), GetTensorShape(output), \
-                    GetTensorData<int8_t>(output))
+#define TF_LITE_AVERAGE_POOL(type)                                            \
+  tflite::PoolParams op_params;                                               \
+  op_params.stride_height = params->stride_height;                            \
+  op_params.stride_width = params->stride_width;                              \
+  op_params.filter_height = params->filter_height;                            \
+  op_params.filter_width = params->filter_width;                              \
+  op_params.padding_values.height = data->padding.height;                     \
+  op_params.padding_values.width = data->padding.width;                       \
+  op_params.quantized_activation_min = activation_min;                        \
+  op_params.quantized_activation_max = activation_max;                        \
+  TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \
+                                            GetTensorData<int8_t>(input),     \
+                                            GetTensorShape(output),           \
+                                            GetTensorData<int8_t>(output)))
   if (kernel_type == kReference) {
     TF_LITE_AVERAGE_POOL(reference_integer_ops);
   } else {
     TF_LITE_AVERAGE_POOL(optimized_integer_ops);
   }
 #undef TF_LITE_AVERAGE_POOL
+  return kTfLiteOk;
 }
 
 template <KernelType kernel_type>
-void AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
-                               TfLitePoolParams* params, OpData* data,
-                               const TfLiteTensor* input,
-                               TfLiteTensor* output) {
+TfLiteStatus AverageEvalQuantizedInt16(TfLiteContext* context, TfLiteNode* node,
+                                       TfLitePoolParams* params, OpData* data,
+                                       const TfLiteTensor* input,
+                                       TfLiteTensor* output) {
   int32_t activation_min;
   int32_t activation_max;
   CalculateActivationRangeQuantized(context, params->activation, output,
                                     &activation_min, &activation_max);
-#define TF_LITE_AVERAGE_POOL(type)                                         \
-  tflite::PoolParams op_params;                                            \
-  op_params.stride_height = params->stride_height;                         \
-  op_params.stride_width = params->stride_width;                           \
-  op_params.filter_height = params->filter_height;                         \
-  op_params.filter_width = params->filter_width;                           \
-  op_params.padding_values.height = data->padding.height;                  \
-  op_params.padding_values.width = data->padding.width;                    \
-  op_params.quantized_activation_min = activation_min;                     \
-  op_params.quantized_activation_max = activation_max;                     \
-  type::AveragePool(op_params, GetTensorShape(input),                      \
-                    GetTensorData<int16_t>(input), GetTensorShape(output), \
-                    GetTensorData<int16_t>(output))
+#define TF_LITE_AVERAGE_POOL(type)                                            \
+  tflite::PoolParams op_params;                                               \
+  op_params.stride_height = params->stride_height;                            \
+  op_params.stride_width = params->stride_width;                              \
+  op_params.filter_height = params->filter_height;                            \
+  op_params.filter_width = params->filter_width;                              \
+  op_params.padding_values.height = data->padding.height;                     \
+  op_params.padding_values.width = data->padding.width;                       \
+  op_params.quantized_activation_min = activation_min;                        \
+  op_params.quantized_activation_max = activation_max;                        \
+  TF_LITE_ENSURE(context, type::AveragePool(op_params, GetTensorShape(input), \
+                                            GetTensorData<int16_t>(input),    \
+                                            GetTensorShape(output),           \
+                                            GetTensorData<int16_t>(output)))
   TF_LITE_AVERAGE_POOL(reference_integer_ops);
 #undef TF_LITE_AVERAGE_POOL
+  return kTfLiteOk;
 }
 
 template <KernelType kernel_type>
@@ -380,20 +389,17 @@ TfLiteStatus AverageEval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
   switch (input->type) {  // Already know in/out types are same.
     case kTfLiteFloat32:
-      AverageEvalFloat<kernel_type>(context, node, params, data, input, output);
-      break;
+      return AverageEvalFloat<kernel_type>(context, node, params, data, input,
+                                           output);
     case kTfLiteUInt8:
-      AverageEvalQuantizedUint8<kernel_type>(context, node, params, data, input,
-                                             output);
-      break;
+      return AverageEvalQuantizedUint8<kernel_type>(context, node, params, data,
+                                                    input, output);
     case kTfLiteInt8:
-      AverageEvalQuantizedInt8<kernel_type>(context, node, params, data, input,
-                                            output);
-      break;
+      return AverageEvalQuantizedInt8<kernel_type>(context, node, params, data,
+                                                   input, output);
     case kTfLiteInt16:
-      AverageEvalQuantizedInt16<kernel_type>(context, node, params, data, input,
-                                             output);
-      break;
+      return AverageEvalQuantizedInt16<kernel_type>(context, node, params, data,
+                                                    input, output);
     default:
       TF_LITE_KERNEL_LOG(context, ""Type %s not currently supported."",
                          TfLiteTypeGetName(input->type));
",1,train
bb6a0383ed553c286f87ca88c207f6774d5c4a8f,tensorflow/tensorflow,"Prevent heap OOB read in TFLite's `gather_nd.cc`.

Passing negative indices is illegal, but a check was missing, so this resulted in OOB accesses.

PiperOrigin-RevId: 387208551
Change-Id: I6b7a8a62d3e7c13a16d81619e5bc23ae2cdbc7fd",gather_nd.cc,"@@ -123,6 +123,17 @@ TfLiteStatus GatherNdString(const TfLiteTensor* params,
 template <typename IndicesT>
 TfLiteStatus EvalGatherNd(TfLiteContext* context, const TfLiteTensor* params,
                           const TfLiteTensor* indices, TfLiteTensor* output) {
+  bool indices_has_only_positive_elements = true;
+  const auto* indices_values = GetTensorData<IndicesT>(indices);
+  const size_t num_indices = indices->bytes / sizeof(IndicesT);
+  for (size_t i = 0; i < num_indices; i++) {
+    if (indices_values[i] < 0) {
+      indices_has_only_positive_elements = false;
+      break;
+    }
+  }
+  TF_LITE_ENSURE(context, indices_has_only_positive_elements);
+
   switch (params->type) {
     case kTfLiteFloat32:
       return GatherNd<float, IndicesT>(params, indices, output);
",1,train
eb921122119a6b6e470ee98b89e65d721663179d,tensorflow/tensorflow,"Prevent heap OOB read in TFLite's `gather.cc`.

Passing negative indices is illegal, but a check was missing, so this resulted in OOB accesses.

PiperOrigin-RevId: 387231300
Change-Id: I3111b54b2f232638d795be17efc46abe4ede6bf8",gather.cc,"@@ -117,8 +117,20 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 template <typename InputT, typename PositionsT>
-TfLiteStatus Gather(const TfLiteGatherParams& params, const TfLiteTensor* input,
-                    const TfLiteTensor* positions, TfLiteTensor* output) {
+TfLiteStatus Gather(TfLiteContext* context, const TfLiteGatherParams& params,
+                    const TfLiteTensor* input, const TfLiteTensor* positions,
+                    TfLiteTensor* output) {
+  const PositionsT* indexes = GetTensorData<PositionsT>(positions);
+  bool indices_has_only_positive_elements = true;
+  const size_t num_indices = positions->bytes / sizeof(PositionsT);
+  for (size_t i = 0; i < num_indices; i++) {
+    if (indexes[i] < 0) {
+      indices_has_only_positive_elements = false;
+      break;
+    }
+  }
+  TF_LITE_ENSURE(context, indices_has_only_positive_elements);
+
   tflite::GatherParams op_params;
   op_params.axis = params.axis;
   op_params.batch_dims = params.batch_dims;
@@ -134,7 +146,18 @@ TfLiteStatus GatherStrings(TfLiteContext* context, const TfLiteTensor* input,
                            const TfLiteTensor* positions,
                            TfLiteTensor* output) {
   DynamicBuffer buffer;
+
   const PositionT* indexes = GetTensorData<PositionT>(positions);
+  bool indices_has_only_positive_elements = true;
+  const size_t num_indices = positions->bytes / sizeof(PositionT);
+  for (size_t i = 0; i < num_indices; i++) {
+    if (indexes[i] < 0) {
+      indices_has_only_positive_elements = false;
+      break;
+    }
+  }
+  TF_LITE_ENSURE(context, indices_has_only_positive_elements);
+
   const PositionT num_strings = GetStringCount(input);
   const int num_indexes = NumElements(positions);
 
@@ -163,19 +186,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   if (positions->type == kTfLiteInt32) {
     switch (input->type) {
       case kTfLiteFloat32:
-        return Gather<float, int32_t>(*params, input, positions, output);
+        return Gather<float, int32_t>(context, *params, input, positions,
+                                      output);
       case kTfLiteUInt8:
-        return Gather<uint8_t, int32_t>(*params, input, positions, output);
+        return Gather<uint8_t, int32_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt8:
-        return Gather<int8_t, int32_t>(*params, input, positions, output);
+        return Gather<int8_t, int32_t>(context, *params, input, positions,
+                                       output);
       case kTfLiteInt16:
-        return Gather<int16_t, int32_t>(*params, input, positions, output);
+        return Gather<int16_t, int32_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt32:
-        return Gather<int32_t, int32_t>(*params, input, positions, output);
+        return Gather<int32_t, int32_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt64:
-        return Gather<int64_t, int32_t>(*params, input, positions, output);
+        return Gather<int64_t, int32_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteBool:
-        return Gather<bool, int32_t>(*params, input, positions, output);
+        return Gather<bool, int32_t>(context, *params, input, positions,
+                                     output);
       case kTfLiteString:
         return GatherStrings<int32_t>(context, input, positions, output);
       default:
@@ -187,19 +217,26 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   if (positions->type == kTfLiteInt64) {
     switch (input->type) {
       case kTfLiteFloat32:
-        return Gather<float, int64_t>(*params, input, positions, output);
+        return Gather<float, int64_t>(context, *params, input, positions,
+                                      output);
       case kTfLiteUInt8:
-        return Gather<uint8_t, int64_t>(*params, input, positions, output);
+        return Gather<uint8_t, int64_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt8:
-        return Gather<int8_t, int64_t>(*params, input, positions, output);
+        return Gather<int8_t, int64_t>(context, *params, input, positions,
+                                       output);
       case kTfLiteInt16:
-        return Gather<int16_t, int64_t>(*params, input, positions, output);
+        return Gather<int16_t, int64_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt32:
-        return Gather<int32_t, int64_t>(*params, input, positions, output);
+        return Gather<int32_t, int64_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteInt64:
-        return Gather<int64_t, int64_t>(*params, input, positions, output);
+        return Gather<int64_t, int64_t>(context, *params, input, positions,
+                                        output);
       case kTfLiteBool:
-        return Gather<bool, int64_t>(*params, input, positions, output);
+        return Gather<bool, int64_t>(context, *params, input, positions,
+                                     output);
       case kTfLiteString:
         return GatherStrings<int64_t>(context, input, positions, output);
       default:
",1,train
15691e456c7dc9bd6be203b09765b063bf4a380c,tensorflow/tensorflow,"Prevent dereferencing of null pointers in TFLite's `add.cc`.

PiperOrigin-RevId: 387244946
Change-Id: I56094233327fbd8439b92e1dbb1262176e00eeb9",optimized_ops.h,"@@ -265,7 +265,7 @@ inline void BinaryBroadcastFiveFold(const ArithmeticParams& unswitched_params,
       // We have broadcast y2*y3*y4 of input2 data y1 times, and now move on.
       input2_data_reset = input2_data_ptr;
     }
-  } else {
+  } else if (input1_data_ptr != nullptr) {
     // Special case of y4 == 1, in which the innermost loop is a single
     // element and can be combined with the next (y3) as an inner broadcast.
     //
",1,train
d6b57f461b39fd1aa8c1b870f1b974aac3554955,tensorflow/tensorflow,"Prevent nullptr dereference in MLIR TFLite dialect/optimizer.

PiperOrigin-RevId: 387220762
Change-Id: Id136ef04bb3d36123b4685d316ae81a9ec924d6b",optimize.cc,"@@ -68,6 +68,9 @@ constexpr char kRelu6[] = ""RELU6"";
 constexpr char kRelu1[] = ""RELU_N1_TO_1"";
 
 bool L2NormalizeReduceAxis(Value sq_op, DenseElementsAttr axis) {
+  if (axis.getNumElements() == 0) {
+    return false;
+  }
   if (sq_op.getType().cast<ShapedType>().getRank() - 1 ==
           *axis.getValues<int>().begin() ||
       *axis.getValues<int>().begin() == -1) {
",1,train
ee119d4a498979525046fba1c3dd3f13a039fbb1,tensorflow/tensorflow,"Fix segmentation fault in shape inference logic.

When running shape functions, some functions (such as `MutableHashTableShape`)
produce extra output information in the form of a `ShapeAndType` struct.  The
shapes embedded in this struct are owned by an inference context that is
cleaned up almost immediately; if the upstream code attempts to access this
shape information, it can trigger a segfault.

`ShapeRefiner` is mitigating this for normal output shapes by cloning them
(and thus putting the newly created shape under ownership of an inference
context that will not die), but we were not doing the same for shapes and
types.  This commit fixes that by doing similar logic on output shapes and
types.

PiperOrigin-RevId: 384761124
Change-Id: I07c0c42d29dfbb55bfa13ec1f09ef825fb0a1a1d",shape_refiner.cc,"@@ -120,9 +120,26 @@ Status ShapeRefiner::InferShapesForFunctionSubNode(
     TF_RETURN_IF_ERROR(outer_context->MakeShapeFromShapeProto(proto, &handle));
     outer_context->set_output(index, handle);
 
-    auto* resource = node_context->input_handle_shapes_and_types(0);
+    const std::vector<ShapeAndType>* resource =
+        node_context->input_handle_shapes_and_types(0);
     if (resource) {
-      outer_context->set_output_handle_shapes_and_types(index, *resource);
+      // `ShapesAndType`s contain `ShapeHandle`s.  These `ShapeHandle`s point
+      // to `Shape`s that are owned by a different inference context too.  We
+      // need to copy them to the outer context to prevent them from being
+      // destroyed before they are used.
+      std::vector<ShapeAndType> copied_shapes_and_types;
+      for (auto& shape_and_type : *resource) {
+        ShapeHandle handle;
+        TensorShapeProto proto;
+        node_context->ShapeHandleToProto(shape_and_type.shape, &proto);
+        TF_RETURN_IF_ERROR(
+            outer_context->MakeShapeFromShapeProto(proto, &handle));
+        copied_shapes_and_types.push_back(
+            ShapeAndType(handle, shape_and_type.dtype, shape_and_type.type));
+      }
+
+      outer_context->set_output_handle_shapes_and_types(
+          index, copied_shapes_and_types);
     }
   }
 
",1,test
0575b640091680cfb70f4dd93e70658de43b94f9,tensorflow/tensorflow,"Prevent division by 0 in LSH projection.

PiperOrigin-RevId: 387225857
Change-Id: Iaeb572a763618c64f503e0026f6dd9fd769bf50c",lsh_projection.cc,"@@ -28,7 +28,7 @@ limitations under the License.
 //
 // Input:
 //   Tensor[0]: Hash functions. Dim.size == 2, DataType: Float.
-//              Tensor[0].Dim[0]: Num of hash functions.
+//              Tensor[0].Dim[0]: Num of hash functions. Must be at least 1.
 //              Tensor[0].Dim[1]: Num of projected output bits generated by
 //                                each hash function.
 //   In sparse case, Tensor[0].Dim[1] + ceil( log2(Tensor[0].Dim[0] )) <= 32.
@@ -82,6 +82,7 @@ TfLiteStatus Resize(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input;
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &input));
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
+  TF_LITE_ENSURE(context, SizeOfDimension(input, 0) >= 1);
 
   if (NumInputs(node) == 3) {
     const TfLiteTensor* weight;
",1,test
7c1692bd417eb4f9b33ead749a41166d6080af85,tensorflow/tensorflow,"PR #51732: Fix crash of tf.image.crop_and_resize when input is large number

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/51732

This PR is part of the effort in #46890 where
tf.image.crop_and_resize will crash if the shape contains very large numbers.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Copybara import of the project:

--
c8d87055a56d8740d27ad8bdc74a7459ede6900e by Yong Tang <yong.tang.github@outlook.com>:

Fix crash of tf.image.crop_and_resize when input is large number

This PR is part of the effort in 46890 where
tf.image.crop_and_resize will crash if the shape contains very large numbers.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/51732 from yongtang:46890-tf.image.crop_and_resize c8d87055a56d8740d27ad8bdc74a7459ede6900e
PiperOrigin-RevId: 394109830
Change-Id: If049dad0844df9353722029ee95bc76819eda1f4",crop_and_resize_op.cc,"@@ -170,14 +170,15 @@ class CropAndResizeOp : public AsyncOpKernel {
         context, crop_height > 0 && crop_width > 0,
         errors::InvalidArgument(""crop dimensions must be positive""), done);
 
+    TensorShape shape;
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(num_boxes), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_height), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(crop_width), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done);
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
-        context,
-        context->allocate_output(
-            0, TensorShape({num_boxes, crop_height, crop_width, depth}),
-            &output),
-        done);
+    OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output),
+                         done);
 
     auto compute_callback = [this, context, output]() {
       const Tensor& image = context->input(0);
@@ -417,14 +418,15 @@ class CropAndResizeGradImageOp : public AsyncOpKernel {
           done);
     }
 
+    TensorShape shape;
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(batch_size), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_height), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(image_width), done);
+    OP_REQUIRES_OK_ASYNC(context, shape.AddDimWithStatus(depth), done);
     // Allocate output tensor.
     Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
-        context,
-        context->allocate_output(
-            0, TensorShape({batch_size, image_height, image_width, depth}),
-            &output),
-        done);
+    OP_REQUIRES_OK_ASYNC(context, context->allocate_output(0, shape, &output),
+                         done);
 
     auto compute_callback = [this, context, output]() {
       const Tensor& grads = context->input(0);
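
Routing each dimension through a status-returning AddDim-style call lets the kernel reject shapes whose element count would overflow a signed 64-bit integer, instead of crashing later during allocation. The sketch below shows one way such a check can be written in plain C++; it is an illustration of the idea under that assumption, not the real TensorShape::AddDimWithStatus implementation.

#include <cstdint>
#include <limits>
#include <vector>

// Appends a dimension, failing if it is negative or if the running element
// count would overflow int64_t.
bool AddDimChecked(std::vector<int64_t>* dims, int64_t* num_elements,
                   int64_t dim) {
  if (dim < 0) return false;
  if (dim != 0 &&
      *num_elements > std::numeric_limits<int64_t>::max() / dim) {
    return false;  // num_elements * dim would overflow
  }
  dims->push_back(dim);
  *num_elements *= dim;
  return true;
}

int main() {
  std::vector<int64_t> dims;
  int64_t n = 1;
  // Oversized crop dimensions such as these make the running product
  // overflow, so the shape is rejected instead of crashing the allocator.
  bool ok = AddDimChecked(&dims, &n, 11) &&
            AddDimChecked(&dims, &n, 2065374891) &&
            AddDimChecked(&dims, &n, 1145309325) &&
            AddDimChecked(&dims, &n, 1);
  return ok ? 1 : 0;  // exits 0 because the oversized shape was rejected
}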
",1,train
7c1692bd417eb4f9b33ead749a41166d6080af85,tensorflow/tensorflow,"PR #51732: Fix crash of tf.image.crop_and_resize when input is large number

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/51732

This PR is part of the effort in #46890 where
tf.image.crop_and_resize will crash if the shape contains very large numbers.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
Copybara import of the project:

--
c8d87055a56d8740d27ad8bdc74a7459ede6900e by Yong Tang <yong.tang.github@outlook.com>:

Fix crash of tf.image.crop_and_resize when input is large number

This PR is part of the effort in 46890 where
tf.image.crop_and_resize will crash if the shape contains very large numbers.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>
COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/tensorflow/pull/51732 from yongtang:46890-tf.image.crop_and_resize c8d87055a56d8740d27ad8bdc74a7459ede6900e
PiperOrigin-RevId: 394109830
Change-Id: If049dad0844df9353722029ee95bc76819eda1f4",image_ops_test.py,"@@ -6075,6 +6075,16 @@ class DecodeImageTest(test_util.TensorFlowTestCase, parameterized.TestCase):
             crop_size=[1, 1])
         self.evaluate(op)
 
+  def testImageCropAndResizeWithInvalidInput(self):
+    with self.session():
+      with self.assertRaises((errors.InternalError, ValueError)):
+        op = image_ops_impl.crop_and_resize_v2(
+            image=np.ones((1, 1, 1, 1)),
+            boxes=np.ones((11, 4)),
+            box_indices=np.ones((11)),
+            crop_size=[2065374891, 1145309325])
+        self.evaluate(op)
+
   @parameterized.named_parameters(
       (""_jpeg"", ""JPEG"", ""jpeg_merge_test1.jpg""),
       (""_png"", ""PNG"", ""lena_rgba.png""),
",1,train
f09caa532b6e1ac8d2aa61b7832c78c5b79300c6,tensorflow/tensorflow,"Fix EinsumHelper::ParseEquation to avoid uninitialized accesses.

EinsumHelper::ParseEquation is supposed to return true or false in
input_has_ellipsis and output_has_ellipsis to indicate whether there is an
ellipsis in the inputs and output. Previously, when there was no ellipsis in the
inputs or output, the routine did not assign false to the variables. This
change initializes the two variables to false to fix the problem.
PiperOrigin-RevId: 391772004
Change-Id: I17b6c88aadef4131470378e48cced054bf252e86",einsum_op_impl.h,"@@ -153,6 +153,7 @@ struct EinsumHelper {
     input_has_ellipsis->resize(num_inputs);
     for (int i = 0; i < num_inputs; ++i) {
       input_label_counts->at(i).resize(num_labels);
+      input_has_ellipsis->at(i) = false;
       for (const int label : input_labels->at(i)) {
         if (label != kEllipsisLabel)
           input_label_counts->at(i)[label] += 1;
@@ -161,6 +162,7 @@ struct EinsumHelper {
       }
     }
     output_label_counts->resize(num_labels);
+    *output_has_ellipsis = false;
     for (const int label : *output_labels) {
       if (label != kEllipsisLabel)
         output_label_counts->at(label) += 1;
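
The bug class here is an out-parameter that is written only on some code paths and then read uninitialized by the caller; assigning a default before the conditional logic is the fix. A reduced standalone sketch with a hypothetical HasEllipsis helper (not the Einsum parser itself):

#include <vector>

constexpr int kEllipsisLabel = -1;

// Reports whether labels contains the ellipsis label. Writing false up front
// guarantees the out-parameter is defined on every path, which is exactly
// the initialization the commit adds.
void HasEllipsis(const std::vector<int>& labels, bool* has_ellipsis) {
  *has_ellipsis = false;
  for (int label : labels) {
    if (label == kEllipsisLabel) {
      *has_ellipsis = true;
      return;
    }
  }
}

int main() {
  bool flag;
  // Without the up-front assignment, a label list with no ellipsis would
  // leave flag holding whatever garbage was on the stack.
  HasEllipsis({0, 1, 2}, &flag);
  return flag ? 1 : 0;
}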
",1,test
368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints.

`CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently)
provide any bounds checking on its own, so the size is instead checked prior
to passing unvalidated data to that function.

PiperOrigin-RevId: 392971286
Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",saved_tensor_slice_util.h,"@@ -59,6 +59,9 @@ Status ParseShapeAndSlice(const string& shape_and_slice, TensorShape* shape,
 template <typename T>
 struct SaveTypeTraits;
 
+template <typename T>
+int TensorProtoDataSize(const TensorProto& t);
+
 template <typename T>
 const typename SaveTypeTraits<T>::SavedType* TensorProtoData(
     const TensorProto& t);
@@ -95,6 +98,10 @@ void Fill(T* data, size_t n, TensorProto* t);
 #define TENSOR_PROTO_EXTRACT_TYPE(TYPE, FIELD, FTYPE)             \
   TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, FTYPE)     \
   template <>                                                     \
+  inline int TensorProtoDataSize<TYPE>(const TensorProto& t) {    \
+    return t.FIELD##_val_size();                                  \
+  }                                                               \
+  template <>                                                     \
   inline void Fill(const TYPE* data, size_t n, TensorProto* t) {  \
     typename protobuf::RepeatedField<FTYPE> copy(data, data + n); \
     t->mutable_##FIELD##_val()->Swap(&copy);                      \
@@ -104,6 +111,10 @@ void Fill(T* data, size_t n, TensorProto* t);
 #define TENSOR_PROTO_EXTRACT_TYPE_COMPLEX(TYPE, FIELD, FTYPE)       \
   TENSOR_PROTO_EXTRACT_TYPE_HELPER(TYPE, FIELD, FTYPE, TYPE)        \
   template <>                                                       \
+  inline int TensorProtoDataSize<TYPE>(const TensorProto& t) {      \
+    return t.FIELD##_val_size() / 2;                                \
+  }                                                                 \
+  template <>                                                       \
   inline void Fill(const TYPE* data, size_t n, TensorProto* t) {    \
     const FTYPE* sub = reinterpret_cast<const FTYPE*>(data);        \
     typename protobuf::RepeatedField<FTYPE> copy(sub, sub + 2 * n); \
@@ -136,6 +147,11 @@ TENSOR_PROTO_EXTRACT_TYPE(quint16, int, int32);
 template <>
 struct SaveTypeTraits<qint32> : SaveTypeTraits<int32> {};
 
+template <>
+inline int TensorProtoDataSize<qint32>(const TensorProto& t) {
+  return t.int_val_size();
+}
+
 template <>
 inline const int32* TensorProtoData<qint32>(const TensorProto& t) {
   static_assert(SaveTypeTraits<qint32>::supported,
@@ -158,6 +174,11 @@ struct SaveTypeTraits<Eigen::half> {
   typedef protobuf::RepeatedField<int32> RepeatedField;
 };
 
+template <>
+inline int TensorProtoDataSize<Eigen::half>(const TensorProto& t) {
+  return t.half_val_size();
+}
+
 template <>
 inline const int* TensorProtoData<Eigen::half>(const TensorProto& t) {
   return t.half_val().data();
@@ -187,6 +208,11 @@ struct SaveTypeTraits<tstring> {
   typedef protobuf::RepeatedPtrField<string> RepeatedField;
 };
 
+template <>
+inline int TensorProtoDataSize<tstring>(const TensorProto& t) {
+  return t.string_val_size();
+}
+
 template <>
 inline const string* const* TensorProtoData<tstring>(const TensorProto& t) {
   static_assert(SaveTypeTraits<tstring>::supported,
",1,train
368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints.

`CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently)
provide any bounds checking on its own, so the size is instead checked prior
to passing unvalidated data to that function.

PiperOrigin-RevId: 392971286
Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",tensor_slice_reader.h,"@@ -181,6 +181,22 @@ bool TensorSliceReader::CopySliceData(const string& name,
               << slice_s.DebugString() << "": computed key = "" << key;
       return false;
     }
+    // Ensure the TensorSlice contains the expected amount of data.
+    TensorShape shp_s;
+    Status s = slice_s.SliceTensorShape(tss->shape(), &shp_s);
+    if (!s.ok()) {
+      VLOG(1) << ""Failed to slice tensor "" << name << "", slice ""
+              << slice_s.DebugString() << "": "" << s;
+      return false;
+    }
+    if (checkpoint::TensorProtoDataSize<T>(sts.data().data()) !=
+        shp_s.num_elements()) {
+      VLOG(1) << ""Tensor "" << name << "", slice "" << slice_s.DebugString()
+              << "" had an unexpected amount of data: expected = ""
+              << shp_s.num_elements() << "", got = ""
+              << checkpoint::TensorProtoDataSize<T>(sts.data().data());
+      return false;
+    }
     CopyDataFromTensorSliceToTensorSlice(
         tss->shape(), slice_s, slice,
         checkpoint::TensorProtoData<T>(sts.data().data()), data);
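
The added reader-side check amounts to comparing the number of elements actually present in the serialized slice against the number implied by the slice shape, and refusing to copy on a mismatch. A minimal standalone sketch of that guard, with plain vectors standing in for the tensor proto and the destination buffer:

#include <cstddef>
#include <vector>

// Copies slice data only when the serialized buffer really contains the
// number of elements implied by the slice shape; otherwise fails instead of
// reading past the end of the buffer.
bool CopySliceData(const std::vector<int>& src, std::size_t expected,
                   std::vector<int>* dst) {
  if (src.size() != expected) return false;  // the size check added upstream
  dst->assign(src.begin(), src.end());
  return true;
}

int main() {
  std::vector<int> dst;
  // A 2x5 slice should carry 10 values; this truncated checkpoint has 4.
  bool rejected = !CopySliceData({0, 1, 2, 3}, 10, &dst);
  return rejected ? 0 : 1;
}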
",1,train
368af875869a204b4ac552b9ddda59f6a46a56ec,tensorflow/tensorflow,"Avoid buffer overflow when loading tensors with insufficient data from checkpoints.

`CopyDataFromTensorSliceToTensorSlice` does not (and cannot conveniently)
provide any bounds checking on its own, so the size is instead checked prior
to passing unvalidated data to that function.

PiperOrigin-RevId: 392971286
Change-Id: If2073b36d4d5eedd386329f56729395fd7effee1",tensor_slice_reader_test.cc,"@@ -459,6 +459,33 @@ TEST(TensorSliceReaderTest, InvalidTensorSlice) {
   EXPECT_FALSE(reader.status().ok());
 }
 
+TEST(TensorSliceReaderTest, MissingTensorData) {
+  const string fname =
+      io::JoinPath(testing::TmpDir(), ""missing_data_checkpoint"");
+  TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder);
+  const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  TF_ASSERT_OK(writer.Add(""test"", TensorShape({4, 5}),
+                          TensorSlice::ParseOrDie(""0,2:-""), data));
+  TF_ASSERT_OK(writer.Finish());
+
+  MutateSavedTensorSlices(fname, [&](SavedTensorSlices sts) {
+    if (sts.has_data()) {
+      // Replace the data with only 4 elements.
+      Fill(data, 4, sts.mutable_data()->mutable_data());
+    }
+    return sts.SerializeAsString();
+  });
+
+  TensorSliceReader reader(fname, OpenTableTensorSliceReader);
+  TF_ASSERT_OK(reader.status());
+
+  // The tensor should be present, but loading it should fail due to the missing
+  // data.
+  EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr));
+  std::unique_ptr<Tensor> tensor;
+  EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok());
+}
+
 void CachedTensorSliceReaderTesterHelper(
     const TensorSliceWriter::CreateBuilderFunction& create_function,
     const TensorSliceReader::OpenTableFunction& open_function) {
",1,train
abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types.

Also fix the `Tensor(const TensorShape&)` constructor by swapping the LOG(FATAL)
messages for the unset and unsupported types.

PiperOrigin-RevId: 392695027
Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor.cc,"@@ -52,6 +52,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/protobuf.h""
@@ -723,11 +724,11 @@ bool Tensor::RefCountIsOne() const {
 // The macro CASES() expands to a switch statement conditioned on
 // TYPE_ENUM. Each case expands the STMTS after a typedef for T.
 #define SINGLE_ARG(...) __VA_ARGS__
-#define CASE(TYPE, STMTS)             \
-  case DataTypeToEnum<TYPE>::value: { \
-    typedef TYPE T;                   \
-    STMTS;                            \
-    break;                            \
+#define CASE(TYPE, STMTS)               \
+  case DataTypeToEnum<TYPE>::value: {   \
+    typedef TF_ATTRIBUTE_UNUSED TYPE T; \
+    STMTS;                              \
+    break;                              \
   }
 #define CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, INVALID, DEFAULT) \
   switch (TYPE_ENUM) {                                         \
@@ -763,9 +764,8 @@ bool Tensor::RefCountIsOne() const {
   }
 
 #define CASES(TYPE_ENUM, STMTS)                                      \
-  CASES_WITH_DEFAULT(TYPE_ENUM, STMTS,                               \
-                     LOG(FATAL) << ""Unexpected type: "" << TYPE_ENUM; \
-                     , LOG(FATAL) << ""Type not set"";)
+  CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << ""Type not set""; \
+                     , LOG(FATAL) << ""Unexpected type: "" << TYPE_ENUM;)
 
 Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape)
     : shape_(shape), buf_(nullptr) {
@@ -795,6 +795,16 @@ Tensor::Tensor(Allocator* a, DataType type, const TensorShape& shape,
   }
 }
 
+Status Tensor::BuildTensor(DataType type, const TensorShape& shape,
+                           Tensor* out_tensor) {
+  // Avoid crashes due to invalid or unsupported types.
+  CASES_WITH_DEFAULT(
+      type, {}, return errors::InvalidArgument(""Type not set""),
+      return errors::InvalidArgument(""Unexpected type: "", DataType_Name(type)));
+  *out_tensor = Tensor(type, shape);
+  return Status::OK();
+}
+
 // NOTE(mrry): The default allocator for a Tensor (when none is specified) is
 // the default CPU allocator for NUMA zone 0. Accessing that currently involves
 // acquiring a lock, which guards initialization of the per-NUMA zone
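
The shape of the change is a factory that validates its input type and returns an error, replacing a constructor that LOG(FATAL)s on bad input. The sketch below shows that pattern with a hypothetical SimpleTensor type and an enum standing in for Status; it is not the real Tensor::BuildTensor signature.

enum class DataType { kUnset, kFloat, kInt32, kUnsupportedRef };
enum class BuildStatus { kOk, kTypeNotSet, kUnexpectedType };

// Hypothetical tensor stand-in; the real class carries an allocator and a
// buffer, which are irrelevant to the validation pattern shown here.
struct SimpleTensor {
  DataType dtype = DataType::kUnset;
  long long num_elements = 0;
};

// Factory that validates the type and reports an error instead of aborting
// the process the way a LOG(FATAL) in a constructor would.
BuildStatus BuildTensor(DataType type, long long num_elements,
                        SimpleTensor* out) {
  if (type == DataType::kUnset) return BuildStatus::kTypeNotSet;
  if (type == DataType::kUnsupportedRef) return BuildStatus::kUnexpectedType;
  out->dtype = type;
  out->num_elements = num_elements;
  return BuildStatus::kOk;
}

int main() {
  SimpleTensor t;
  // A reference type coming from an untrusted checkpoint is rejected
  // gracefully; the caller can propagate the error instead of crashing.
  BuildStatus s = BuildTensor(DataType::kUnsupportedRef, 20, &t);
  return s == BuildStatus::kOk ? 1 : 0;
}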
",1,test
abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types.

Also fix the `Tensor(const TensorShape&)` constructor by swapping the LOG(FATAL)
messages for the unset and unsupported types.

PiperOrigin-RevId: 392695027
Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor.h,"@@ -170,6 +170,15 @@ class Tensor {
   /// for details.
   explicit Tensor(DataType type);
 
+  /// \brief Initializes a tensor with the input `type` and `shape`, or returns
+  /// an error and leaves `out_tensor` unmodified. This factory method should be
+  /// used instead of the corresponding constructor if calling code cannot
+  /// validate that the `DataType` is valid and supported.
+  ///
+  /// The underlying buffer is allocated using a `CPUAllocator`.
+  static Status BuildTensor(DataType type, const TensorShape& shape,
+                            Tensor* out_tensor);
+
  private:
   // A tag type for selecting the `Tensor` constructor overload that creates a
   // scalar tensor in host memory.
",1,test
abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types.

Also fix the `Tensor(const TensorShape&)` constructor by swapping the LOG(FATAL)
messages for the unset and unsupported types.

PiperOrigin-RevId: 392695027
Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor_slice_reader.cc,"@@ -248,7 +248,9 @@ Status TensorSliceReader::GetTensor(
     slice = tss->Slices().begin()->second.slice;
   }
 
-  std::unique_ptr<tensorflow::Tensor> t(new tensorflow::Tensor(type, shape));
+  std::unique_ptr<tensorflow::Tensor> t(new tensorflow::Tensor);
+  Status s = tensorflow::Tensor::BuildTensor(type, shape, t.get());
+  if (!s.ok()) return s;
   bool success = false;
 
 #define READER_COPY(dt)                                                  \
",1,test
abcced051cb1bd8fb05046ac3b6023a7ebcc4578,tensorflow/tensorflow,"Prevent crashes when loading tensor slices with unsupported types.

Also fix the `Tensor(const TensorShape&)` constructor by swapping the LOG(FATAL)
messages for the unset and unsupported types.

PiperOrigin-RevId: 392695027
Change-Id: I4beda7db950db951d273e3259a7c8534ece49354",tensor_slice_reader_test.cc,"@@ -13,15 +13,19 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include <utility>
-
 #include ""tensorflow/core/util/tensor_slice_reader.h""
 
+#include <utility>
+#include <vector>
+
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/framework/versions.pb.h""
 #include ""tensorflow/core/lib/core/status_test_util.h""
 #include ""tensorflow/core/lib/core/stringpiece.h""
+#include ""tensorflow/core/lib/io/iterator.h""
 #include ""tensorflow/core/lib/io/path.h""
+#include ""tensorflow/core/lib/io/table.h""
+#include ""tensorflow/core/lib/io/table_builder.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/env.h""
@@ -30,6 +34,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/test.h""
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/public/version.h""
+#include ""tensorflow/core/util/saved_tensor_slice.pb.h""
 #include ""tensorflow/core/util/saved_tensor_slice_util.h""
 #include ""tensorflow/core/util/tensor_slice_reader_cache.h""
 #include ""tensorflow/core/util/tensor_slice_writer.h""
@@ -309,6 +314,102 @@ TEST_SIMPLE_INT(int16, int32)
 TEST_SIMPLE_INT(int8, int32)
 TEST_SIMPLE_INT(uint8, int32)
 
+// Modifies the SavedTensorSlices messages in a checkpoint to allow creating
+// malformed or unsupported checkpoints.
+void MutateSavedTensorSlices(
+    const std::string& fname,
+    const std::function<std::string(SavedTensorSlices)>& mutator) {
+  table::Options options;
+  options.compression = table::kNoCompression;
+
+  // Read all entries from the table.
+  std::vector<std::pair<std::string, std::string>> entries;
+  {
+    std::unique_ptr<RandomAccessFile> file;
+    TF_CHECK_OK(Env::Default()->NewRandomAccessFile(fname, &file));
+    uint64 file_size;
+    TF_CHECK_OK(Env::Default()->GetFileSize(fname, &file_size));
+    table::Table* t;
+    TF_CHECK_OK(table::Table::Open(options, file.get(), file_size, &t));
+    std::unique_ptr<table::Table> table(t);
+    std::unique_ptr<table::Iterator> it(table->NewIterator());
+    for (it->Seek(""""); it->Valid(); it->Next()) {
+      entries.emplace_back(it->key(), it->value());
+    }
+    TF_CHECK_OK(it->status());
+  }
+
+  // Rewrite the table, mutating each value.
+  {
+    std::unique_ptr<WritableFile> file;
+    TF_CHECK_OK(Env::Default()->NewWritableFile(fname, &file));
+    table::TableBuilder builder(options, file.get());
+    for (const auto& entry : entries) {
+      SavedTensorSlices sts;
+      CHECK(sts.ParseFromString(entry.second));
+      builder.Add(entry.first, mutator(std::move(sts)));
+    }
+    TF_CHECK_OK(builder.Finish());
+    TF_CHECK_OK(file->Close());
+  }
+}
+
+TEST(TensorSliceReaderTest, MissingTensorType) {
+  const string fname = io::JoinPath(testing::TmpDir(), ""invalid_checkpoint"");
+  TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder);
+  const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  TensorShape shape({4, 5});
+  TensorSlice slice = TensorSlice::ParseOrDie(""0,2:-"");
+  TF_CHECK_OK(writer.Add(""test"", shape, slice, data));
+  TF_CHECK_OK(writer.Finish());
+
+  MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) {
+    if (sts.has_meta()) {
+      for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) {
+        tensor.clear_type();
+      }
+    }
+    return sts.SerializeAsString();
+  });
+
+  TensorSliceReader reader(fname, OpenTableTensorSliceReader);
+  TF_CHECK_OK(reader.status());
+
+  // The tensor should be present, but loading it should fail due to the
+  // unset (invalid) type.
+  EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr));
+  std::unique_ptr<Tensor> tensor;
+  EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok());
+}
+
+TEST(TensorSliceReaderTest, UnsupportedTensorType) {
+  const string fname = io::JoinPath(testing::TmpDir(), ""int32_ref_checkpoint"");
+  TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder);
+  const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  TensorShape shape({4, 5});
+  TensorSlice slice = TensorSlice::ParseOrDie(""0,2:-"");
+  TF_CHECK_OK(writer.Add(""test"", shape, slice, data));
+  TF_CHECK_OK(writer.Finish());
+
+  MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) {
+    if (sts.has_meta()) {
+      for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) {
+        tensor.set_type(DT_INT32_REF);
+      }
+    }
+    return sts.SerializeAsString();
+  });
+
+  TensorSliceReader reader(fname, OpenTableTensorSliceReader);
+  TF_CHECK_OK(reader.status());
+
+  // The tensor should be present, but loading it should fail due to the
+  // unsupported type.
+  EXPECT_TRUE(reader.HasTensor(""test"", nullptr, nullptr));
+  std::unique_ptr<Tensor> tensor;
+  EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok());
+}
+
 void CachedTensorSliceReaderTesterHelper(
     const TensorSliceWriter::CreateBuilderFunction& create_function,
     const TensorSliceReader::OpenTableFunction& open_function) {
",1,test
b619c6f865715ca3b15ef1842b5b95edbaa710ad,tensorflow/tensorflow,"Use BuildTensorShapeBase when parsing unverified TensorShapes during checkpoint loading.

This avoids crashing when the TensorShape has negative dimensions.

PiperOrigin-RevId: 392769882
Change-Id: Id1f7ae7fcf8142193556af47abfda81b13d3cce4",tensor_slice_reader.cc,"@@ -168,7 +168,9 @@ void TensorSliceReader::LoadShard(int shard) const {
                           ""checkpoint"");
   if (!status_.ok()) return;
   for (const SavedSliceMeta& ssm : sts.meta().tensor()) {
-    TensorShape ssm_shape(ssm.shape());
+    TensorShape ssm_shape;
+    status_ = TensorShape::BuildTensorShapeBase(ssm.shape(), &ssm_shape);
+    if (!status_.ok()) return;
     for (const TensorSliceProto& tsp : ssm.slice()) {
       TensorSlice ss_slice(tsp);
       status_ = RegisterTensorSlice(ssm.name(), ssm_shape, ssm.type(), fname,
",1,train
b619c6f865715ca3b15ef1842b5b95edbaa710ad,tensorflow/tensorflow,"Use BuildTensorShapeBase when parsing unverified TensorShapes during checkpoint loading.

This avoids crashing when the TensorShape has negative dimensions.

PiperOrigin-RevId: 392769882
Change-Id: Id1f7ae7fcf8142193556af47abfda81b13d3cce4",tensor_slice_reader_test.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <utility>
 #include <vector>
 
+#include ""tensorflow/core/framework/tensor_shape.pb.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/framework/versions.pb.h""
 #include ""tensorflow/core/lib/core/status_test_util.h""
@@ -410,6 +411,31 @@ TEST(TensorSliceReaderTest, UnsupportedTensorType) {
   EXPECT_FALSE(reader.GetTensor(""test"", &tensor).ok());
 }
 
+TEST(TensorSliceReaderTest, NegativeTensorShapeDimension) {
+  const string fname =
+      io::JoinPath(testing::TmpDir(), ""negative_dim_checkpoint"");
+  TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder);
+  const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  TF_CHECK_OK(writer.Add(""test"", TensorShape({4, 5}),
+                         TensorSlice::ParseOrDie(""0,2:-""), data));
+  TF_CHECK_OK(writer.Finish());
+
+  MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) {
+    if (sts.has_meta()) {
+      for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) {
+        for (auto& dim : *tensor.mutable_shape()->mutable_dim()) {
+          dim.set_size(-dim.size());
+        }
+      }
+    }
+    return sts.SerializeAsString();
+  });
+
+  TensorSliceReader reader(fname, OpenTableTensorSliceReader);
+  // The negative dimension should cause loading to fail.
+  EXPECT_FALSE(reader.status().ok());
+}
+
 void CachedTensorSliceReaderTesterHelper(
     const TensorSliceWriter::CreateBuilderFunction& create_function,
     const TensorSliceReader::OpenTableFunction& open_function) {
",1,train
e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos.

This avoids several sources of crashes and undefined behavior when loading
invalid checkpoints.

PiperOrigin-RevId: 392785704
Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice.cc,"@@ -14,7 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include ""tensorflow/core/framework/tensor_slice.h""
+
+#include <limits>
 #include <vector>
+
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/strings/numbers.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
@@ -44,6 +47,34 @@ TensorSlice::TensorSlice(
   }
 }
 
+Status TensorSlice::BuildTensorSlice(const TensorSliceProto& proto,
+                                     TensorSlice* output) {
+  output->Clear();
+  output->starts_.reserve(proto.extent_size());
+  output->lengths_.reserve(proto.extent_size());
+  for (const auto& e : proto.extent()) {
+    int64_t l = GetExtentLength(e);
+    if (e.start() != 0 || l != kFullExtent) {
+      if (e.start() < 0 || l <= 0) {
+        return errors::InvalidArgument(
+            ""Expected non-negative start and positive length but got start = "",
+            e.start(), "", length = "", l, "": extent = "", e.ShortDebugString());
+      }
+      // Calculating the extent end must not cause signed integer overflow.
+      if (static_cast<uint64_t>(e.start()) + static_cast<uint64_t>(e.length()) >
+          std::numeric_limits<int64_t>::max()) {
+        return errors::InvalidArgument(
+            ""Extent end exceeds the maximum possible size: extent = "",
+            e.ShortDebugString());
+      }
+    }
+    output->starts_.push_back(e.start());
+    output->lengths_.push_back(l);
+  }
+
+  return Status::OK();
+}
+
 Status TensorSlice::Parse(const string& str, TensorSlice* slice) {
   std::vector<string> items = str_util::Split(str, ':', str_util::SkipEmpty());
   slice->starts_.reserve(items.size());
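
One detail worth noting in BuildTensorSlice is how it verifies that start + length still fits in an int64 without performing a signed addition that could itself overflow (undefined behavior): both operands are widened to unsigned 64-bit first. A standalone sketch of the same check, assuming the values were already verified non-negative as the surrounding validation guarantees:

#include <cstdint>
#include <limits>

// Returns true if start + length stays within int64_t range. Assumes both
// inputs are non-negative, as the surrounding validation guarantees.
bool ExtentEndFits(int64_t start, int64_t length) {
  return static_cast<uint64_t>(start) + static_cast<uint64_t>(length) <=
         static_cast<uint64_t>(std::numeric_limits<int64_t>::max());
}

int main() {
  bool small_ok = ExtentEndFits(14, 1);
  bool huge_rejected = !ExtentEndFits(14, std::numeric_limits<int64_t>::max());
  return (small_ok && huge_rejected) ? 0 : 1;
}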
",1,train
e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos.

This avoids several sources of crashes and undefined behavior when loading
invalid checkpoints.

PiperOrigin-RevId: 392785704
Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice.h,"@@ -48,6 +48,12 @@ class TensorSlice {
   explicit TensorSlice(
       std::initializer_list<std::pair<int64_t, int64_t>> extents);
 
+  // This factory method should be used instead of the constructor that takes a
+  // `TensorSliceProto` if calling code cannot validate that the sizes specify a
+  // valid `TensorSlice`.
+  static Status BuildTensorSlice(const TensorSliceProto& proto,
+                                 TensorSlice* output);
+
   static Status Parse(const string& str, TensorSlice* output);
   static TensorSlice ParseOrDie(const string& str) {
     TensorSlice ret;
",1,train
e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos.

This avoids several sources of crashes and undefined behavior when loading
invalid checkpoints.

PiperOrigin-RevId: 392785704
Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_test.cc,"@@ -15,6 +15,8 @@ limitations under the License.
 
 #include ""tensorflow/core/framework/tensor_slice.h""
 
+#include <limits>
+
 #include ""tensorflow/core/lib/core/status_test_util.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/protobuf.h""
@@ -125,6 +127,48 @@ TEST(TensorSliceTest, Serialization) {
   }
 }
 
+// Testing `BuildTensorSlice` with valid and invalid input protos.
+TEST(TensorSliceTest, BuildTensorSlice) {
+  TensorSliceProto proto;
+  TensorSlice({{0, -1}, {0, 10}, {14, 1}}).AsProto(&proto);
+  TensorSlice s;
+
+  // Successful building.
+  {
+    TF_ASSERT_OK(TensorSlice::BuildTensorSlice(proto, &s));
+    EXPECT_EQ(""-:0,10:14,1"", s.DebugString());
+  }
+
+  // Failed building due to negative extent start.
+  {
+    TensorSliceProto invalid_proto = proto;
+    invalid_proto.mutable_extent(0)->set_start(-1);
+    EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok());
+  }
+
+  // Failed building due to negative extent length.
+  {
+    TensorSliceProto invalid_proto = proto;
+    invalid_proto.mutable_extent(2)->set_length(-1);
+    EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok());
+  }
+
+  // Failed building due to missing extent length.
+  {
+    TensorSliceProto invalid_proto = proto;
+    invalid_proto.mutable_extent(2)->clear_length();
+    EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok());
+  }
+
+  // Failed building due to extent end overflowing.
+  {
+    TensorSliceProto invalid_proto = proto;
+    invalid_proto.mutable_extent(2)->set_length(
+        std::numeric_limits<int64_t>::max());
+    EXPECT_FALSE(TensorSlice::BuildTensorSlice(invalid_proto, &s).ok());
+  }
+}
+
 // Testing the slice intersection
 TEST(TensorSliceTest, Intersection) {
   // ""EVERYTHING"" intersects with everything
",1,train
e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos.

This avoids several sources of crashes and undefined behavior when loading
invalid checkpoints.

PiperOrigin-RevId: 392785704
Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_reader.cc,"@@ -172,7 +172,9 @@ void TensorSliceReader::LoadShard(int shard) const {
     status_ = TensorShape::BuildTensorShapeBase(ssm.shape(), &ssm_shape);
     if (!status_.ok()) return;
     for (const TensorSliceProto& tsp : ssm.slice()) {
-      TensorSlice ss_slice(tsp);
+      TensorSlice ss_slice;
+      status_ = TensorSlice::BuildTensorSlice(tsp, &ss_slice);
+      if (!status_.ok()) return;
       status_ = RegisterTensorSlice(ssm.name(), ssm_shape, ssm.type(), fname,
                                     ss_slice, &tensors_);
       if (!status_.ok()) return;
",1,train
e8dc63704c88007ee4713076605c90188d66f3d2,tensorflow/tensorflow,"Add BuildTensorSlice for building from unvalidated TensorSliceProtos.

This avoids several sources of crashes and undefined behavior when loading
invalid checkpoints.

PiperOrigin-RevId: 392785704
Change-Id: Icd9713c768b882f3b58b427eddac376060696833",tensor_slice_reader_test.cc,"@@ -436,6 +436,29 @@ TEST(TensorSliceReaderTest, NegativeTensorShapeDimension) {
   EXPECT_FALSE(reader.status().ok());
 }
 
+TEST(TensorSliceReaderTest, InvalidTensorSlice) {
+  const string fname =
+      io::JoinPath(testing::TmpDir(), ""invalid_slice_checkpoint"");
+  TensorSliceWriter writer(fname, CreateTableTensorSliceBuilder);
+  const int32 data[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
+  TF_CHECK_OK(writer.Add(""test"", TensorShape({4, 5}),
+                         TensorSlice::ParseOrDie(""0,2:-""), data));
+  TF_CHECK_OK(writer.Finish());
+
+  MutateSavedTensorSlices(fname, [](SavedTensorSlices sts) {
+    if (sts.has_meta()) {
+      for (auto& tensor : *sts.mutable_meta()->mutable_tensor()) {
+        tensor.mutable_slice(0)->mutable_extent(0)->set_length(-10);
+      }
+    }
+    return sts.SerializeAsString();
+  });
+
+  TensorSliceReader reader(fname, OpenTableTensorSliceReader);
+  // The negative extent length should cause loading to fail.
+  EXPECT_FALSE(reader.status().ok());
+}
+
 void CachedTensorSliceReaderTesterHelper(
     const TensorSliceWriter::CreateBuilderFunction& create_function,
     const TensorSliceReader::OpenTableFunction& open_function) {
",1,train
7731e8dfbe4a56773be5dc94d631611211156659,tensorflow/tensorflow,"Don't constant-fold DT_RESOURCE constants.

PiperOrigin-RevId: 391803952
Change-Id: I0ea3ec31d3e7dfda0f03b4027a237f08d00a3091",constant_folding.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/log_memory.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/graph/algorithm.h""
 #include ""tensorflow/core/graph/node_builder.h""
 #include ""tensorflow/core/graph/subgraph.h""
@@ -223,7 +224,8 @@ bool IsConstantFoldable(
     std::unordered_map<const Node*, std::vector<Tensor>>*
         shape_replacement_map) {
   if (n->IsConstant()) {
-    return true;
+    // Skip constant folding resources as they cannot be deep copied.
+    return n->output_type(0) != DT_RESOURCE;
   }
   if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) {
     return true;
",1,train
7cf73a2274732c9d82af51c2bc2cf90d13cd7e6d,tensorflow/tensorflow,"Address QuantizeAndDequantizeV* heap oob. Added additional checks for the 'axis' attribute.

PiperOrigin-RevId: 402446942
Change-Id: Id2f6b82e4e740d0550329be02621c46466b5a5b9",array_ops.cc,"@@ -2863,7 +2863,10 @@ REGISTER_OP(""QuantizeAndDequantizeV2"")
       ShapeHandle minmax;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax));
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax));
-      if (axis != -1) {
+      if (axis < -1) {
+        return errors::InvalidArgument(""axis should be at least -1, got "",
+                                       axis);
+      } else if (axis != -1) {
         ShapeHandle input;
         TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
         DimensionHandle depth;
@@ -2895,7 +2898,10 @@ REGISTER_OP(""QuantizeAndDequantizeV4"")
       ShapeHandle minmax;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax));
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax));
-      if (axis != -1) {
+      if (axis < -1) {
+        return errors::InvalidArgument(""axis should be at least -1, got "",
+                                       axis);
+      } else if (axis != -1) {
         ShapeHandle input;
         TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
         DimensionHandle depth;
@@ -2923,7 +2929,10 @@ REGISTER_OP(""QuantizeAndDequantizeV4Grad"")
       ShapeHandle minmax;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(2), minmax_rank, &minmax));
       TF_RETURN_IF_ERROR(c->Merge(c->input(3), minmax, &minmax));
-      if (axis != -1) {
+      if (axis < -1) {
+        return errors::InvalidArgument(""axis should be at least -1, got "",
+                                       axis);
+      } else if (axis != -1) {
         ShapeHandle input;
         TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
         DimensionHandle depth;
@@ -2956,7 +2965,10 @@ REGISTER_OP(""QuantizeAndDequantizeV3"")
       ShapeHandle minmax;
       TF_RETURN_IF_ERROR(c->WithRank(c->input(1), minmax_rank, &minmax));
       TF_RETURN_IF_ERROR(c->Merge(c->input(2), minmax, &minmax));
-      if (axis != -1) {
+      if (axis < -1) {
+        return errors::InvalidArgument(""axis should be at least -1, got "",
+                                       axis);
+      } else if (axis != -1) {
         ShapeHandle input;
         TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), axis + 1, &input));
         DimensionHandle depth;
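
The shape functions treat axis == -1 as the per-tensor case and now reject anything more negative before the axis is used to index into the input shape. A minimal standalone sketch of that validation with a hypothetical ValidateAxis helper; the real code additionally ties the axis to the input rank through WithRankAtLeast.

#include <vector>

// Validates a quantization axis the way the updated shape functions do:
// -1 means per-tensor quantization, anything smaller is rejected, and a
// non-negative axis must be a valid index into the input shape.
bool ValidateAxis(int axis, const std::vector<int>& dims) {
  if (axis < -1) return false;  // the new check: axis should be at least -1
  if (axis != -1 && axis >= static_cast<int>(dims.size())) return false;
  return true;
}

int main() {
  std::vector<int> dims = {1, 2, 3, 4, 5};
  bool rejected = !ValidateAxis(-2, dims);  // mirrors the new test case
  bool per_tensor = ValidateAxis(-1, dims);
  bool per_channel = ValidateAxis(4, dims);
  return (rejected && per_tensor && per_channel) ? 0 : 1;
}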
",1,train
7cf73a2274732c9d82af51c2bc2cf90d13cd7e6d,tensorflow/tensorflow,"Address QuantizeAndDequantizeV* heap oob. Added additional checks for the 'axis' attribute.

PiperOrigin-RevId: 402446942
Change-Id: Id2f6b82e4e740d0550329be02621c46466b5a5b9",array_ops_test.cc,"@@ -1374,6 +1374,8 @@ TEST(ArrayOpsTest, QuantizeAndDequantizeV2_ShapeFn) {
   INFER_ERROR(""Shapes must be equal rank, but are 1 and 0"", op,
               ""[1,2,?,4,5];[];[1]"");
   INFER_ERROR(""Shape must be rank 0 but is rank 1"", op, ""[1,2,?,4,5];[1];[1]"");
+  (*op.node_def.mutable_attr())[""axis""].set_i(-2);
+  INFER_ERROR(""axis should be at least -1, got -2"", op, ""?;?;?"");
 }
 
 TEST(ArrayOpsTest, SpaceToBatch_ShapeFn) {
",1,train
4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1.

Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not.

In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. Passing fewer_dims_optimization=false stops this optimization.

PiperOrigin-RevId: 384844496
Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op.cc,"@@ -46,7 +46,8 @@ class SoftmaxXentWithLogitsOp : public OpKernel {
     TensorShape shape_in = logits_in.shape();
 
     BCast bcast(BCast::FromShape(logits_in.shape()),
-                BCast::FromShape(labels_in.shape()));
+                BCast::FromShape(labels_in.shape()),
+                /*fewer_dims_optimization=*/false);
     if (!logits_in.IsSameSize(labels_in)) {
       OP_REQUIRES(context, bcast.IsValid(),
                   errors::InvalidArgument(
@@ -88,20 +89,12 @@ class SoftmaxXentWithLogitsOp : public OpKernel {
                                 {0}, 1, shape_in, &back_out));
     if (shape_in.dim_size(0) > 0) {
       functor::XentFunctor<Device, T> functor;
-      if (logits_in.IsSameSize(labels_in)) {
-        functor(context->eigen_device<Device>(), shape_in.AsEigenDSizes<2>(),
-                Eigen::array<Eigen::DenseIndex, 2>{1, 1},
-                Eigen::array<Eigen::DenseIndex, 2>{1, 1}, logits_in.matrix<T>(),
-                labels_in.matrix<T>(), scratch.matrix<T>(), loss_out->vec<T>(),
-                back_out->matrix<T>());
-      } else {
-        functor(context->eigen_device<Device>(), shape_in.AsEigenDSizes<2>(),
-                BCast::ToIndexArray<2>(bcast.x_bcast()),
-                BCast::ToIndexArray<2>(bcast.y_bcast()),
-                logits_in.template shaped<T, 2>(bcast.x_reshape()),
-                labels_in.template shaped<T, 2>(bcast.y_reshape()),
-                scratch.matrix<T>(), loss_out->vec<T>(), back_out->matrix<T>());
-      }
+      functor(context->eigen_device<Device>(), shape_in.AsEigenDSizes<2>(),
+              BCast::ToIndexArray<2>(bcast.x_bcast()),
+              BCast::ToIndexArray<2>(bcast.y_bcast()),
+              logits_in.template shaped<T, 2>(bcast.x_reshape()),
+              labels_in.template shaped<T, 2>(bcast.y_reshape()),
+              scratch.matrix<T>(), loss_out->vec<T>(), back_out->matrix<T>());
     }
   }
 };
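
The root cause described above is an optimization that collapses broadcast shapes into fewer dimensions, after which code assuming a rank-2 result indexes out of range. The toy sketch below mimics that collapse with plain vectors purely to show the failure mode and why keeping the original rank avoids it; it is not the BCast implementation, which merges dimensions far more selectively.

#include <vector>

// Toy stand-in for the shape-collapsing optimization: when enabled, all
// dimensions are merged into a single one, so a (1, 1) shape loses its rank.
std::vector<long long> BroadcastReshape(const std::vector<long long>& shape,
                                        bool fewer_dims_optimization) {
  if (!fewer_dims_optimization) return shape;  // rank preserved
  long long merged = 1;
  for (long long d : shape) merged *= d;
  return {merged};  // everything folded into one dimension
}

int main() {
  std::vector<long long> logits_shape = {1, 1};
  // A consumer that assumes exactly two dimensions (like ToIndexArray<2>):
  auto optimized = BroadcastReshape(logits_shape, true);
  bool would_break = optimized.size() < 2;  // indexing [1] would be invalid
  auto preserved = BroadcastReshape(logits_shape, false);
  bool safe = preserved.size() == 2;  // safe to index [0] and [1]
  return (would_break && safe) ? 0 : 1;
}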
",1,train
4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1.

Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not.

In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. Passing fewer_dims_optimization=false stops this optimization.

PiperOrigin-RevId: 384844496
Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op_test.py,"@@ -63,6 +63,13 @@ class XentOpTest(xent_op_test_base.XentOpTestBase):
     self.assertAllCloseAccordingToType(np_loss, tf_loss)
     self.assertAllCloseAccordingToType(np_gradient, tf_gradient)
 
+    tf_f = constant_op.constant(np.array([[1.]]).astype(np.float32))
+    tf_l = constant_op.constant(np.array([[1.], [1.]]).astype(np.float32))
+    tf_loss, tf_gradient = gen_nn_ops.softmax_cross_entropy_with_logits(
+        tf_f, tf_l)
+    self.assertAllClose([0, 0], tf_loss)
+    self.assertAllCloseAccordingToType([[0], [0]], tf_gradient)
+
   @test_util.run_deprecated_v1
   def testNotMatrix(self):
     with self.cached_session():
",1,train
4d74d8a00b07441cba090a02e0dd9ed385145bf4,tensorflow/tensorflow,"Fix crash in softmax-xent when some input dimensions are 1.

Before, tf.nn.softmax_cross_entropy_with_logits would fail a CHECK if one input tensor had shape (1, 1) and the other did not.

In particular, the call to ToIndexArray<2> here https://github.com/tensorflow/tensorflow/blob/1f3da84a89702d3b4f234ee83762d738caffe098/tensorflow/core/kernels/xent_op.cc#L99 would fail, since the call assumed the array had two dimensions. If both dimensions were 1, BCast would merge the two dimensions into a single dimension. Passing fewer_dims_optimization=false stops this optimization.

PiperOrigin-RevId: 384844496
Change-Id: Ifb02dc74964132c3ed3f3bc98b0858dbe4e258b7",xent_op_test_base.py,"@@ -151,6 +151,9 @@ class XentOpTestBase(test.TestCase):
     labels = np.array([[0., 0., 0., 1.]]).astype(np.float16)
     logits = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16)
     self._testXent2D(labels, logits, with_placeholders=True)
+    labels = np.array([[1.]]).astype(np.float16)
+    logits = np.array([[1.], [2.]]).astype(np.float16)
+    self._testXent2D(labels, logits, with_placeholders=True)
     labels = np.array([[0.], [2.], [0.25]]).astype(np.float16)
     logits = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.],
                        [1., 2., 3., 4.]]).astype(np.float16)
",1,train
4dddb2fd0b01cdd196101afbba6518658a2c9e07,tensorflow/tensorflow,"Fix segfault in pools on empty shapes when certain dimension were very large.

Pooling ops multiply certain components of the input shape, e.g. input.shape[1] * input.shape[2] * input.shape[3]. This multiplication could overflow an int64 value if shape[0] was 0 but shape[1], shape[2], and shape[3] were very large, e.g. for an input with shape (0, 2**25, 2**25, 2**25).

PiperOrigin-RevId: 404644978
Change-Id: Ic79f89c970357ca2962b1f231449066db9403146",pooling_ops_common.h,"@@ -189,6 +189,9 @@ class MaxPoolingOp : public OpKernel {
   void SpatialMaxPool(OpKernelContext* context, Tensor* output,
                       const Tensor& tensor_in, const PoolParameters& params,
                       const Padding& padding) {
+    if (output->NumElements() == 0) {
+      return;
+    }
     // On GPU, use Eigen's Spatial Max Pooling.  On CPU, use an
     // EigenMatrix version that is currently faster than Eigen's
     // Spatial MaxPooling implementation.
@@ -443,6 +446,9 @@ class MaxPoolingV2Op : public OpKernel {
   void SpatialMaxPool(OpKernelContext* context, Tensor* output,
                       const Tensor& tensor_in, const PoolParameters& params,
                       const Padding& padding) {
+    if (output->NumElements() == 0) {
+      return;
+    }
     // On GPU, use Eigen's Spatial Max Pooling.  On CPU, use an
     // EigenMatrix version that is currently faster than Eigen's
     // Spatial MaxPooling implementation.
@@ -561,6 +567,9 @@ template <typename Device, typename T>
 void SpatialAvgPool(OpKernelContext* context, Tensor* output,
                     const Tensor& input, const PoolParameters& params,
                     const Padding& padding) {
+  if (output->NumElements() == 0) {
+    return;
+  }
   typedef Eigen::Map<const Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
       ConstEigenMatrixMap;
   typedef Eigen::Map<Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic>>
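
The fix bails out of the pooling kernels as soon as the output has zero elements, which sidesteps the oversized intermediate products entirely. The standalone sketch below shows that early return together with one way to compute an element count without overflowing; the SafeNumElements and SpatialPool names are hypothetical, not the kernel helpers.

#include <cstdint>
#include <vector>

// Returns the number of elements in the shape, or -1 if the product would
// overflow int64_t or a dimension is invalid.
int64_t SafeNumElements(const std::vector<int64_t>& shape) {
  int64_t n = 1;
  for (int64_t d : shape) {
    if (d < 0) return -1;              // negative dimensions are invalid
    if (d == 0) return 0;              // empty tensor, nothing to multiply
    if (n > INT64_MAX / d) return -1;  // product would overflow
    n *= d;
  }
  return n;
}

// Hypothetical pooling entry point: rejects overflowing shapes and returns
// early for empty outputs, mirroring the NumElements() == 0 guard above.
bool SpatialPool(const std::vector<int64_t>& out_shape) {
  int64_t n = SafeNumElements(out_shape);
  if (n < 0) return false;  // reject rather than overflow
  if (n == 0) return true;  // nothing to do for an empty output
  // ... the actual pooling work would go here ...
  return true;
}

int main() {
  // Shape (0, 2^25, 2^25, 2^25): empty overall, but the naive product of the
  // last three dimensions overflows int64.
  std::vector<int64_t> shape = {0, 1LL << 25, 1LL << 25, 1LL << 25};
  return SpatialPool(shape) ? 0 : 1;
}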
",1,train
579261dcd446385831fe4f7457d802a59685121d,tensorflow/tensorflow,"Fix crash in MatrixSolve when inputs have different batch dimensions.

Before, the process would crash or certain elements would be silently ignored. Now an InvalidArgument is raised.

PiperOrigin-RevId: 384844020
Change-Id: Iba44417e383bdd0e1abc4012bfca83b2377dd335",matrix_solve_op.cc,"@@ -143,15 +143,22 @@ class MatrixSolveOpGpu : public AsyncOpKernel {
                       done);
     OP_REQUIRES_ASYNC(
         context, input.dim_size(ndims - 2) == n,
-        errors::InvalidArgument(""Input matrices must be squares, got"",
+        errors::InvalidArgument(""Input matrices must be squares, got "",
                                 input.dim_size(ndims - 2), "" != "", n),
         done);
     OP_REQUIRES_ASYNC(context, rhs.dim_size(ndims - 2) == n,
                       errors::InvalidArgument(
                           ""Input matrix and right-hand side must have the ""
-                          ""same number of rows, got"",
+                          ""same number of rows, got "",
                           n, "" != "", rhs.dim_size(ndims - 2)),
                       done);
+    for (int dim = 0; dim < ndims - 2; dim++) {
+      OP_REQUIRES_ASYNC(
+          context, input.dim_size(dim) == rhs.dim_size(dim),
+          errors::InvalidArgument(
+              ""All input tensors must have the same outer dimensions.""),
+          done);
+    }
 
     // Allocate output.
     Tensor* output;
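
The added loop simply requires every batch (outer) dimension of the coefficient matrix to match the corresponding dimension of the right-hand side. A standalone sketch of the same comparison over plain shape vectors, mirroring the shapes used in the new test:

#include <vector>

// Checks that all dimensions except the trailing two (the matrix dimensions)
// agree between the coefficient matrix and the right-hand side.
bool BatchDimsMatch(const std::vector<int>& matrix_shape,
                    const std::vector<int>& rhs_shape) {
  if (matrix_shape.size() != rhs_shape.size()) return false;
  for (size_t dim = 0; dim + 2 < matrix_shape.size(); ++dim) {
    if (matrix_shape[dim] != rhs_shape[dim]) return false;
  }
  return true;
}

int main() {
  // Mirrors the new test case: (2, 6, 2, 2) vs (2, 3, 2, 2) must be rejected.
  bool bad = BatchDimsMatch({2, 6, 2, 2}, {2, 3, 2, 2});
  bool good = BatchDimsMatch({2, 3, 2, 2}, {2, 3, 2, 2});
  return (!bad && good) ? 0 : 1;
}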
",1,train
579261dcd446385831fe4f7457d802a59685121d,tensorflow/tensorflow,"Fix crash in MatrixSolve when inputs have different batch dimensions.

Before, the process would crash or certain elements would be silently ignored. Now an InvalidArgument is raised.

PiperOrigin-RevId: 384844020
Change-Id: Iba44417e383bdd0e1abc4012bfca83b2377dd335",matrix_solve_op_test.py,"@@ -112,6 +112,12 @@ class MatrixSolveOpTest(test.TestCase):
     with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
       self.evaluate(linalg_ops.matrix_solve(matrix, rhs))
 
+    # The matrix and right-hand side should have the same batch dimensions
+    matrix = np.random.normal(size=(2, 6, 2, 2))
+    rhs = np.random.normal(size=(2, 3, 2, 2))
+    with self.assertRaises((ValueError, errors_impl.InvalidArgumentError)):
+      self.evaluate(linalg_ops.matrix_solve(matrix, rhs))
+
   def testNotInvertible(self):
     # The input should be invertible.
     with self.assertRaisesOpError(""Input matrix is not invertible.""):
",1,train
68422b215e618df5ad375bcdc6d2052e9fd3080a,tensorflow/tensorflow,"Add shape checks to GPU TridiagonalMatMul.

When given invalid shapes, the GPU TridiagonalMatMul op could read invalid or uninitialized GPU memory.

PiperOrigin-RevId: 401775483
Change-Id: Ib5500aeb8225e50d4ce790b06d2c34751f544ad8",tridiagonal_matmul_op_gpu.cu.cc,"@@ -66,6 +66,12 @@ class TridiagonalMatMulOpGpu : public OpKernel {
     const Tensor& rhs = context->input(3);
 
     const int ndims = rhs.dims();
+    OP_REQUIRES(
+        context, ndims >= 2,
+        errors::InvalidArgument(""Input must have rank >= 2, but got "", ndims));
+    OP_REQUIRES_OK(context, ValidateInputTensor(superdiag, ""superdiag"", rhs));
+    OP_REQUIRES_OK(context, ValidateInputTensor(maindiag, ""maindiag"", rhs));
+    OP_REQUIRES_OK(context, ValidateInputTensor(subdiag, ""subdiag"", rhs));
     int64 batch_size = 1;
     for (int i = 0; i < ndims - 2; i++) {
       batch_size *= rhs.dim_size(i);
@@ -85,6 +91,39 @@ class TridiagonalMatMulOpGpu : public OpKernel {
         maindiag.flat<Scalar>().data(), subdiag.flat<Scalar>().data(),
         rhs.flat<Scalar>().data(), output->flat<Scalar>().data()));
   }
+
+ private:
+  Status ValidateInputTensor(const Tensor& tensor,
+                             const std::string& tensor_name,
+                             const Tensor& rhs) {
+    const int ndims = rhs.dims();
+    if (tensor.dims() != ndims) {
+      return errors::InvalidArgument(tensor_name,
+                                     "" must have same rank as rhs, but got "",
+                                     tensor.dims(), "" and "", ndims);
+    }
+    for (int i = 0; i < ndims - 2; i++) {
+      if (tensor.dim_size(i) != rhs.dim_size(i)) {
+        return errors::InvalidArgument(
+            tensor_name,
+            "" must have same outer dimensions as rhs, but for index "", i,
+            "", got "", tensor.dim_size(i), "" and "", rhs.dim_size(i));
+      }
+    }
+    if (tensor.dim_size(ndims - 2) != 1) {
+      return errors::InvalidArgument(
+          tensor_name, ""'s second-to-last dimension must be 1, but got "",
+          tensor.dim_size(ndims - 2));
+    }
+    if (tensor.dim_size(ndims - 1) != rhs.dim_size(ndims - 2)) {
+      return errors::InvalidArgument(tensor_name,
+                                     ""'s last dimension size must be rhs's ""
+                                     ""second-to-last dimension size, but got "",
+                                     tensor.dim_size(ndims - 1), "" and "",
+                                     rhs.dim_size(ndims - 2));
+    }
+    return Status::OK();
+  }
 };
 
 REGISTER_LINALG_OP_GPU(""TridiagonalMatMul"", (TridiagonalMatMulOpGpu<float>),
",1,train
68422b215e618df5ad375bcdc6d2052e9fd3080a,tensorflow/tensorflow,"Add shape checks to GPU TridiagonalMatMul.

When given invalid shapes, the GPU TridiagonalMatMul op could read invalid or uninitialized GPU memory.

PiperOrigin-RevId: 401775483
Change-Id: Ib5500aeb8225e50d4ce790b06d2c34751f544ad8",tridiagonal_matmul_op_test.py,"@@ -19,12 +19,15 @@ import itertools
 import numpy as np
 
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradient_checker_v2
+from tensorflow.python.ops import linalg_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.linalg import linalg_impl
@@ -175,6 +178,37 @@ class TridiagonalMulOpTest(test.TestCase):
     rhs = self._randomComplexArray((b, m, n))
     self._gradientTest(diags, rhs, dtype=dtypes.complex128)
 
+  def _testErrorWithShapesEager(self, exception_regex, superdiag_shape,
+                                maindiag_shape, subdiag_shape, rhs_shape):
+    with context.eager_mode():
+      superdiag = array_ops.ones(superdiag_shape)
+      maindiag = array_ops.ones(maindiag_shape)
+      subdiag = array_ops.ones(subdiag_shape)
+      rhs = array_ops.ones(rhs_shape)
+      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
+                                  exception_regex):
+        linalg_ops.tridiagonal_mat_mul(superdiag, maindiag, subdiag, rhs)
+
+  def testInvalidShapesEagerGpu(self):
+    if not test.is_gpu_available():
+      self.skipTest('Test requires GPU')
+    self._testErrorWithShapesEager('Input must have rank >= 2, but got ',
+                                   [2], [2], [2], [2])
+    self._testErrorWithShapesEager(
+        'superdiag must have same rank as rhs, but got 3 and 2',
+        [2, 1, 2], [2, 1], [2, 1], [2, 2])
+    self._testErrorWithShapesEager(
+        'maindiag must have same outer dimensions as rhs, but for index 0, got '
+        '3 and 2',
+        [2, 1, 2], [3, 1, 2], [2, 1, 2], [2, 2, 2])
+    self._testErrorWithShapesEager(
+        ""subdiag's second-to-last dimension must be 1, but got 3"",
+        [2, 1, 2], [2, 1, 2], [2, 3, 2], [2, 2, 2])
+    self._testErrorWithShapesEager(
+        ""subdiag's last dimension size must be rhs's second-to-last dimension ""
+        ""size, but got 3 and 2"",
+        [2, 1, 2], [2, 1, 2], [2, 1, 3], [2, 2, 2])
+
   # Benchmark
   class TridiagonalMatMulBenchmark(test.Benchmark):
     sizes = [(100000, 1, 1), (1000000, 1, 1), (10000000, 1, 1), (100000, 10, 1),
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",maxpooling_op.cc,"@@ -325,6 +325,14 @@ class MaxPoolingGradOp : public OpKernel {
     if (!context->status().ok()) {
       return;
     }
+    OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected orig_output shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", tensor_out.shape()));
+    OP_REQUIRES(context, out_backprop.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected grad shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", out_backprop.shape()));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
@@ -538,6 +546,18 @@ class MaxPoolingGradGradOp : public OpKernel {
                           /*explicit_paddings=*/{},
                           FORMAT_NHWC,
                           tensor_in.shape()};
+    if (!context->status().ok()) {
+      return;
+    }
+    OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected orig_output shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", tensor_out.shape()));
+    OP_REQUIRES(
+        context, out_grad_backprop.shape() == tensor_in.shape(),
+        errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(),
+                                "", but got "", out_grad_backprop.shape()));
+
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
                                 {2}, 0, tensor_out.shape(), &output));
@@ -742,6 +762,17 @@ class MaxPoolingGradGradOp<Eigen::GpuDevice, T> : public OpKernel {
                           /*explicit_paddings=*/{},
                           data_format_,
                           tensor_in.shape()};
+    if (!context->status().ok()) {
+      return;
+    }
+    OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected orig_output shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", tensor_out.shape()));
+    OP_REQUIRES(
+        context, out_grad_backprop.shape() == tensor_in.shape(),
+        errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(),
+                                "", but got "", out_grad_backprop.shape()));
 
     functor::MaxPoolGradBackwardNoMask<T>()(
         data_format_, tensor_in.flat<T>().data(), tensor_out.flat<T>().data(),
@@ -1096,6 +1127,14 @@ class MaxPoolingGradWithArgmaxOp : public OpKernel {
     if (!context->status().ok()) {
       return;
     }
+    OP_REQUIRES(context, grad_in.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected grad shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", grad_in.shape()));
+    OP_REQUIRES(context, argmax.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected argmax shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", argmax.shape()));
 
     TensorShape out_shape({params.tensor_in_batch, params.tensor_in_rows,
                            params.tensor_in_cols, params.depth});
@@ -1156,6 +1195,14 @@ class MaxPoolingGradGradWithArgmaxOp : public OpKernel {
     if (!context->status().ok()) {
       return;
     }
+    OP_REQUIRES(
+        context, grad_in.shape() == tensor_in.shape(),
+        errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(),
+                                "", but got "", grad_in.shape()));
+    OP_REQUIRES(context, argmax.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected argmax shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", argmax.shape()));
 
     TensorShape out_shape({params.tensor_in_batch, params.out_height,
                            params.out_width, params.depth});
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_3d.cc,"@@ -366,6 +366,19 @@ class MaxPooling3dGradOp : public OpKernel {
 
     OP_REQUIRES_OK(context, Get3dOutputSize(input_size, window, stride,
                                             padding_, &out, &padding));
+
+    const int64_t depth = GetTensorDim(tensor_in, data_format_, 'C');
+    const int64_t in_batch = GetTensorDim(tensor_in, data_format_, 'N');
+    TensorShape out_shape = ShapeFromFormat(data_format_, in_batch,
+                                            {{out[2], out[1], out[0]}}, depth);
+    OP_REQUIRES(
+        context, tensor_out.shape() == out_shape,
+        errors::InvalidArgument(""Expected orig_output shape to be "", out_shape,
+                                "", but got "", tensor_out.shape()));
+    OP_REQUIRES(context, out_backprop.shape() == out_shape,
+                errors::InvalidArgument(""Expected grad shape to be "", out_shape,
+                                        "", but got "", out_backprop.shape()));
+
     LaunchMaxPooling3dGradOp<Device, T>::launch(
         context, tensor_in, tensor_out, out_backprop, window, stride, out,
         padding, data_format_, input_backprop);
@@ -712,6 +725,14 @@ class MaxPooling3dGradGradOp : public OpKernel {
     Pool3dParameters params{context,  ksize_,       stride_,
                             padding_, data_format_, tensor_in.shape()};
     if (!context->status().ok()) return;  // params is invalid
+    OP_REQUIRES(context, tensor_out.shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected orig_output shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", tensor_out.shape()));
+    OP_REQUIRES(
+        context, out_grad_backprop.shape() == tensor_in.shape(),
+        errors::InvalidArgument(""Expected grad shape to be "", tensor_in.shape(),
+                                "", but got "", out_grad_backprop.shape()));
 
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_common.cc,"@@ -465,6 +465,16 @@ void DnnPoolingGradOp<T>::Compute(
   if (!context->status().ok()) {
     return;
   }
+  if (tensor_out) {
+    OP_REQUIRES(context, tensor_out->shape() == params.forward_output_shape(),
+                errors::InvalidArgument(""Expected orig_output shape to be "",
+                                        params.forward_output_shape(),
+                                        "", but got "", tensor_out->shape()));
+  }
+  OP_REQUIRES(context, out_backprop.shape() == params.forward_output_shape(),
+              errors::InvalidArgument(""Expected grad shape to be "",
+                                      params.forward_output_shape(),
+                                      "", but got "", out_backprop.shape()));
 
   TensorFormat transformed_input_data_format = data_format;
 
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_common.h,"@@ -83,11 +83,6 @@ struct PoolParameters {
   TensorFormat data_format;
 };
 
-// Checks if the sizes of the paddings are less than the size of window.
-// This is required for MaxPool because it pads with -inf, so the pooling
-// window cannot fully cover the padded area.
-Status CheckPaddingSize(PoolParameters& params);
-
 // An implementation of MaxPooling (forward).
 // TODO (yongtang): Remove MaxPoolingOp and use MaxPoolingV2Op,
 //     QuantizedMaxPoolingOp depends on MaxPoolingOp so keep intact for now
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_3d_test.py,"@@ -16,9 +16,13 @@
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import nn_ops
@@ -515,6 +519,44 @@ class PoolingTest(test.TestCase):
           pool_3d = f(input_tensor, ksize=[2, 2, 0], strides=1, padding=""VALID"")
           self.evaluate(pool_3d)
 
+  def testMaxPoolGradEagerShapeErrors(self):
+    with context.eager_mode():
+      orig_in = array_ops.ones((1, 1, 1, 1, 1))
+
+      # Test invalid orig_out shape
+      orig_out = array_ops.ones((1, 1, 1, 1, 2))
+      grad = array_ops.ones((1, 1, 1, 1, 1))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected orig_output shape to be \[1,1,1,1,1\], but got ""
+          r""\[1,1,1,1,2\]""):
+        gen_nn_ops.max_pool3d_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1],
+            strides=[1, 1, 1, 1, 1], padding=""VALID"")
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected orig_output shape to be \[1,1,1,1,1\], but got ""
+          r""\[1,1,1,1,2\]""):
+        gen_nn_ops.max_pool3d_grad_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1],
+            strides=[1, 1, 1, 1, 1], padding=""VALID"")
+
+      # Test invalid grad shape
+      orig_out = array_ops.ones((1, 1, 1, 1, 1))
+      grad = array_ops.ones((1, 1, 1, 1, 2))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected grad shape to be \[1,1,1,1,1\], but got \[1,1,1,1,2\]""):
+        gen_nn_ops.max_pool3d_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1],
+            strides=[1, 1, 1, 1, 1], padding=""VALID"")
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected grad shape to be \[1,1,1,1,1\], but got \[1,1,1,1,2\]""):
+        gen_nn_ops.max_pool3d_grad_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1, 1],
+            strides=[1, 1, 1, 1, 1], padding=""VALID"")
+
 
 if __name__ == ""__main__"":
   test.main()
",1,train
da4aad5946be30e5f049920fa076e1f7ef021261,tensorflow/tensorflow,"Roll forward https://github.com/tensorflow/tensorflow/commit/ab0ca4bbc66a476aea305f81c69e0201b5876d0a. The internal test that it broke has been fixed.

PiperOrigin-RevId: 401913101
Change-Id: I67f095899187e38101fbb10289c5e444b0a9e8c0",pooling_ops_test.py,"@@ -618,6 +618,7 @@ class PoolingTest(test.TestCase, parameterized.TestCase):
 
   @parameterized.parameters(
       GetTestConfigsDicts(nn_ops.max_pool, nn_ops.max_pool_v2))
+  @test_util.xla_allow_fallback(""XLA doesn't support explicit padding"")
   @test_util.run_deprecated_v1
   def testMaxPoolNegativeInputExpPaddingAdv(self, **kwargs):
     expected_output = [-1, -1, -3, -5, -7, -7, -9, -11, -19, -19, -21, -23, -31,
@@ -2390,6 +2391,82 @@ class PoolingTest(test.TestCase, parameterized.TestCase):
             explicit_paddings=[1, 1, 1, 1, 1, 1, 0, 0],
             data_format=""NHWC""))
 
+  def testMaxPoolGradEagerShapeErrors(self):
+    with context.eager_mode():
+      orig_in = array_ops.ones((1, 1, 1, 1))
+
+      # Test invalid orig_out shape
+      orig_out = array_ops.ones((1, 1, 1, 2))
+      grad = array_ops.ones((1, 1, 1, 1))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected orig_output shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected orig_output shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+
+      # Test invalid grad shape
+      orig_out = array_ops.ones((1, 1, 1, 1))
+      grad = array_ops.ones((1, 1, 1, 2))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad_grad(
+            orig_in, orig_out, grad, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+
+  def testMaxPoolGradWithArgmaxEagerShapeErrors(self):
+    with context.eager_mode():
+      inp = array_ops.ones((1, 1, 1, 1))
+
+      # Test invalid grad shape
+      grad = array_ops.ones((1, 1, 1, 2))
+      argmax = array_ops.zeros((1, 1, 1, 1), dtype=dtypes.int64)
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad_with_argmax(
+            inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+      # max_pool_grad_grad_with_argmax is only implemented for GPUs
+      if test.is_gpu_available():
+        with self.assertRaisesRegex(
+            errors_impl.InvalidArgumentError,
+            r""Expected grad shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+          gen_nn_ops.max_pool_grad_grad_with_argmax(
+              inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+              padding=""VALID"")
+
+      # Test invalid argmax shape
+      grad = array_ops.ones((1, 1, 1, 1))
+      argmax = array_ops.ones((1, 1, 1, 2), dtype=dtypes.int64)
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          r""Expected argmax shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+        gen_nn_ops.max_pool_grad_with_argmax(
+            inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+            padding=""VALID"")
+      # max_pool_grad_grad_with_argmax is only implemented for GPUs
+      if test.is_gpu_available():
+        with self.assertRaisesRegex(
+            errors_impl.InvalidArgumentError,
+            r""Expected argmax shape to be \[1,1,1,1\], but got \[1,1,1,2\]""):
+          gen_nn_ops.max_pool_grad_grad_with_argmax(
+              inp, grad, argmax, ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+              padding=""VALID"")
+
 
 def GetMaxPoolFwdTest(input_size, filter_size, strides, padding):
 
",1,train
e7f497570abb6b4ae5af4970620cd880e4c0c904,tensorflow/tensorflow,"Fix segfault on OOM in Conv2D.

PiperOrigin-RevId: 404655317
Change-Id: I33588dbd3f5d0fef980e3c908bf5515a9ee09ce7",conv_ops.cc,"@@ -183,12 +183,18 @@ struct LaunchGrouped {
     auto on_shuffled = [&]() { shuffles_completed.DecrementCount(); };
 
     // Shuffle input into temporary tensor.
-    Tensor input_shuffled(input.dtype(), TensorShape(post_shuffle(input)));
+    Tensor input_shuffled;
+    OP_REQUIRES_OK(
+        ctx, ctx->allocate_temp(input.dtype(), TensorShape(post_shuffle(input)),
+                                &input_shuffled));
     input_shuffled.tensor<T, 5>().device(device, on_shuffled) =
         input.shaped<T, 5>(pre_shuffle(input)).shuffle(shuffle);
 
     // Shuffle filter into temporary tensor.
-    Tensor filter_shuffled(filter.dtype(), TensorShape(post_shuffle(filter)));
+    Tensor filter_shuffled;
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(filter.dtype(),
+                                           TensorShape(post_shuffle(filter)),
+                                           &filter_shuffled));
     filter_shuffled.tensor<T, 5>().device(device, on_shuffled) =
         filter.shaped<T, 5>(pre_shuffle(filter)).shuffle(shuffle);
 
@@ -196,7 +202,10 @@ struct LaunchGrouped {
     shuffles_completed.Wait();
 
     // Write group convolution results into temporary output tensor.
-    Tensor output_shuffled(output->dtype(), TensorShape(post_shuffle(*output)));
+    Tensor output_shuffled;
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(output->dtype(),
+                                           TensorShape(post_shuffle(*output)),
+                                           &output_shuffled));
 
     for (int64_t i = 0; i < num_groups; ++i) {
       // TODO(ezhulenev): Run this loop using `parallelFor` (regular parallelFor
",1,train
f2c3931113eaafe9ef558faaddd48e00a6606235,tensorflow/tensorflow,"Adding more validation checks to _ParallelConcatUpdate to avoid NPE.

PiperOrigin-RevId: 402569467
Change-Id: I2db122dab68be2a5e4e8dd3375f5a70c4d2307ec",inplace_ops.cc,"@@ -71,6 +71,15 @@ class ParallelConcatUpdate : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     auto value = ctx->input(0);
+    // Value should be at least rank 1. Also the 0th dimension should be
+    // greater than loc_.
+    OP_REQUIRES(ctx, value.dims() >= 1,
+                errors::InvalidArgument(""value should be at least rank 1.""));
+    OP_REQUIRES(
+        ctx, value.dim_size(0) > loc_,
+        errors::InvalidArgument(""0th dimension of value = "", value.dim_size(0),
+                                "" is less than loc_="", loc_));
+
     auto update = ctx->input(1);
 
     OP_REQUIRES(
",1,train
f2c3931113eaafe9ef558faaddd48e00a6606235,tensorflow/tensorflow,"Adding more validation checks to _ParallelConcatUpdate to avoid NPE.

PiperOrigin-RevId: 402569467
Change-Id: I2db122dab68be2a5e4e8dd3375f5a70c4d2307ec",stack_op_test.py,"@@ -16,12 +16,16 @@
 
 import numpy as np
 
+from tensorflow.python import tf2
 from tensorflow.python.eager import context
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.platform import test
 
@@ -69,6 +73,19 @@ class StackOpTest(test.TestCase):
             c = array_ops.parallel_stack(xs)
             self.assertAllEqual(c, data)
 
+  def testParallelConcatShapeZero(self):
+    if not tf2.enabled():
+      self.skipTest(""only fails in TF2"")
+
+    @def_function.function
+    def f():
+      y = gen_array_ops.parallel_concat(values=[[""tf""]], shape=0)
+      return y
+
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                r""0th dimension of value .* is less than""):
+      f()
+
   def testSimpleParallelGPU(self):
     # tf.parallel_stack is only supported in graph mode.
     with ops.Graph().as_default():
",1,train
5c8c9a8bfe750f9743d0c859bae112060b216f5c,tensorflow/tensorflow,"Fixing security fixes in boosted trees ops

PiperOrigin-RevId: 405669548
Change-Id: Iae224d240d1779bcc02405c2fff99785644fbd0d",stats_ops.cc,"@@ -72,7 +72,10 @@ class BoostedTreesCalculateBestGainsPerFeatureOp : public OpKernel {
                                                 &stats_summary_list));
     const int64_t num_buckets = stats_summary_list[0].dim_size(1);
     // Check for single logit: 1 gradient + 1 hessian value.
-    DCHECK_EQ(stats_summary_list[0].dim_size(2), 2);
+    OP_REQUIRES(context, stats_summary_list[0].dim_size(2) == 2,
+                errors::InvalidArgument(""stats_summary_list[0] must have ""
+                                        ""exactly 2 dimensions, obtained: "",
+                                        stats_summary_list[0].dim_size(2)));
     std::vector<TTypes<float, 3>::ConstTensor> stats_summary;
     stats_summary.reserve(stats_summary_list.size());
     for (const auto& tensor : stats_summary_list) {
@@ -275,8 +278,13 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
     const int32_t num_buckets = stats_summary_t->dim_size(2) - 1;
     const int32_t logits_dim = logits_dim_;
     const int32_t hessian_dim = stats_summary_t->dim_size(3) - logits_dim;
-    DCHECK_GT(hessian_dim, 0);
-    DCHECK_LE(hessian_dim, logits_dim * logits_dim);
+    OP_REQUIRES(context, hessian_dim > 0,
+                errors::InvalidArgument(""hessian dim should be < 0, got "",
+                                        hessian_dim));
+    OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
+                errors::InvalidArgument(
+                    ""hessian dim should be <= "", logits_dim * logits_dim,
+                    "" but got: "", hessian_dim));
 
     const Tensor* l1_t;
     OP_REQUIRES_OK(context, context->input(""l1"", &l1_t));
@@ -624,8 +632,13 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
     const int32_t logits_dim = logits_dim_;
     const int32_t hessian_dim =
         stats_summaries_list[0].dim_size(3) - logits_dim;
-    DCHECK_GT(hessian_dim, 0);
-    DCHECK_LE(hessian_dim, logits_dim * logits_dim);
+    OP_REQUIRES(context, hessian_dim > 0,
+                errors::InvalidArgument(""hessian dim should be < 0, got "",
+                                        hessian_dim));
+    OP_REQUIRES(context, hessian_dim <= logits_dim * logits_dim,
+                errors::InvalidArgument(
+                    ""hessian dim should be <= "", logits_dim * logits_dim,
+                    "" but got: "", hessian_dim));
 
     // Vector of stats_summaries; each element is stats for feature of shape
     // [max_splits, feature_dim, num_buckets, logits_dim + hessian_dim].
@@ -1002,6 +1015,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
     const Tensor* node_id_range_t;
     OP_REQUIRES_OK(context, context->input(""node_id_range"", &node_id_range_t));
     const auto node_id_range = node_id_range_t->vec<int32>();
+    OP_REQUIRES(
+        context, node_id_range.size() == 2,
+        errors::InvalidArgument(""node_id_range should have 2 entries, got: "",
+                                node_id_range.size()));
     const int32_t node_id_first = node_id_range(0);  // inclusive
     const int32_t node_id_last = node_id_range(1);   // exclusive
 
@@ -1075,6 +1092,11 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
                       ""dims, the last value in stats_summary_shape, which was "",
                       stats_dims, "". At index ("", idx,
                       "", 4), stats_summary_indices contains value "", stat_dim));
+      OP_REQUIRES(context, stat_dim >= 0,
+                  errors::InvalidArgument(
+                      ""Stat dim, the sum of logits dim and hessian dim in ""
+                      ""stats_summary_indices, should be >= 0, which was "",
+                      stat_dim, "" at index "", idx));
       std::pair<FeatureMapIterator, bool> const& f_insert_result = f_map.insert(
           FeatureMapIterator::value_type(feature_dim, BucketMap()));
       auto& b_map = f_insert_result.first->second;
@@ -1307,6 +1329,12 @@ class BoostedTreesMakeStatsSummaryOp : public OpKernel {
     const Tensor* gradients_t;
     OP_REQUIRES_OK(context, context->input(""gradients"", &gradients_t));
     const auto gradients = gradients_t->matrix<float>();
+    OP_REQUIRES(
+        context, node_ids.size() == gradients.dimension(0),
+        errors::InvalidArgument(
+            ""node_ids size should match 0th dim of gradients. node ids ""
+            ""size: "",
+            node_ids.size(), "", gradients dim0: "", gradients.dimension(0)));
     // hessians
     const Tensor* hessians_t;
     OP_REQUIRES_OK(context, context->input(""hessians"", &hessians_t));
@@ -1376,6 +1404,13 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
     OP_REQUIRES_OK(context, context->input(""gradients"", &gradients_t));
     const auto gradients = gradients_t->matrix<float>();
 
+    OP_REQUIRES(
+        context, node_ids.size() == gradients.dimension(0),
+        errors::InvalidArgument(
+            ""node_ids size should match 0th dim of gradients. node ids ""
+            ""size: "",
+            node_ids.size(), "", gradients dim0: "", gradients.dimension(0)));
+
     // hessians.
     const Tensor* hessians_t;
     OP_REQUIRES_OK(context, context->input(""hessians"", &hessians_t));
@@ -1406,6 +1441,9 @@ class BoostedTreesAggregateStatsOp : public OpKernel {
 
     for (int i = 0; i < batch_size; ++i) {
       const int32_t node = node_ids(i);
+      OP_REQUIRES(context, node >= 0,
+                  errors::InvalidArgument(
+                      ""node_ids "", i, ""th entry should be >=0, got: "", node));
       for (int feature_dim = 0; feature_dim < feature_dims; ++feature_dim) {
         const int32_t feature_value = feature(i, feature_dim);
         const int32_t bucket =
@@ -1612,7 +1650,12 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
     const int64_t stats_dims = logits_dims + hessians_dims;
     const int64_t num_sparse_entries = feature_indices_t->dim_size(0);
     const int32_t feature_dims = feature_shape(1);
-    DCHECK_LE(num_sparse_entries, batch_size * feature_dims);
+    OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims,
+                errors::InvalidArgument(
+                    ""feature_indices dim0 should be <= gradients dim0 * ""
+                    ""feature_shape[1]. features_indices dim0: "",
+                    num_sparse_entries, "" gradients dim0: "", batch_size,
+                    "", feature_shape[1]: "", feature_dims));
 
     // Aggregate statistics info to map.
     StatsPartitionMap stats_map;
",1,train
5c8c9a8bfe750f9743d0c859bae112060b216f5c,tensorflow/tensorflow,"Fixing security fixes in boosted trees ops

PiperOrigin-RevId: 405669548
Change-Id: Iae224d240d1779bcc02405c2fff99785644fbd0d",stats_ops_test.py,"@@ -17,9 +17,11 @@ import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import boosted_trees_ops
+from tensorflow.python.ops import gen_boosted_trees_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import googletest
 
@@ -1665,6 +1667,199 @@ class StatsOpsTest(test_util.TensorFlowTestCase):
     """"""Tests numeric precision.""""""
     self._verify_precision(length=50000000)
 
+  def testBoostedTreesCalculateBestGainsPerFeatureSecurity(self):
+    node_id_range = [1, 2]
+    stats_summary_list = [[[[]]]]
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    max_splits = 1
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_gains_per_feature(
+          node_id_range=node_id_range,
+          stats_summary_list=stats_summary_list,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          max_splits=max_splits)
+
+  def testBoostedTreesCalculateBestFeatureSplitSecurity(self):
+    node_id_range = [1, 2]
+    stats_summary = [[[[]]]]
+    split_type = 'equality'
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    logits_dimension = 5
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
+          node_id_range=node_id_range,
+          stats_summary=stats_summary,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension,
+          split_type=split_type)
+
+  def testBoostedTreesCalculateBestFeatureSplitSecurity2(self):
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split(
+          node_id_range=[0, 8],
+          stats_summary=[[[[1.0], [2.0], [3.0]]]],
+          l1=[0.5],
+          l2=[0.5],
+          tree_complexity=[0.1],
+          min_node_weight=[1.0],
+          logits_dimension=8)
+
+  def testBoostedTreesCalculateBestFeatureSplitV2Security(self):
+    node_id_range = [1, 2]
+    stats_summaries_list = [[[[[]]]]]
+    split_types = ['inequality']
+    candidate_feature_ids = [1, 2, 3, 4]
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [1.0]
+    min_node_weight = [1.17]
+    logits_dimension = 5
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_calculate_best_feature_split_v2(
+          node_id_range=node_id_range,
+          stats_summaries_list=stats_summaries_list,
+          split_types=split_types,
+          candidate_feature_ids=candidate_feature_ids,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension)
+
+  def testBoostedTreesSparseCalculateBestFeatureSplitSecurity(self):
+    node_id_range = []
+    stats_summary_indices = [[]]
+    stats_summary_values = [1.0]
+    stats_summary_shape = [1, 1, 1, 1]
+    l1 = [1.0]
+    l2 = [1.0]
+    tree_complexity = [0.5]
+    min_node_weight = [1.0]
+    logits_dimension = 3
+    split_type = 'inequality'
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
+          node_id_range=node_id_range,
+          stats_summary_indices=stats_summary_indices,
+          stats_summary_values=stats_summary_values,
+          stats_summary_shape=stats_summary_shape,
+          l1=l1,
+          l2=l2,
+          tree_complexity=tree_complexity,
+          min_node_weight=min_node_weight,
+          logits_dimension=logits_dimension,
+          split_type=split_type)
+
+  def testBoostedTreesSparseCalculateBestFeatureSplitSecurity2(self):
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_calculate_best_feature_split(
+          node_id_range=[0, 1],
+          stats_summary_indices=[[0, -1, -1, -1], [1, 0, -1, 0], [1, 0, 0, -1]],
+          stats_summary_values=[0.1, 0.2, 0.3],
+          stats_summary_shape=[1, 1, 1, 1],
+          l1=[0.5],
+          l2=[0.5],
+          tree_complexity=[0.1],
+          min_node_weight=[1.0],
+          logits_dimension=1)
+
+  def testBoostedTreesMakeStatsSummarySecurity(self):
+    node_ids = [1, 2]
+    gradients = [[]]
+    hessians = [[0.2], [0.1]]
+    bucketized_features_list = [[1], [2]]
+    max_splits = 3
+    num_buckets = 3
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_make_stats_summary(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          bucketized_features_list=bucketized_features_list,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesMakeStatsSummarySecurity2(self):
+    node_ids = [1, 2, 3]
+    gradients = [[0.1], [0.2]]
+    hessians = [[0.2], [0.1]]
+    bucketized_features_list = [[1], [2]]
+    max_splits = 3
+    num_buckets = 3
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_make_stats_summary(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          bucketized_features_list=bucketized_features_list,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesAggregateStatsSecurity(self):
+    node_ids = [1, 2]
+    gradients = [[]]
+    hessians = [[100.0]]
+    feature = [[0, 0, 0]]
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_aggregate_stats(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          feature=feature,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
+  def testBoostedTreesAggregateStatsSecurity2(self):
+    node_ids = [-10]
+    gradients = [[0.0, 0.0]]
+    hessians = [[100.0]]
+    feature = [[0, 0, 0]]
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      self.evaluate(
+          gen_boosted_trees_ops.boosted_trees_aggregate_stats(
+              node_ids=node_ids,
+              gradients=gradients,
+              hessians=hessians,
+              feature=feature,
+              max_splits=max_splits,
+              num_buckets=num_buckets))
+
+  def testBoostedTreesSparseAggregateStatsSecurity(self):
+    node_ids = []
+    gradients = [[1.0]]
+    hessians = [[100.0]]
+    feature_indices = [[0, 0, 0]]
+    feature_values = [0, 0, 0]
+    feature_shape = [0, 0, 0]
+    max_splits = 100
+    num_buckets = 100
+    with self.assertRaises((errors.InvalidArgumentError, ValueError)):
+      gen_boosted_trees_ops.boosted_trees_sparse_aggregate_stats(
+          node_ids=node_ids,
+          gradients=gradients,
+          hessians=hessians,
+          feature_indices=feature_indices,
+          feature_values=feature_values,
+          feature_shape=feature_shape,
+          max_splits=max_splits,
+          num_buckets=num_buckets)
+
 
 class BestMultiDimFeatureSplitMultiClassV2Op(StatsOpsTest):
   """"""Tests multi-class/multi-regression for best splits using V2 op.""""""
",1,train
701cfaca222a82afbeeb17496bd718baa65a67d2,tensorflow/tensorflow,"Fix heap out of bounds error in tf.raw_ops.SparseCountSparseOutput shape inference when it is called with invalid inputs, and add a test for it.

PiperOrigin-RevId: 405766415
Change-Id: I77d244ef35f351ef7b6f821efd959cac2c66db24",count_ops.cc,"@@ -41,6 +41,8 @@ Status DenseCountSparseOutputShapeFn(InferenceContext *c) {
 }
 
 Status SparseCountSparseOutputShapeFn(InferenceContext *c) {
+  ShapeHandle unused;
+  TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 2, &unused));
   auto rank = c->Dim(c->input(0), 1);
   auto nvals = c->UnknownDim();
   c->set_output(0, c->Matrix(nvals, rank));  // out.indices
",1,train
701cfaca222a82afbeeb17496bd718baa65a67d2,tensorflow/tensorflow,"Fix heap out of bounds error in tf.raw_ops.SparseCountSparseOutput shape inference when it is called with invalid inputs, and add a test for it.

PiperOrigin-RevId: 405766415
Change-Id: I77d244ef35f351ef7b6f821efd959cac2c66db24",bincount_ops_test.py,"@@ -831,6 +831,25 @@ class TestSparseCountFailureModes(test.TestCase):
       self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
 
 
+class RawOpsHeapOobTest(test.TestCase, parameterized.TestCase):
+
+  @test_util.run_v1_only(""Test security error"")
+  def testSparseCountSparseOutputBadIndicesShapeTooSmall(self):
+    indices = [1]
+    values = [[1]]
+    weights = []
+    dense_shape = [10]
+    with self.assertRaisesRegex(ValueError,
+                                ""Shape must be rank 2 but is rank 1 for""):
+      self.evaluate(
+          gen_count_ops.SparseCountSparseOutput(
+              indices=indices,
+              values=values,
+              dense_shape=dense_shape,
+              weights=weights,
+              binary_output=True))
+
+
 @test_util.run_all_in_graph_and_eager_modes
 @test_util.disable_tfrt
 class RawOpsTest(test.TestCase, parameterized.TestCase):
",1,train
a0d64445116c43cf46a5666bd4eee28e7a82f244,tensorflow/tensorflow,"Prevent OOB access in QuantizeV2 shape inference

PiperOrigin-RevId: 400309614
Change-Id: I31412c71b05b4f21b677f7fa715a61499cbee39d",common_shape_fns.cc,"@@ -2559,6 +2559,9 @@ Status QuantizeV2Shape(InferenceContext* c) {
   if (!s.ok() && s.code() != error::NOT_FOUND) {
     return s;
   }
+  if (axis < -1) {
+    return errors::InvalidArgument(""axis should be at least -1, got "", axis);
+  }
   const int minmax_rank = (axis == -1) ? 0 : 1;
   TF_RETURN_IF_ERROR(shape_inference::UnchangedShape(c));
   ShapeHandle minmax;
",1,test
fa6b7782fbb14aa08d767bc799c531f5e1fb3bb8,tensorflow/tensorflow,"Fix null pointer exception in shape inference function when tf.ragged.cross() is called with invalid inputs.

PiperOrigin-RevId: 400045848
Change-Id: Ia65501583b85cf1ec14a252d83fbdd716817a516",ragged_array_ops.cc,"@@ -99,6 +99,13 @@ REGISTER_OP(""RaggedCross"")
       int dense_start = num_ragged * 2 + num_sparse * 3;
       for (int i = 0; i < dense_types.size(); ++i) {
         ShapeHandle dense_input = c->input(i + dense_start);
+        int32 rank = c->Rank(dense_input);
+        if (rank == InferenceContext::kUnknownRank) {
+          continue;
+        } else if (rank != 2) {
+          return errors::InvalidArgument(
+              ""tf.ragged.cross only supports inputs with rank=2"");
+        }
         int64_t batch_size = c->Value(c->Dim(dense_input, 0));
         if (batch_size != InferenceContext::kUnknownDim) {
           ShapeHandle row_splits = c->Vector(batch_size + 1);
",1,train
fa6b7782fbb14aa08d767bc799c531f5e1fb3bb8,tensorflow/tensorflow,"Fix null pointer exception in shape inference function when tf.ragged.cross() is called with invalid inputs.

PiperOrigin-RevId: 400045848
Change-Id: Ia65501583b85cf1ec14a252d83fbdd716817a516",ragged_cross_op_test.py,"@@ -18,10 +18,12 @@ from absl.testing import parameterized
 
 import numpy as np
 
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops.ragged import ragged_array_ops
@@ -358,6 +360,16 @@ class RaggedCrossOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
                   dense_const([[2], [3]])],
           exception=(ValueError, errors.InvalidArgumentError),
           message='inputs must all have the same batch dimension size'),
+      dict(
+          testcase_name='3DDenseTensor',
+          inputs=[dense_const([[[1]]])],
+          exception=(ValueError, errors.InvalidArgumentError),
+          message='tf.ragged.cross only supports inputs with rank=2'),
+      dict(
+          testcase_name='0DDenseTensor',
+          inputs=[dense_const(1)],
+          exception=(ValueError, errors.InvalidArgumentError),
+          message='tf.ragged.cross only supports inputs with rank=2'),
   ])
   def testStaticError(self, inputs, exception=ValueError, message=None):
     with self.assertRaisesRegex(exception, message):
@@ -368,17 +380,36 @@ class RaggedCrossOpTest(test_util.TensorFlowTestCase, parameterized.TestCase):
           testcase_name='3DRaggedTensor',
           inputs=[ragged_const([[[1]]], ragged_rank=1)],
           message='tf.ragged.cross only supports inputs with rank=2'),
+      dict(
+          testcase_name='0DDenseTensor',
+          inputs=[dense_const(1)],
+          signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]],
+          exception=(ValueError, errors.InvalidArgumentError),
+          message='tf.ragged.cross only supports inputs with rank=2'),
+      dict(
+          testcase_name='1DDenseTensor',
+          inputs=[dense_const([1])],
+          signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]],
+          exception=(ValueError, errors.InvalidArgumentError),
+          message='tf.ragged.cross only supports inputs with rank=2'),
       dict(
           testcase_name='3DDenseTensor',
           inputs=[dense_const([[[1]]])],
+          signature=[[tensor_spec.TensorSpec(None, dtypes.int32)]],
+          exception=(ValueError, errors.InvalidArgumentError),
           message='tf.ragged.cross only supports inputs with rank=2'),
   ])
   def testRuntimeError(self,
                        inputs,
                        exception=errors.InvalidArgumentError,
-                       message=None):
+                       message=None,
+                       signature=None):
+    @def_function.function(input_signature=signature)
+    def fn(x):
+      return ragged_array_ops.cross(x)
+
     with self.assertRaisesRegex(exception, message):
-      self.evaluate(ragged_array_ops.cross(inputs))
+      self.evaluate(fn(inputs))
 
   def _ragged_to_sparse(self, t):
     if ragged_tensor.is_ragged(t):
",1,train
afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function.

Replace threading.Lock with threading.RLock to allow recursive tf.function.

PiperOrigin-RevId: 401282729
Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",def_function.py,"@@ -572,7 +572,7 @@ class Function(core.GenericFunction):
       ValueError: if `input_signature` is not None and the `python_function`'s
         argspec has keyword arguments.
     """"""
-    self._lock = threading.Lock()
+    self._lock = threading.RLock()
     self._python_function = python_function
     self._function_spec = function_lib.FunctionSpec.from_function_and_signature(
         python_function,
@@ -613,7 +613,7 @@ class Function(core.GenericFunction):
   def __setstate__(self, state):
     """"""Restore from pickled state.""""""
     self.__dict__ = state
-    self._lock = threading.Lock()
+    self._lock = threading.RLock()
     self._descriptor_cache = weakref.WeakKeyDictionary()
     self._key_for_call_stats = self._get_key_for_call_stats()
 
",1,train
afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function.

Replace threading.Lock with threading.RLock to allow recursive tf.function.

PiperOrigin-RevId: 401282729
Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",def_function_test.py,"@@ -25,6 +25,7 @@ from absl.testing import parameterized
 from six.moves import range
 
 from tensorflow.python.autograph.core import converter
+from tensorflow.python.eager import backprop
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import lift_to_graph
 from tensorflow.python.framework import constant_op
@@ -36,6 +37,7 @@ from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.module import module
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import cond_v2
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
@@ -1261,6 +1263,117 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(obj2.testDouble.experimental_get_tracing_count(), 3)
     self.assertAllEqual(obj1.testDouble.experimental_get_tracing_count(), 2)
 
+  def test_recursive_tf_function(self):
+
+    @def_function.function
+    def recursive_fn(n):
+      if n > 0:
+        return recursive_fn(n - 1)
+      return 1
+
+    self.assertEqual(recursive_fn(5).numpy(), 1)
+
+  def test_recursive_tf_function_with_gradients(self):
+
+    @def_function.function
+    def recursive_fn(n, x):
+      if n > 0:
+        return n * recursive_fn(n - 1, x)
+      else:
+        return x
+
+    x = variables.Variable(1.0)
+    with backprop.GradientTape() as tape:
+      g = recursive_fn(5, x)
+
+    dg_dx = tape.gradient(g, x)
+    self.assertEqual(dg_dx.numpy(), 120)
+
+  def test_recursive_python_function(self):
+
+    def recursive_py_fn(n):
+      if n > 0:
+        return recursive_py_fn(n - 1)
+      return 1
+
+    @def_function.function
+    def recursive_fn(n):
+      return recursive_py_fn(n)
+
+    self.assertEqual(recursive_fn(5).numpy(), 1)
+
+  def test_recursive_python_function_with_gradients(self):
+
+    def recursive_py_fn(n, x):
+      if n > 0:
+        return n * recursive_py_fn(n - 1, x)
+      return x
+
+    @def_function.function
+    def recursive_fn(n, x):
+      return recursive_py_fn(n, x)
+
+    x = variables.Variable(1.0)
+    with backprop.GradientTape() as tape:
+      g = recursive_fn(5, x)
+
+    dg_dx = tape.gradient(g, x)
+    self.assertEqual(dg_dx.numpy(), 120)
+
+  def test_recursive_tf_function_call_each_other(self):
+
+    @def_function.function
+    def recursive_fn1(n):
+      if n <= 1:
+        return 1
+      return recursive_fn2(n - 1)
+
+    @def_function.function
+    def recursive_fn2(n):
+      if n <= 1:
+        return 2
+      return recursive_fn1(n - 1)
+
+    self.assertEqual(recursive_fn1(5).numpy(), 1)
+    self.assertEqual(recursive_fn1(6).numpy(), 2)
+    self.assertEqual(recursive_fn2(5).numpy(), 2)
+    self.assertEqual(recursive_fn2(6).numpy(), 1)
+
+  def test_recursive_tf_function_call_each_other_with_gradients(self):
+
+    @def_function.function
+    def recursive_fn1(n, x):
+      if n <= 1:
+        return x
+      return n * recursive_fn2(n - 1, x)
+
+    @def_function.function
+    def recursive_fn2(n, x):
+      if n <= 1:
+        return 2 * x
+      return n * recursive_fn1(n - 1, x)
+
+    x = variables.Variable(1.0)
+    with backprop.GradientTape() as tape:
+      g1 = recursive_fn1(5, x)
+
+    dg1_dx = tape.gradient(g1, x)
+    self.assertEqual(dg1_dx.numpy(), 120)
+
+    with backprop.GradientTape() as tape:
+      g2 = recursive_fn2(5, x)
+
+    dg2_dx = tape.gradient(g2, x)
+    self.assertEqual(dg2_dx.numpy(), 240)
+
+  def test_recursive_tf_function_with_cond(self):
+    @def_function.function(autograph=False)
+    def recursive_fn(n):
+      return cond_v2.cond_v2(n > 0, recursive_fn(n - 1), 1)
+
+    with self.assertRaises(RecursionError):
+      recursive_fn(constant_op.constant(5))
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
",1,train
afac8158d43691661ad083f6dd9e56f327c1dcb7,tensorflow/tensorflow,"Fix the deadlock issue of recursive tf.function.

Replace threading.Lock with threading.RLock to allow recursive tf.function.

PiperOrigin-RevId: 401282729
Change-Id: I3d10416f2eb2c15e2055bb4f4afee3d62bd6c428",function.py,"@@ -3037,7 +3037,7 @@ class Function(object):
     if self.input_signature is not None:
       self._hashable_input_signature = hash(self.flat_input_signature)
 
-    self._lock = threading.Lock()
+    self._lock = threading.RLock()
     # _descriptor_cache is a of instance of a class to an instance-specific
     # `Function`, used to make sure defun-decorated methods create different
     # functions for each instance.
",1,train
d3738dd70f1c9ceb547258cbb82d853da8771850,tensorflow/tensorflow,"Ensuring that the input to DeserializeSparse is not a scalar.

PiperOrigin-RevId: 400554784
Change-Id: Ib658701040d4f707f20b8706e251d5fff46b2671",sparse_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/common_shape_fns.h""
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/shape_inference.h""
+#include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
@@ -159,6 +160,8 @@ REGISTER_OP(""DeserializeSparse"")
     .Attr(""Tserialized: {string, variant} = DT_STRING"")
     .SetShapeFn([](InferenceContext* c) {
       // serialized sparse is [?, ..., ?, 3] vector.
+      ShapeHandle unused_shape;
+      TF_RETURN_IF_ERROR(c->WithRankAtLeast(c->input(0), 1, &unused_shape));
       DimensionHandle unused;
       TF_RETURN_IF_ERROR(c->WithValue(c->Dim(c->input(0), -1), 3, &unused));
       c->set_output(0, c->Matrix(InferenceContext::kUnknownDim,
",1,test
d3738dd70f1c9ceb547258cbb82d853da8771850,tensorflow/tensorflow,"Ensuring that the input to DeserializeSparse is not a scalar.

PiperOrigin-RevId: 400554784
Change-Id: Ib658701040d4f707f20b8706e251d5fff46b2671",sparse_serialization_ops_test.py,"@@ -16,10 +16,12 @@
 
 import numpy as np
 
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import sparse_tensor as sparse_tensor_lib
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.platform import test
 
@@ -460,6 +462,18 @@ class SerializeSparseTest(test.TestCase):
     self._testDeserializeFailsInvalidProtoHelper(
         sparse_ops.serialize_sparse, sparse_ops.deserialize_many_sparse)
 
+  def testDeserializeInvalidVariant(self):
+    mu = gen_resource_variable_ops.mutex_v2()
+    mu_lock = gen_resource_variable_ops.mutex_lock(mutex=mu)
+
+    @def_function.function
+    def f():
+      return sparse_ops.deserialize_sparse(
+          serialized_sparse=mu_lock, dtype=dtypes.int32)
+
+    with self.assertRaisesRegex(ValueError, r""Shape must be at least rank 1""):
+      f()
+
 
 if __name__ == ""__main__"":
   test.main()
",1,test
c79ba87153ee343401dbe9d1954d7f79e521eb14,tensorflow/tensorflow,"Make Transpose's shape inference function validate that negative `perm` values are within the tensor's rank.

PiperOrigin-RevId: 403252853
Change-Id: Ia6b31b45b237312668bb31c2c3b3c7bbce2d2610",array_ops.cc,"@@ -168,7 +168,7 @@ Status TransposeShapeFn(InferenceContext* c) {
 
     for (int32_t i = 0; i < rank; ++i) {
       int64_t in_idx = data[i];
-      if (in_idx >= rank) {
+      if (in_idx >= rank || in_idx <= -rank) {
         return errors::InvalidArgument(""perm dim "", in_idx,
                                        "" is out of range of input rank "", rank);
       }
",1,train
05cbebd3c6bb8f517a158b0155debb8df79017ff,tensorflow/tensorflow,"Fix a NPE issue in invalid Exit op. Now it will report an error instead of crash.

PiperOrigin-RevId: 404089902
Change-Id: Ia6ec55445ea70ad045a4d339d354959ad0618f2a",immutable_executor_state.cc,"@@ -316,6 +316,10 @@ Status ImmutableExecutorState::BuildControlFlowInfo(const Graph* g,
     } else if (IsExit(curr_node)) {
       // Exit to the parent frame.
       parent = parent_nodes[curr_id];
+      if (!parent) {
+        return errors::InvalidArgument(
+            ""Invalid Exit op: Cannot find a corresponding Enter op."");
+      }
       frame_name = cf_info->frame_names[parent->id()];
       parent = parent_nodes[parent->id()];
     } else {
",1,train
a8ad3e5e79c75f36edb81e0ba3f3c0c5442aeddc,tensorflow/tensorflow,"Update TPU AllToAll op to avoid divide by 0.

PiperOrigin-RevId: 400259638
Change-Id: Ic4cfe4fe7159da38caed8044ee005f898e42cd86",tpu_cross_replica_ops.cc,"@@ -32,6 +32,7 @@ REGISTER_OP(""AllToAll"")
     .Attr(""split_count: int"")
     .SetShapeFn([](InferenceContext* c) {
       ShapeHandle input = c->input(0);
+      ShapeHandle group_assignment = c->input(1);
       if (!c->RankKnown(input)) {
         c->set_output(0, c->UnknownShape());
         return Status::OK();
@@ -42,6 +43,21 @@ REGISTER_OP(""AllToAll"")
       int split_dimension;
       int split_count;
       TF_RETURN_IF_ERROR(c->GetAttr(""split_count"", &split_count));
+      if (split_count < 1) {
+        return errors::InvalidArgument(""split_count "", split_count,
+                                       "" must at least be one."");
+      }
+      if (c->RankKnown(group_assignment) && c->Rank(group_assignment) != 2) {
+        return errors::InvalidArgument(""group_assignment must have rank 2."");
+      }
+      DimensionHandle num_replicas_per_group = c->Dim(group_assignment, 1);
+      if (c->ValueKnown(num_replicas_per_group) &&
+          (c->Value(num_replicas_per_group) != split_count)) {
+        return errors::InvalidArgument(
+            ""split_count "", split_count,
+            "" must equal the size of the second dimension of group_assignment "",
+            c->Value(num_replicas_per_group));
+      }
 
       TF_RETURN_IF_ERROR(c->GetAttr(""concat_dimension"", &concat_dimension));
 
@@ -65,6 +81,12 @@ REGISTER_OP(""AllToAll"")
           dims[i] = c->MakeDim(c->Value(dims[i]) * split_count);
         }
         if (i == split_dimension) {
+          if (c->ValueKnown(dims[i]) &&
+              (c->Value(dims[i]) % split_count != 0)) {
+            return errors::InvalidArgument(
+                ""input dimension "", c->Value(dims[i]),
+                "" not divisible by split_count "", split_count);
+          }
           dims[i] = c->MakeDim(c->Value(dims[i]) / split_count);
         }
       }
",1,train
a8ad3e5e79c75f36edb81e0ba3f3c0c5442aeddc,tensorflow/tensorflow,"Update TPU AllToAll op to avoid divide by 0.

PiperOrigin-RevId: 400259638
Change-Id: Ic4cfe4fe7159da38caed8044ee005f898e42cd86",tpu_test.py,"@@ -32,6 +32,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.tpu import tpu
 from tensorflow.python.tpu import tpu_feed
 from tensorflow.python.tpu import training_loop
+from tensorflow.python.tpu.ops import tpu_ops
 
 
 class TPUContextTest(test.TestCase):
@@ -165,6 +166,51 @@ class TPUGraphPruneTest(test.TestCase):
         graph.get_operation_by_name(""import/y"").get_attr(
             tpu._TPU_REPLICATE_ATTR)
 
+
+class TPUOpsTest(test.TestCase):
+
+  def test_all_to_all_zero_split_count(self):
+    with self.assertRaisesRegex(
+        ValueError, ""split_count 0 must at least be one""):
+      tpu_ops.all_to_all(
+          x=[0.0, 0.1652, 0.6543],
+          group_assignment=[1, -1],
+          concat_dimension=0,
+          split_dimension=0,
+          split_count=0)
+
+  def test_all_to_all_group_assignment_wrong_shape(self):
+    with self.assertRaisesRegex(
+        ValueError, ""group_assignment must have rank 2""):
+      tpu_ops.all_to_all(
+          x=[0.0, 0.1652, 0.6543],
+          group_assignment=[1, -1],
+          concat_dimension=0,
+          split_dimension=0,
+          split_count=2)
+
+  def test_all_to_all_split_count_not_equal_to_group_assignment_shape(self):
+    with self.assertRaisesRegex(
+        ValueError, ""split_count 1 must equal the size of the second dimension ""
+        ""of group_assignment 2""):
+      tpu_ops.all_to_all(
+          x=[0.0, 0.1652, 0.6543],
+          group_assignment=[[0, 1], [2, 3]],
+          concat_dimension=0,
+          split_dimension=0,
+          split_count=1)
+
+  def test_all_to_all_split_count_not_divide_input_shape(self):
+    with self.assertRaisesRegex(
+        ValueError, ""input dimension 3 not divisible by split_count 2""):
+      tpu_ops.all_to_all(
+          x=[[0.0], [0.1652], [0.6543]],
+          group_assignment=[[0, 1], [2, 3]],
+          concat_dimension=1,
+          split_dimension=0,
+          split_count=2)
+
+
 def do_einsum():
   a = array_ops.placeholder(dtype=dtypes.float32, name=""a"", shape=[2, 3, 4])
   b = array_ops.placeholder(dtype=dtypes.float32, name=""b"", shape=[2, 4, 5])
",1,train
e6cf28c72ba2eb949ca950d834dd6d66bb01cfae,tensorflow/tensorflow,"Validate that matrix dimension sizes in SparseMatMul are positive.

PiperOrigin-RevId: 401149683
Change-Id: Ib33eafc561a39c8741ece80b2edce6d4aae9a57d",sparse_matmul_op.cc,"@@ -32,6 +32,7 @@ limitations under the License.
 #include ""tensorflow/core/kernels/fill_functor.h""
 #include ""tensorflow/core/lib/core/blocking_counter.h""
 #include ""tensorflow/core/lib/core/threadpool.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/mutex.h""
@@ -980,9 +981,18 @@ class SparseMatMulOp : public OpKernel {
                 errors::InvalidArgument(
                     ""Matrix size incompatible: a: "", a.shape().DebugString(),
                     "", b: "", b.shape().DebugString()));
+    OP_REQUIRES(ctx, m >= 0 && n >= 0 && k >= 0,
+                errors::InvalidArgument(
+                    ""Matrix dimensions cannot be negative: a: "",
+                    a.shape().DebugString(), "", b: "", b.shape().DebugString()));
     Tensor* output = nullptr;
     OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({m, n}), &output));
 
+    // Return early if at least one of the output dimensions has size 0.
+    if (m == 0 || n == 0) {
+      return;
+    }
+
     if (k == 0) {
       // If the inner dimension k in the matrix multiplication is zero, we fill
       // the output with zeros.
",1,train
ca38dab9d3ee66c5de06f11af9a4b1200da5ef75,tensorflow/tensorflow,"Fix undefined behavior in CollectiveReduceV2 and others

We should not call done after it's moved.

PiperOrigin-RevId: 400838185
Change-Id: Ifc979740054b8f8c6f4d50acc89472fe60c4fdb1",collective_ops.cc,"@@ -494,15 +494,17 @@ class CollectiveOpV2Kernel : public AsyncOpKernel {
                               const Tensor& group_size, const Tensor& group_key,
                               const Tensor& instance_key) {
     if (group_size.dims() > 0) {
-      return errors::Internal(""Unexpected dimensions on input group_size, got "",
-                              group_size.shape().DebugString());
+      return errors::InvalidArgument(
+          ""Unexpected dimensions on input group_size, got "",
+          group_size.shape().DebugString());
     }
     if (group_key.dims() > 0) {
-      return errors::Internal(""Unexpected dimensions on input group_key, got "",
-                              group_key.shape().DebugString());
+      return errors::InvalidArgument(
+          ""Unexpected dimensions on input group_key, got "",
+          group_key.shape().DebugString());
     }
     if (instance_key.dims() > 0) {
-      return errors::Internal(
+      return errors::InvalidArgument(
           ""Unexpected dimensions on input instance_key, got "",
           instance_key.shape().DebugString());
     }
@@ -625,7 +627,7 @@ class CollectiveReduceV2OpKernel : public CollectiveOpV2Kernel {
                                               /*group_size*/ c->input(1),
                                               /*group_key*/ c->input(2),
                                               /*instance_key*/ c->input(3)),
-                         done);
+                         done_with_cleanup);
     col_params->instance.shape = c->input(0).shape();
     col_params->merge_op = merge_op_.get();
     col_params->final_op = final_op_.get();
@@ -855,14 +857,15 @@ class CollectiveInitializeCommunicatorOpKernel : public AsyncOpKernel {
 
   Status CheckInputs(Tensor group_size_t, Tensor group_key_t) {
     if (group_size_t.dims() > 0) {
-      return errors::Internal(
+      return errors::InvalidArgument(
           ""Unexpected dimensions on input group_size. ""
           ""It shoulbe a scalar, got tensor with shape "",
           group_size_t.shape().DebugString());
     }
     if (group_key_t.dims() > 0) {
-      return errors::Internal(""Unexpected dimensions on input group_key, got "",
-                              group_key_t.shape().DebugString());
+      return errors::InvalidArgument(
+          ""Unexpected dimensions on input group_key, got "",
+          group_key_t.shape().DebugString());
     }
 
     auto group_size = group_size_t.unaligned_flat<int32>()(0);
@@ -1084,7 +1087,7 @@ class CollectiveReduceV3OpKernel : public CollectiveOpV3Kernel {
     };
     core::RefCountPtr<CollectiveGroupResource> resource;
     OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource),
-                         done);
+                         done_with_cleanup);
 
     Tensor group_assignment = c->input(2);
 
@@ -1134,7 +1137,7 @@ class CollectiveAllToAllV3OpKernel : public CollectiveOpV3Kernel {
     };
     core::RefCountPtr<CollectiveGroupResource> resource;
     OP_REQUIRES_OK_ASYNC(c, LookupResource(c, HandleFromInput(c, 1), &resource),
-                         done);
+                         done_with_cleanup);
 
     Tensor group_assignment = c->input(2);
 
",1,train
ca38dab9d3ee66c5de06f11af9a4b1200da5ef75,tensorflow/tensorflow,"Fix undefined behavior in CollectiveReduceV2 and others

We should not call done after it's moved.

PiperOrigin-RevId: 400838185
Change-Id: Ifc979740054b8f8c6f4d50acc89472fe60c4fdb1",collective_ops_test.py,"@@ -1182,6 +1182,69 @@ class InputPipelineTest(test.TestCase):
     self.assertAllEqual(self.evaluate(f()), [[3.], [3.]])
 
 
+@combinations.generate(
+    combinations.times(
+        combinations.combine(collective_op=[
+            combinations.NamedObject('all_reduce_v2',
+                                     CollectiveOpsV2.all_reduce),
+            combinations.NamedObject('all_gather_v2',
+                                     CollectiveOpsV2.all_gather)
+        ]), device_combination))
+class InvalidInputTest(test.TestCase, parameterized.TestCase):
+
+  def setUp(self):
+    _setup_context()
+    super().setUp()
+
+  def testInvalidGroupKey(self, collective_op, device, communication):
+    dev0 = '/device:%s:0' % device
+    group_size = 2
+    group_key = [100]
+    instance_key = 100
+    in_tensor = constant_op.constant([1.])
+
+    with self.assertRaises(errors.InvalidArgumentError):
+      with ops.device(dev0):
+        collective_op(
+            in_tensor,
+            group_size,
+            group_key,
+            instance_key,
+            communication_hint=communication)
+
+  def testInvalidGroupSize(self, collective_op, device, communication):
+    dev0 = '/device:%s:0' % device
+    group_size = -2
+    group_key = 100
+    instance_key = 100
+    in_tensor = constant_op.constant([1.])
+
+    with self.assertRaises(errors.InvalidArgumentError):
+      with ops.device(dev0):
+        collective_op(
+            in_tensor,
+            group_size,
+            group_key,
+            instance_key,
+            communication_hint=communication)
+
+  def testInvalidInstanceKey(self, collective_op, device, communication):
+    dev0 = '/device:%s:0' % device
+    group_size = 2
+    group_key = 100
+    instance_key = [100]
+    in_tensor = constant_op.constant([1.])
+
+    with self.assertRaises(errors.InvalidArgumentError):
+      with ops.device(dev0):
+        collective_op(
+            in_tensor,
+            group_size,
+            group_key,
+            instance_key,
+            communication_hint=communication)
+
+
 class CollectiveOpsV3Test(test.TestCase, parameterized.TestCase):
 
   def setUp(self):
",1,train
af5fcebb37c8b5d71c237f4e59c6477015c78ce6,tensorflow/tensorflow,"Fix access to undefined memory during shape inference of Cudnn*.

PiperOrigin-RevId: 400324259
Change-Id: Ie3b7859d19ae24ee9ac2adf413bdc1e851bbc604",cudnn_rnn_ops.cc,"@@ -81,11 +81,17 @@ REGISTER_OP(""CudnnRNN"")
     .Attr(""seed2: int = 0"")
     .Attr(""is_training: bool = true"")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
       auto input_shape = c->input(0);
       auto input_h_shape = c->input(1);
+      TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused));
+
       auto seq_length = c->Dim(input_shape, 0);
       auto batch_size = c->Dim(input_shape, 1);
       auto num_units = c->Dim(input_h_shape, 2);
+
       string direction;
       TF_RETURN_IF_ERROR(c->GetAttr(""direction"", &direction));
       string rnn_mode;
@@ -124,8 +130,13 @@ REGISTER_OP(""CudnnRNNV2"")
     .Attr(""seed2: int = 0"")
     .Attr(""is_training: bool = true"")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
       auto input_shape = c->input(0);
       auto input_h_shape = c->input(1);
+      TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused));
+
       auto seq_length = c->Dim(input_shape, 0);
       auto batch_size = c->Dim(input_shape, 1);
       auto num_units = c->Dim(input_h_shape, 2);
@@ -171,16 +182,26 @@ REGISTER_OP(""CudnnRNNV3"")
     .Attr(""is_training: bool = true"")
     .Attr(""time_major: bool = true"")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
       auto input_shape = c->input(0);
       auto input_h_shape = c->input(1);
       auto input_c_shape = c->input(2);
+      TF_RETURN_IF_ERROR(c->WithRank(input_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(input_h_shape, 3, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(3), 1, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(4), 1, &unused));
+
       auto max_seq_length = c->Dim(input_shape, 0);
       auto batch_size = c->Dim(input_shape, 1);
       auto num_units = c->Dim(input_h_shape, 2);
+
       string direction;
       TF_RETURN_IF_ERROR(c->GetAttr(""direction"", &direction));
       string rnn_mode;
       TF_RETURN_IF_ERROR(c->GetAttr(""rnn_mode"", &rnn_mode));
+      if (rnn_mode == ""lstm"") {
+        TF_RETURN_IF_ERROR(c->WithRank(input_c_shape, 3, &unused));
+      }
       int dir_count = (direction == ""bidirectional"") ? 2 : 1;
       DimensionHandle output_size;
       TF_RETURN_IF_ERROR(c->Multiply(num_units, dir_count, &output_size));
",1,train
af5fcebb37c8b5d71c237f4e59c6477015c78ce6,tensorflow/tensorflow,"Fix access to undefined memory during shape inference of Cudnn*.

PiperOrigin-RevId: 400324259
Change-Id: Ie3b7859d19ae24ee9ac2adf413bdc1e851bbc604",cudnn_rnn_ops_test.cc,"@@ -68,6 +68,11 @@ TEST(CudnnRNNOpsTest, ForwardLstm_ShapeFn) {
                    .Attr(""direction"", ""unidirectional"")
                    .Finalize(&op.node_def));
   INFER_OK(op, input_shapes_desc, output_shapes_desc);
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?]"");
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?]"");
+  // Disabled because the kernel does not check shape of input_c.
+  // INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[]"");
 }
 
 TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) {
@@ -100,6 +105,11 @@ TEST(CudnnRNNOpsTest, ForwardV2Lstm_ShapeFn) {
                    .Attr(""direction"", ""unidirectional"")
                    .Finalize(&op.node_def));
   INFER_OK(op, input_shapes_desc, output_shapes_desc);
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?]"");
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?]"");
+  // Disabled because the kernel does not check shape of input_c.
+  // INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[]"");
 }
 
 TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) {
@@ -137,6 +147,52 @@ TEST(CudnnRNNOpsTest, ForwardV3Lstm_ShapeFn) {
                    .Attr(""direction"", ""unidirectional"")
                    .Finalize(&op.node_def));
   INFER_OK(op, input_shapes_desc, output_shapes_desc);
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[?,?,?];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[?,?,?];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[?,?,?];[];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[?,?,?];[?];[]"");
+}
+
+TEST(CudnnRNNOpsTest, ForwardV3Gru) {
+  int max_seq_length = 2;
+  int batch_size = 3;
+  int num_units = 4;
+  int num_layers = 5;
+  int dir_count = 1;
+  std::vector<int> input_shape = {max_seq_length, batch_size, num_units};
+  std::vector<int> input_h_shape = {num_layers * dir_count, batch_size,
+                                    num_units};
+  std::vector<int> input_c_shape = {num_layers * dir_count, batch_size,
+                                    num_units};
+  std::vector<int> output_shape = {max_seq_length, batch_size,
+                                   num_units * dir_count};
+  std::vector<int> seq_lengths_shape = {batch_size};
+  auto shape_to_str = [](const std::vector<int>& v) {
+    return strings::StrCat(""["", absl::StrJoin(v, "",""), ""]"");
+  };
+  string input_shapes_desc = strings::StrCat(
+      shape_to_str(input_shape), "";"", shape_to_str(input_h_shape), "";"",
+      shape_to_str(input_c_shape), "";"", ""[?]"", "";"",
+      shape_to_str(seq_lengths_shape));
+  string output_shapes_desc = ""[d0_0,d0_1,d1_2];in1;[];?;?"";
+
+  ShapeInferenceTestOp op(""CudnnRNNV3"");
+  TF_ASSERT_OK(NodeDefBuilder(""test"", ""CudnnRNNV3"")
+                   .Input({""input"", 0, DT_FLOAT})
+                   .Input({""input_h"", 0, DT_FLOAT})
+                   .Input({""input_c"", 0, DT_FLOAT})
+                   .Input({""params"", 0, DT_FLOAT})
+                   .Input({""sequence_lengths"", 0, DT_INT32})
+                   .Attr(""rnn_mode"", ""gru"")
+                   .Attr(""input_mode"", ""auto_select"")
+                   .Attr(""direction"", ""unidirectional"")
+                   .Finalize(&op.node_def));
+  INFER_OK(op, input_shapes_desc, output_shapes_desc);
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[];[?,?,?];[];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 3 "", op, ""[?,?,?];[];[];[?];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[];[];[?]"");
+  INFER_ERROR(""Shape must be rank 1 "", op, ""[?,?,?];[?,?,?];[];[?];[]"");
 }
 
 }  // end namespace tensorflow
",1,train
25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another
to exceed the total dimension, leading to a segfault and security vulnerability.
Adding a check for negative sizes prevents this.

PiperOrigin-RevId: 401035665
Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",split_v_op.cc,"@@ -138,6 +138,13 @@ class SplitVOpBase : public OpKernel {
       (*split_sizes_vec)[neg_one_dim] = input_size_split_dim - determined_size;
     }
 
+    for (int i = 0; i < split_sizes_vec->size(); ++i) {
+      const Tlen& split_size = (*split_sizes_vec)[i];
+      OP_REQUIRES(context, split_size >= Tlen(0),
+                  errors::InvalidArgument(""Split size at index "", i,
+                                          "" must be >= 0. Got: "", split_size));
+    }
+
     // Special case 2: split along the 1st dimension. The requirements are that
     // either we are splitting the outer dimension of two or more such that
     // every outer subpart is aligned or that the split sizes mean that they are
",1,test
25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another
to exceed the total dimension, leading to a segfault and security vulnerability.
Adding a check for negative sizes prevents this.

PiperOrigin-RevId: 401035665
Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",array_ops.cc,"@@ -681,6 +681,12 @@ REGISTER_OP(""SplitV"")
           if (data[i] == -1 && c->ValueKnown(split_dim_size)) {
             size = split_dim_size - total_size;
           }
+          // If we have a negative known size (either explicit, or computed
+          // via -1), then the split sizes are invalid.
+          if (size < -1 || (size == -1 && c->ValueKnown(split_dim_size))) {
+            return errors::InvalidArgument(""Split size at index "", i,
+                                           "" must be >= 0. Got: "", size);
+          }
           TF_RETURN_IF_ERROR(
               c->ReplaceDim(input, split_dim, c->MakeDim(size), &output_shape));
           c->set_output(i, output_shape);
",1,test
25d622ffc432acc736b14ca3904177579e733cc6,tensorflow/tensorflow,"A negative size in one of the split sizes allowed the computed size of another
to exceed the total dimension, leading to a segfault and security vulnerability.
Adding a check for negative sizes prevents this.

PiperOrigin-RevId: 401035665
Change-Id: I79bbe329787dac82aa4bf60397a9129b716aedab",split_op_test.py,"@@ -384,6 +384,24 @@ class SplitOpTest(test.TestCase):
                                   ""must have exactly one element""):
         sess.run(y, {x: np.array([], dtype=np.int32), splits: [4, 11, 15]})
 
+  @test_util.run_in_graph_and_eager_modes
+  def testNegativeSizes(self):
+    x = constant_op.constant([1, 2, 3], dtypes.float32)
+    # A size of -1 signifies to determine size based on sum of other splits.
+    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
+                                ""Split size at index 1 must be >= 0. Got: -2""):
+      splits = [-1, -2]
+      self.evaluate(array_ops.split(x, splits, axis=0))
+
+  @test_util.run_in_graph_and_eager_modes
+  def testBadSplitSizes(self):
+    x = constant_op.constant([1, 2], dtypes.float32)
+    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
+                                ""Determined shape must either match input""
+                                ""|can't split axis""):
+      splits = [1, 2]
+      self.evaluate(array_ops.split(x, splits, axis=0))
+
 
 if __name__ == ""__main__"":
   test.main()
",1,test
aab9998916c2ffbd8f0592059fad352622f89cda,tensorflow/tensorflow,"Add shape checks to FusedBatchNorm kernels.

PiperOrigin-RevId: 399755576
Change-Id: If8049fde109cc33badb5509d174b9b95aee1ea5e",fused_batch_norm_op.cc,"@@ -1340,18 +1340,20 @@ class FusedBatchNormOpBase : public OpKernel {
         errors::InvalidArgument(""offset must have the same number of elements ""
                                 ""as the channels of x, got "",
                                 offset.NumElements(), "" and "", num_channels));
-    if (estimated_mean.NumElements() != 0) {
+    if (!is_training_ || exponential_avg_factor_ != 1.) {
+      std::string prefix_msg = is_training_ ? ""When exponential_avg_factor != 1""
+                                            : ""When is_training=false"";
       OP_REQUIRES(context, estimated_mean.NumElements() == num_channels,
                   errors::InvalidArgument(
-                      ""mean must be empty or have the same number of ""
-                      ""elements as the channels of x, got "",
+                      prefix_msg,
+                      "", mean must have the same number ""
+                      ""of elements as the channels of x, got "",
                       estimated_mean.NumElements(), "" and "", num_channels));
-    }
-    if (estimated_variance.NumElements() != 0) {
       OP_REQUIRES(context, estimated_variance.NumElements() == num_channels,
                   errors::InvalidArgument(
-                      ""variance must be empty or have the same number of ""
-                      ""elements as the channels of x, got "",
+                      prefix_msg,
+                      "", variance must have the same ""
+                      ""number of elements as the channels of x, got "",
                       estimated_variance.NumElements(), "" and "", num_channels));
     }
 
@@ -1543,6 +1545,11 @@ class FusedBatchNormGradOpBase : public OpKernel {
                 errors::InvalidArgument(
                     ""saved variance must be 1-dimensional"",
                     saved_maybe_inv_var_or_pop_var.shape().DebugString()));
+    OP_REQUIRES(
+        context, x.shape() == y_backprop.shape(),
+        errors::InvalidArgument(
+            ""x and y_backprop must have same shape, but x has shape "",
+            x.shape(), "" and y_backprop has shape "", y_backprop.shape()));
     if (use_activation) {
       OP_REQUIRES(
           context, x.dim_size(3) % 4 == 0,
@@ -1569,6 +1576,23 @@ class FusedBatchNormGradOpBase : public OpKernel {
                   errors::InvalidArgument(""Error during tensor copy.""));
     }
 
+    const auto num_channels = GetTensorDim(x, tensor_format_, 'C');
+    OP_REQUIRES(
+        context, scale.NumElements() == num_channels,
+        errors::InvalidArgument(""scale must have the same number of elements ""
+                                ""as the channels of x, got "",
+                                scale.NumElements(), "" and "", num_channels));
+    OP_REQUIRES(
+        context, saved_mean_or_pop_mean.NumElements() == num_channels,
+        errors::InvalidArgument(""reserve_space_1 must have the same number of ""
+                                ""elements as the channels of x, got "",
+                                scale.NumElements(), "" and "", num_channels));
+    OP_REQUIRES(
+        context, saved_maybe_inv_var_or_pop_var.NumElements() == num_channels,
+        errors::InvalidArgument(""reserve_space_2 must have the same number of ""
+                                ""elements as the channels of x, got "",
+                                scale.NumElements(), "" and "", num_channels));
+
     Tensor* x_backprop = nullptr;
     auto alloc_shape = use_reshape ? dest_shape : x_shape;
     OP_REQUIRES_OK(context,
",1,train
aab9998916c2ffbd8f0592059fad352622f89cda,tensorflow/tensorflow,"Add shape checks to FusedBatchNorm kernels.

PiperOrigin-RevId: 399755576
Change-Id: If8049fde109cc33badb5509d174b9b95aee1ea5e",nn_fused_batchnorm_test.py,"@@ -16,10 +16,13 @@
 
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
@@ -694,6 +697,126 @@ class BatchNormalizationTest(test.TestCase):
     y_ref = np.maximum(y_ref, 0.)
     self.assertAllClose(y_ref, y_val, atol=1e-3)
 
+  def testEagerShapeErrors(self):
+    with context.eager_mode():
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((3,))
+      offset = array_ops.ones((2,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'scale must have the same number of elements'):
+        nn_impl.fused_batch_norm(x, scale, offset)
+
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      offset = array_ops.ones((3,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'offset must have the same number of elements'):
+        nn_impl.fused_batch_norm(x, scale, offset)
+
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      offset = array_ops.ones((2,))
+      mean = array_ops.ones((0,))
+      variance = array_ops.ones((2,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'When is_training=false, mean must have the same number of elements'):
+        nn_impl.fused_batch_norm(
+            x, scale, offset, mean=mean, variance=variance, is_training=False)
+
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      offset = array_ops.ones((2,))
+      mean = array_ops.ones((2,))
+      variance = array_ops.ones((0,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'When is_training=false, variance must have the same number of '
+          'elements'):
+        nn_impl.fused_batch_norm(
+            x, scale, offset, mean=mean, variance=variance, is_training=False)
+
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      offset = array_ops.ones((2,))
+      mean = array_ops.ones((0,))
+      variance = array_ops.ones((2,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'When exponential_avg_factor != 1, mean must have the same number of '
+          'elements'):
+        nn_impl.fused_batch_norm(
+            x,
+            scale,
+            offset,
+            mean=mean,
+            variance=variance,
+            exponential_avg_factor=0.5)
+
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      offset = array_ops.ones((2,))
+      mean = array_ops.ones((2,))
+      variance = array_ops.ones((0,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'When exponential_avg_factor != 1, variance must have the same '
+          'number of elements'):
+        nn_impl.fused_batch_norm(
+            x,
+            scale,
+            offset,
+            mean=mean,
+            variance=variance,
+            exponential_avg_factor=0.5)
+
+  def testEagerShapeGradErrors(self):
+    with context.eager_mode():
+      y_backprop = array_ops.ones((2, 2, 2, 3))
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      reserve_space_1 = array_ops.ones((2,))
+      reserve_space_2 = array_ops.ones((2,))
+      with self.assertRaisesRegex(errors_impl.InvalidArgumentError,
+                                  'x and y_backprop must have same shape,'):
+        gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale,
+                                            reserve_space_1, reserve_space_2)
+
+      y_backprop = array_ops.ones((2, 2, 2, 2))
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((3,))
+      reserve_space_1 = array_ops.ones((2,))
+      reserve_space_2 = array_ops.ones((2,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'scale must have the same number of elements'):
+        gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale,
+                                            reserve_space_1, reserve_space_2)
+
+      y_backprop = array_ops.ones((2, 2, 2, 2))
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      reserve_space_1 = array_ops.ones((3,))
+      reserve_space_2 = array_ops.ones((2,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'reserve_space_1 must have the same number of elements'):
+        gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale,
+                                            reserve_space_1, reserve_space_2)
+
+      y_backprop = array_ops.ones((2, 2, 2, 2))
+      x = array_ops.ones((2, 2, 2, 2))
+      scale = array_ops.ones((2,))
+      reserve_space_1 = array_ops.ones((2,))
+      reserve_space_2 = array_ops.ones((3,))
+      with self.assertRaisesRegex(
+          errors_impl.InvalidArgumentError,
+          'reserve_space_2 must have the same number of elements'):
+        gen_nn_ops.fused_batch_norm_grad_v2(y_backprop, x, scale,
+                                            reserve_space_1, reserve_space_2)
+
 
 if __name__ == '__main__':
   test.main()
",1,train
67bfd9feeecfb3c61d80f0e46d89c170fbee682b,tensorflow/tensorflow,"Make SparseFillEmptyRows validate that the length of `values` is equal to the number of index tuples.

PiperOrigin-RevId: 399969549
Change-Id: I3c2f2ca1c1d2cc88bb5951c6958b38c16e9436c8",sparse_fill_empty_rows_op.cc,"@@ -24,11 +24,13 @@ limitations under the License.
 #include <vector>
 
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/op_requires.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_util.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/util/sparse/sparse_tensor.h""
 
 namespace tensorflow {
@@ -222,6 +224,12 @@ void SparseFillEmptyRowsOpImpl(OpKernelContext* context,
                     errors::InvalidArgument(""values must be a vector, saw: "",
                                             values_t.shape().DebugString()),
                     done);
+  OP_REQUIRES_ASYNC(
+      context, indices_t.dim_size(0) == values_t.dim_size(0),
+      errors::InvalidArgument(""The length of `values` ("", values_t.dim_size(0),
+                              "") must match the first dimension of `indices` ("",
+                              indices_t.dim_size(0), "").""),
+      done);
   OP_REQUIRES_ASYNC(
       context, TensorShapeUtils::IsScalar(default_value_t.shape()),
       errors::InvalidArgument(""default_value must be a scalar, saw: "",
",1,train
68867bf01239d9e1048f98cbad185bf4761bedd3,tensorflow/tensorflow,"Prevent uninitialized variable use in grappler.

PiperOrigin-RevId: 399702928
Change-Id: Id7e75451fbff297692dfb687f60ea04b25c96b24",auto_parallel.cc,"@@ -152,7 +152,7 @@ Status AutoParallel::Initialize(const GrapplerItem& item) {
   TF_RETURN_IF_ERROR(ComputeTransitiveFanin(graph_, item.fetch, &train_nodes));
   LOG(INFO) << ""Number of training nodes: "" << train_nodes.size();
 
-  const NodeDef* dequeue_node;
+  const NodeDef* dequeue_node = nullptr;
   for (const auto& train_node : train_nodes) {
     if (IsDequeueOp(*train_node)) {
       dequeue_node = train_node;
",1,train
68867bf01239d9e1048f98cbad185bf4761bedd3,tensorflow/tensorflow,"Prevent uninitialized variable use in grappler.

PiperOrigin-RevId: 399702928
Change-Id: Id7e75451fbff297692dfb687f60ea04b25c96b24",auto_parallel_test.cc,"@@ -126,6 +126,30 @@ TEST_F(AutoParallelTest, SimpleParallel) {
   EXPECT_EQ(""^AutoParallel-Control-Fetch"", node_gradient.input(0));
 }
 
+TEST_F(AutoParallelTest, SimpleParallelNoDequeue) {
+  tensorflow::Scope s = tensorflow::Scope::DisabledShapeInferenceScope();
+  Output constant_a = ops::Const(s.WithOpName(""constant_a""), 1.0f, {1});
+  Output constant_c = ops::Const(s.WithOpName(""constant_c""), 1.0f, {1});
+  Output constant_b = ops::Const(s.WithOpName(""constant_b""), 1, {1});
+  Output var = ops::Variable(s.WithOpName(""var""), {1}, DT_FLOAT);
+  Output assign = ops::Assign(s.WithOpName(""assign""), {var}, {constant_a});
+  Output add = ops::AddN(s.WithOpName(""add""), {constant_a, constant_c});
+  Output learning_rate = ops::Const(s.WithOpName(""learning_rate""), 0.01f, {1});
+  Output apply_gradient = ops::ApplyGradientDescent(
+      s.WithOpName(""apply_gradient""), {var}, {learning_rate}, {add});
+
+  GrapplerItem item;
+  item.init_ops.push_back(""assign"");
+  item.fetch.push_back(""apply_gradient"");
+  item.init_ops.push_back(""assign"");
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  AutoParallel parallel(2);
+  GraphDef output;
+  Status status = parallel.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
",1,train
f410212e373eb2aec4c9e60bf3702eba99a38aba,tensorflow/tensorflow,"Prevent out-of-bound accesses in SparseBincount.

PiperOrigin-RevId: 399918616
Change-Id: I11d154f4444d3fde1f09c5c40628b8671791a30d",bincount_op.cc,"@@ -405,6 +405,16 @@ class SparseBincountOp : public OpKernel {
       for (int64_t i = 0; i < indices_mat.dimension(0); ++i) {
         const int64_t batch = indices_mat(i, 0);
         const Tidx bin = values(i);
+        OP_REQUIRES(
+            ctx, batch < out.dimension(0),
+            errors::InvalidArgument(""Index out of bound. `batch` ("", batch,
+                                    "") must be less than the dimension size ("",
+                                    out.dimension(0), "").""));
+        OP_REQUIRES(
+            ctx, bin < out.dimension(1),
+            errors::InvalidArgument(""Index out ouf bound. `bin` ("", bin,
+                                    "") must be less then the dimension size ("",
+                                    out.dimension(1), "").""));
         if (bin < size) {
           if (binary_output_) {
             out(batch, bin) = T(1);
",1,train
1cb6bb6c2a6019417c9adaf9e6843ba75ee2580b,tensorflow/tensorflow,"Add error checking to ImmutableConst OP that strings are not yet supported.

PiperOrigin-RevId: 401065359
Change-Id: I9dd2bd2a2c36f22f4a05153daf6ebdc4613469d2",immutable_constant_op.cc,"@@ -100,6 +100,9 @@ void ImmutableConstantOp::Compute(OpKernelContext* ctx) {
 
   OP_REQUIRES_OK(ctx,
                  allocator->InitializeFromRegion(region_name_, ctx->env()));
+  OP_REQUIRES(ctx, dtype_ != DT_STRING,
+              errors::Unimplemented(""Sorry, DT_STRING is not currently ""
+                                    ""supported for ImmutableConstOp.""));
   ctx->set_output(0, Tensor(allocator.get(), dtype_, shape_));
   OP_REQUIRES_OK(ctx, allocator->allocation_status());
   // Allocator is owned by the tensor from this point.
",1,train
1cb6bb6c2a6019417c9adaf9e6843ba75ee2580b,tensorflow/tensorflow,"Add error checking to ImmutableConst OP that strings are not yet supported.

PiperOrigin-RevId: 401065359
Change-Id: I9dd2bd2a2c36f22f4a05153daf6ebdc4613469d2",immutable_constant_op_test.cc,"@@ -146,7 +146,8 @@ TEST(ImmutableConstantOpTest, ExecutionError) {
       error::INTERNAL);
 }
 
-Status CreateTempFile(Env* env, float value, uint64 size, string* filename) {
+Status CreateTempFileFloat(Env* env, float value, uint64 size,
+                           string* filename) {
   const string dir = testing::TmpDir();
   *filename = io::JoinPath(dir, strings::StrCat(""file_"", value));
   std::unique_ptr<WritableFile> file;
@@ -166,8 +167,8 @@ TEST(ImmutableConstantOpTest, FromFile) {
   auto root = Scope::NewRootScope().ExitOnError();
 
   string two_file, three_file;
-  TF_ASSERT_OK(CreateTempFile(env, 2.0f, 1000, &two_file));
-  TF_ASSERT_OK(CreateTempFile(env, 3.0f, 1000, &three_file));
+  TF_ASSERT_OK(CreateTempFileFloat(env, 2.0f, 1000, &two_file));
+  TF_ASSERT_OK(CreateTempFileFloat(env, 3.0f, 1000, &three_file));
   auto node1 = ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, two_file);
   auto node2 =
       ops::ImmutableConst(root, DT_FLOAT, kFileTensorShape, three_file);
@@ -190,5 +191,39 @@ TEST(ImmutableConstantOpTest, FromFile) {
   EXPECT_EQ(outputs.front().flat<float>()(2), 2.0f * 3.0f);
 }
 
+Status CreateTempFileBadString(Env* env, char value, uint64 size,
+                               const string suffix, string* filename) {
+  const string dir = testing::TmpDir();
+  *filename = io::JoinPath(dir, strings::StrCat(""file_"", suffix));
+  std::unique_ptr<WritableFile> file;
+  TF_RETURN_IF_ERROR(env->NewWritableFile(*filename, &file));
+  TF_RETURN_IF_ERROR(file->Append(std::string(size, value)));
+  TF_RETURN_IF_ERROR(file->Close());
+  return Status::OK();
+}
+
+TEST(ImmutableConstantOpTest, FromFileStringUnimplemented) {
+  const TensorShape kFileTensorShape({1});
+  Env* env = Env::Default();
+  auto root = Scope::NewRootScope().ExitOnError();
+
+  string bad_file;
+  TF_ASSERT_OK(CreateTempFileBadString(env, '\xe2', 128, ""bad_e2"", &bad_file));
+  auto result =
+      ops::ImmutableConst(root, DT_STRING, kFileTensorShape, bad_file);
+  GraphDef graph_def;
+  TF_ASSERT_OK(root.ToGraphDef(&graph_def));
+  SessionOptions session_options;
+  session_options.env = Env::Default();
+  std::unique_ptr<Session> session(NewSession(session_options));
+  ASSERT_TRUE(session != nullptr) << ""Failed to create session"";
+  TF_ASSERT_OK(session->Create(graph_def)) << ""Can't create test graph"";
+  std::vector<Tensor> outputs;
+  // Check that the run returned error.
+  EXPECT_EQ(
+      session->Run({}, {result.node()->name() + "":0""}, {}, &outputs).code(),
+      error::UNIMPLEMENTED);
+}
+
 }  // namespace
 }  // namespace tensorflow
",1,train
3712a2d3455e6ccb924daa5724a3652a86f6b585,tensorflow/tensorflow,"Fix macros for converting little endian to host for TF_TSTR_OFFSET GetSize

Make the macro that converts little endian data do nothing on little endian hosts,
and byte swap otherwise.
This only affects getting the size of TStrings of type ""Offset"".

Added a test for TStrings of type ""Offset"" that checks if type and size are consistent.

PiperOrigin-RevId: 400789721
Change-Id: I1398bffd842ab1631614b212b7c3a2af88d99538",ctstring_internal.h,"@@ -63,9 +63,9 @@ static inline uint32_t TF_swap32(uint32_t host_int) {
 #endif
 
 #if TF_TSTRING_LITTLE_ENDIAN
-#define TF_le32toh(x) TF_swap32(x)
-#else  // TF_TSTRING_LITTLE_ENDIAN
 #define TF_le32toh(x) x
+#else  // TF_TSTRING_LITTLE_ENDIAN
+#define TF_le32toh(x) TF_swap32(x)
 #endif  // TF_TSTRING_LITTLE_ENDIAN
 
 static inline size_t TF_align16(size_t i) { return (i + 0xF) & ~0xF; }
",1,train
3712a2d3455e6ccb924daa5724a3652a86f6b585,tensorflow/tensorflow,"Fix macros for converting little endian to host for TF_TSTR_OFFSET GetSize

Make the macro that converts little endian data do nothing on little endian hosts,
and byte swap otherwise.
This only affects getting the size of TStrings of type ""Offset"".

Added a test for TStrings of type ""Offset"" that checks if type and size are consistent.

PiperOrigin-RevId: 400789721
Change-Id: I1398bffd842ab1631614b212b7c3a2af88d99538",ctstring_test.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <memory>
 #include <string>
 
+#include ""tensorflow/core/platform/ctstring_internal.h""
 #include ""tensorflow/core/platform/test.h""
 
 static const char kLongString[] =
@@ -380,3 +381,29 @@ TEST(TF_CTStringTest, ResizeReserve) {
     TF_TString_Dealloc(&s70);
   }
 }
+
+TEST(TF_CTStringTest, OffsetType) {
+  {
+    TF_TString s71;
+
+    TF_TString_Init(&s71);
+    size_t header_length = 24;
+    size_t size = 8;
+    TF_TString_ResizeUninitialized(&s71, header_length + size);
+    uint32_t save_size = s71.u.offset.size;
+    uint32_t save_offset = s71.u.offset.offset;
+    uint32_t save_count = s71.u.offset.count;
+
+    s71.u.offset.size = TF_TString_ToInternalSizeT(size, TF_TSTR_OFFSET);
+    s71.u.offset.offset = header_length;
+    s71.u.offset.count = 0;
+    EXPECT_EQ(size, TF_TString_GetSize(&s71));
+    EXPECT_EQ(TF_TSTR_OFFSET, TF_TString_GetType(&s71));
+
+    // restore state so string can be deallocated
+    s71.u.offset.size = save_size;
+    s71.u.offset.offset = save_offset;
+    s71.u.offset.count = save_count;
+    TF_TString_Dealloc(&s71);
+  }
+}
",1,train
8b202f08d52e8206af2bdb2112a62fafbc546ec7,tensorflow/tensorflow,"Remove use of `eval` when evaluating the input example.

Use `ast.literal_eval` instead, which safely evaluates the expression.

PiperOrigin-RevId: 400012249
Change-Id: I5ff98608ea2d736d093aa488af723ff4f6707e02",saved_model_cli.py,"@@ -20,6 +20,7 @@ https://www.tensorflow.org/guide/saved_model#cli_to_inspect_and_execute_savedmod
 """"""
 
 import argparse
+import ast
 import os
 import re
 import sys
@@ -521,7 +522,7 @@ def preprocess_inputs_arg_string(inputs_str):
   return input_dict
 
 
-def preprocess_input_exprs_arg_string(input_exprs_str):
+def preprocess_input_exprs_arg_string(input_exprs_str, safe=True):
   """"""Parses input arg into dictionary that maps input key to python expression.
 
   Parses input string in the format of 'input_key=<python expression>' into a
@@ -529,8 +530,10 @@ def preprocess_input_exprs_arg_string(input_exprs_str):
 
   Args:
     input_exprs_str: A string that specifies python expression for input keys.
-    Each input is separated by semicolon. For each input key:
+      Each input is separated by semicolon. For each input key:
         'input_key=<python expression>'
+    safe: Whether to evaluate the python expression as literals or allow
+      arbitrary calls (e.g. numpy usage).
 
   Returns:
     A dictionary that maps input keys to their values.
@@ -545,8 +548,15 @@ def preprocess_input_exprs_arg_string(input_exprs_str):
       raise RuntimeError('--input_exprs ""%s"" format is incorrect. Please follow'
                          '""<input_key>=<python expression>""' % input_exprs_str)
     input_key, expr = input_raw.split('=', 1)
-    # ast.literal_eval does not work with numpy expressions
-    input_dict[input_key] = eval(expr)  # pylint: disable=eval-used
+    if safe:
+      try:
+        input_dict[input_key] = ast.literal_eval(expr)
+      except:
+        raise RuntimeError(
+            f'Expression ""{expr}"" is not a valid python literal.')
+    else:
+      # ast.literal_eval does not work with numpy expressions
+      input_dict[input_key] = eval(expr)  # pylint: disable=eval-used
   return input_dict
 
 
@@ -659,7 +669,7 @@ def load_inputs_from_input_arg_string(inputs_str, input_exprs_str,
   tensor_key_feed_dict = {}
 
   inputs = preprocess_inputs_arg_string(inputs_str)
-  input_exprs = preprocess_input_exprs_arg_string(input_exprs_str)
+  input_exprs = preprocess_input_exprs_arg_string(input_exprs_str, safe=False)
   input_examples = preprocess_input_examples_arg_string(input_examples_str)
 
   for input_tensor_key, (filename, variable_name) in inputs.items():
@@ -923,8 +933,10 @@ def add_run_subparser(subparsers):
   parser_run.add_argument('--inputs', type=str, default='', help=msg)
   msg = ('Specifying inputs by python expressions, in the format of'
          ' ""<input_key>=\'<python expression>\'"", separated by \';\'. '
-         'numpy module is available as \'np\'. '
-         'Will override duplicate input keys from --inputs option.')
+         'numpy module is available as \'np\'. Please note that the expression '
+         'will be evaluated as-is, and is susceptible to code injection. '
+         'When this is set, the value will override duplicate input keys from '
+         '--inputs option.')
   parser_run.add_argument('--input_exprs', type=str, default='', help=msg)
   msg = (
       'Specifying tf.Example inputs as list of dictionaries. For example: '
",1,train
8b202f08d52e8206af2bdb2112a62fafbc546ec7,tensorflow/tensorflow,"Remove use of `eval` when evaluating the input example.

Use `ast.literal_eval` instead, which safely evaluates the expression.

PiperOrigin-RevId: 400012249
Change-Id: I5ff98608ea2d736d093aa488af723ff4f6707e02",saved_model_cli_test.py,"@@ -382,7 +382,7 @@ Defined Functions:
     input_expr_str = 'input3=np.zeros([2,2]);input4=[4,5]'
     input_dict = saved_model_cli.preprocess_inputs_arg_string(input_str)
     input_expr_dict = saved_model_cli.preprocess_input_exprs_arg_string(
-        input_expr_str)
+        input_expr_str, safe=False)
     self.assertTrue(input_dict['input1'] == ('/path/file.txt', 'ab3'))
     self.assertTrue(input_dict['input2'] == ('file2', None))
     print(input_expr_dict['input3'])
@@ -418,6 +418,11 @@ Defined Functions:
           }
     """""", feature)
 
+  def testInputPreprocessExampleWithCodeInjection(self):
+    input_examples_str = 'inputs=os.system(""echo hacked"")'
+    with self.assertRaisesRegex(RuntimeError, 'not a valid python literal.'):
+      saved_model_cli.preprocess_input_examples_arg_string(input_examples_str)
+
   def testInputPreProcessFileNames(self):
     input_str = (r'inputx=C:\Program Files\data.npz[v:0];'
                  r'input:0=c:\PROGRA~1\data.npy')
@@ -434,8 +439,8 @@ Defined Functions:
     with self.assertRaises(RuntimeError):
       saved_model_cli.preprocess_inputs_arg_string(input_str)
     input_str = 'inputx:np.zeros((5))'
-    with self.assertRaises(RuntimeError):
-      saved_model_cli.preprocess_input_exprs_arg_string(input_str)
+    with self.assertRaisesRegex(RuntimeError, 'format is incorrect'):
+      saved_model_cli.preprocess_input_exprs_arg_string(input_str, safe=False)
 
   def testInputParserNPY(self):
     x0 = np.array([[1], [2]])
",1,train
da8558533d925694483d2c136a9220d6d49d843c,tensorflow/tensorflow,"Fix undefined behavior in `tf.raw_ops.Switch` in eager mode.

PiperOrigin-RevId: 332578058
Change-Id: I9727571d2f21476b10d8aa27c1b7176564b76ac9",kernel_and_device.cc,"@@ -308,7 +308,12 @@ Status KernelAndDeviceOp::Run(
   if (outputs != nullptr) {
     outputs->clear();
     for (int i = 0; i < context.num_outputs(); ++i) {
-      outputs->push_back(Tensor(*context.mutable_output(i)));
+      const auto* output_tensor = context.mutable_output(i);
+      if (output_tensor != nullptr) {
+        outputs->push_back(Tensor(*output_tensor));
+      } else {
+        outputs->push_back(Tensor());
+      }
     }
   }
   return Status::OK();
",1,test
da8558533d925694483d2c136a9220d6d49d843c,tensorflow/tensorflow,"Fix undefined behavior in `tf.raw_ops.Switch` in eager mode.

PiperOrigin-RevId: 332578058
Change-Id: I9727571d2f21476b10d8aa27c1b7176564b76ac9",control_flow_ops_py_test.py,"@@ -4579,6 +4579,14 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase):
       result = control_flow_ops.merge([v_f, v_t])
       self.evaluate(result)
 
+  def testSwitchEagerMode(self):
+    if not context.executing_eagerly():
+      return
+    input_data = [1, 2, 3, 4]
+    vf, vt = control_flow_ops.switch(input_data, False)
+    self.assertAllEqual(vf, input_data)
+    self.assertAllEqual(vt, [])
+
   @test_util.run_deprecated_v1
   def testQIntArgAndRet(self):
 
",1,test
22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`.

We have a use after free caused by memory corruption, a segmentation fault caused by memory corruption, several memory leaks and undefined behavior when taking a reference to a nullptr.

PiperOrigin-RevId: 332568894
Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",dlpack.cc,"@@ -249,21 +249,36 @@ void TFE_CallDLManagedTensorDeleter(void* dlm_ptr) {
 }
 
 void* TFE_HandleToDLPack(TFE_TensorHandle* h, TF_Status* status) {
+  auto tf_dlm_context = GetDlContext(h, status);
+  if (!status->status.ok()) {
+    return nullptr;
+  }
+
+  auto* tf_dlm_data = TFE_TensorHandleDevicePointer(h, status);
+  if (!status->status.ok()) {
+    return nullptr;
+  }
+
   const Tensor* tensor = GetTensorFromHandle(h, status);
   TF_DataType data_type = static_cast<TF_DataType>(tensor->dtype());
-  TensorReference tensor_ref(*tensor);  // This will call buf_->Ref()
 
+  auto tf_dlm_type = GetDlDataType(data_type, status);
+  if (!status->status.ok()) {
+    return nullptr;
+  }
+
+  TensorReference tensor_ref(*tensor);  // This will call buf_->Ref()
   auto* tf_dlm_tensor_ctx = new TfDlManagedTensorCtx(tensor_ref);
   tf_dlm_tensor_ctx->reference = tensor_ref;
 
   DLManagedTensor* dlm_tensor = &tf_dlm_tensor_ctx->tensor;
   dlm_tensor->manager_ctx = tf_dlm_tensor_ctx;
   dlm_tensor->deleter = &DLManagedTensorDeleter;
-  dlm_tensor->dl_tensor.ctx = GetDlContext(h, status);
+  dlm_tensor->dl_tensor.ctx = tf_dlm_context;
   int ndim = tensor->dims();
   dlm_tensor->dl_tensor.ndim = ndim;
-  dlm_tensor->dl_tensor.data = TFE_TensorHandleDevicePointer(h, status);
-  dlm_tensor->dl_tensor.dtype = GetDlDataType(data_type, status);
+  dlm_tensor->dl_tensor.data = tf_dlm_data;
+  dlm_tensor->dl_tensor.dtype = tf_dlm_type;
 
   std::vector<int64_t>* shape_arr = &tf_dlm_tensor_ctx->shape;
   std::vector<int64_t>* stride_arr = &tf_dlm_tensor_ctx->strides;
@@ -276,13 +291,14 @@ void* TFE_HandleToDLPack(TFE_TensorHandle* h, TF_Status* status) {
     (*stride_arr)[i] = (*shape_arr)[i + 1] * (*stride_arr)[i + 1];
   }
 
-  dlm_tensor->dl_tensor.shape = &(*shape_arr)[0];
+  dlm_tensor->dl_tensor.shape = shape_arr->data();
   // There are two ways to represent compact row-major data
   // 1) nullptr indicates tensor is compact and row-majored.
   // 2) fill in the strides array as the real case for compact row-major data.
   // Here we choose option 2, since some frameworks didn't handle the strides
   // argument properly.
-  dlm_tensor->dl_tensor.strides = &(*stride_arr)[0];
+  dlm_tensor->dl_tensor.strides = stride_arr->data();
+
   dlm_tensor->dl_tensor.byte_offset =
       0;  // TF doesn't handle the strides and byte_offsets here
   return static_cast<void*>(dlm_tensor);
",1,train
22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`.

We have a use after free caused by memory corruption, a segmentation fault caused by memory corruption, several memory leaks and undefined behavior when taking a reference to a nullptr.

PiperOrigin-RevId: 332568894
Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",dlpack_test.py,"@@ -20,9 +20,11 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+
 from tensorflow.python.dlpack import dlpack
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.platform import test
 from tensorflow.python.ops import array_ops
@@ -105,6 +107,12 @@ class DLPackTest(parameterized.TestCase, test.TestCase):
     self.assertRaisesRegex(Exception, "".* is not supported by dlpack"",
                            UnsupportedComplex64)
 
+  def testMustPassTensorArgumentToDLPack(self):
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""The argument to `to_dlpack` must be a TF tensor, not Python object""):
+      dlpack.to_dlpack([1])
+
 
 if __name__ == ""__main__"":
   ops.enable_eager_execution()
",1,train
22e07fb204386768e5bcbea563641ea11f96ceb8,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.experimental.dlpack.to_dlpack`.

We have a use after free caused by memory corruption, a segmentation fault caused by memory corruption, several memory leaks and undefined behavior when taking a reference to a nullptr.

PiperOrigin-RevId: 332568894
Change-Id: Ife0fc05e103b35325094ae5d822ee5fdea764572",tfe_wrapper.cc,"@@ -1358,9 +1358,16 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
   // DLPack functions
   m.def(""TFE_ToDlpackCapsule"", [](py::handle& o) {
     PyObject* eager_tensor_pyobject_ptr = o.ptr();
-    TFE_TensorHandle* thandle = EagerTensor_Handle(eager_tensor_pyobject_ptr);
     tensorflow::Safe_TF_StatusPtr status =
         tensorflow::make_safe(TF_NewStatus());
+
+    if (!EagerTensor_CheckExact(eager_tensor_pyobject_ptr)) {
+      status->status = tensorflow::errors::InvalidArgument(
+          ""The argument to `to_dlpack` must be a TF tensor, not Python object"");
+      tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get());
+    }
+
+    TFE_TensorHandle* thandle = EagerTensor_Handle(eager_tensor_pyobject_ptr);
     void* dlm_ptr = tensorflow::TFE_HandleToDLPack(thandle, status.get());
     tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get());
 
",1,train
390611e0d45c5793c7066110af37c8514e6a6c54,tensorflow/tensorflow,"Fix heap buffer overflow in `tf.raw_ops.SparseFillEmptyRowsGrad`.

Also add tests as they were lacking

PiperOrigin-RevId: 332566071
Change-Id: I44277578e26ff5fb3fdb0dcbba6e91b2ec3e7859",sparse_fill_empty_rows_op.cc,"@@ -236,6 +236,9 @@ class SparseFillEmptyRowsGradOp : public OpKernel {
         context, TensorShapeUtils::IsVector(reverse_index_map_t->shape()),
         errors::InvalidArgument(""reverse_index_map must be a vector, saw: "",
                                 reverse_index_map_t->shape().DebugString()));
+    OP_REQUIRES(context, TensorShapeUtils::IsVector(grad_values_t->shape()),
+                errors::InvalidArgument(""grad_values must be a vector, saw: "",
+                                        grad_values_t->shape().DebugString()));
 
     const auto reverse_index_map = reverse_index_map_t->vec<int64>();
     const auto grad_values = grad_values_t->vec<T>();
@@ -264,8 +267,13 @@ class SparseFillEmptyRowsGradOp : public OpKernel {
       // Locate the index of the output of the forward prop associated
       // with this location in the input of the forward prop.  Copy
       // the gradient into it.  Mark it as visited.
-      d_values(i) = grad_values(reverse_index_map(i));
-      visited(reverse_index_map(i)) = true;
+      int64 reverse_index = reverse_index_map(i);
+      OP_REQUIRES(
+          context, 0 <= reverse_index && reverse_index < N_full,
+          errors::InvalidArgument(""Elements in reverse index must be in [0, "",
+                                  N_full, "") but got "", reverse_index));
+      d_values(i) = grad_values(reverse_index);
+      visited(reverse_index) = true;
     }
     for (int j = 0; j < N_full; ++j) {
       // The default value gradient gets the accumulated remainder of
",1,test
390611e0d45c5793c7066110af37c8514e6a6c54,tensorflow/tensorflow,"Fix heap buffer overflow in `tf.raw_ops.SparseFillEmptyRowsGrad`.

Also add tests as they were lacking

PiperOrigin-RevId: 332566071
Change-Id: I44277578e26ff5fb3fdb0dcbba6e91b2ec3e7859",sparse_ops_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function
 from absl.testing import parameterized
 import numpy as np
 
+from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -30,6 +31,7 @@ from tensorflow.python.framework import test_util
 # Need array_grad to register gradient for Identity.
 from tensorflow.python.ops import array_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_sparse_ops
 from tensorflow.python.ops import gradient_checker_v2 as gradient_checker
 from tensorflow.python.ops import math_ops
 # Need sparse_grad to register gradient for SparseToDense.
@@ -234,5 +236,57 @@ class SparseOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual([5], result.dense_shape)
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class RawOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
+
+  def testSparseFillEmptyRowsGrad(self):
+    reverse_index_map = [2, 1]
+    grad_values = [0, 1, 2, 3]
+    d_values, d_default_value = self.evaluate(
+        gen_sparse_ops.SparseFillEmptyRowsGrad(
+            reverse_index_map=reverse_index_map, grad_values=grad_values))
+    self.assertAllEqual([2, 1], d_values)
+    self.assertEqual(3, d_default_value)
+
+  def testSparseFillEmptyRowsGradNegativeIndexMapValue(self):
+    reverse_index_map = [2, -1]
+    grad_values = [0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        r'Elements in reverse index must be in \[0, 4\)'):
+      self.evaluate(
+          gen_sparse_ops.SparseFillEmptyRowsGrad(
+              reverse_index_map=reverse_index_map, grad_values=grad_values))
+
+  def testSparseFillEmptyRowsGradLargeIndexMapValue(self):
+    reverse_index_map = [2, 10]
+    grad_values = [0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        r'Elements in reverse index must be in \[0, 4\)'):
+      self.evaluate(
+          gen_sparse_ops.SparseFillEmptyRowsGrad(
+              reverse_index_map=reverse_index_map, grad_values=grad_values))
+
+  def testSparseFillEmptyRowsGradMatrix(self):
+    reverse_index_map = [0, 1]
+    grad_values = [[0, 1], [2, 3]]
+    # Note: Eager mode and graph mode throw different errors here. Graph mode
+    # will fail with a ValueError from the shape checking logic, while Eager
+    # will fail with an InvalidArgumentError from the kernel itself.
+    if context.executing_eagerly():
+      with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                  r'grad_values must be a vector'):
+        self.evaluate(
+            gen_sparse_ops.SparseFillEmptyRowsGrad(
+                reverse_index_map=reverse_index_map, grad_values=grad_values))
+    else:
+      with self.assertRaisesRegex(ValueError,
+                                  r'Shape must be rank 1 but is rank 2'):
+        self.evaluate(
+            gen_sparse_ops.SparseFillEmptyRowsGrad(
+                reverse_index_map=reverse_index_map, grad_values=grad_values))
+
+
 if __name__ == '__main__':
   googletest.main()
",1,test
3cbb917b4714766030b28eba9fb41bb97ce9ee02,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.raw_ops.*CountSparseOutput`.

Also add tests for these API points, both for the happy paths and for the vulnerable ones.

PiperOrigin-RevId: 332563222
Change-Id: Ib3b52116a83a134c2e742a7c66e5e956db8fba05",count_ops.cc,"@@ -178,10 +178,30 @@ class SparseCount : public OpKernel {
     const Tensor& weights = context->input(3);
     bool use_weights = weights.NumElements() > 0;
 
+    OP_REQUIRES(context, TensorShapeUtils::IsMatrix(indices.shape()),
+                errors::InvalidArgument(
+                    ""Input indices must be a 2-dimensional tensor. Got: "",
+                    indices.shape().DebugString()));
+
+    if (use_weights) {
+      OP_REQUIRES(
+          context, weights.shape() == values.shape(),
+          errors::InvalidArgument(
+              ""Weights and values must have the same shape. Weight shape: "",
+              weights.shape().DebugString(),
+              ""; values shape: "", values.shape().DebugString()));
+    }
+
     bool is_1d = shape.NumElements() == 1;
     int num_batches = is_1d ? 1 : shape.flat<int64>()(0);
     int num_values = values.NumElements();
 
+    OP_REQUIRES(context, num_values == indices.shape().dim_size(0),
+                errors::InvalidArgument(
+                    ""Number of values must match first dimension of indices."",
+                    ""Got "", num_values,
+                    "" values, indices shape: "", indices.shape().DebugString()));
+
     const auto indices_values = indices.matrix<int64>();
     const auto values_values = values.flat<T>();
     const auto weight_values = weights.flat<W>();
@@ -235,12 +255,33 @@ class RaggedCount : public OpKernel {
     bool use_weights = weights.NumElements() > 0;
     bool is_1d = false;
 
+    if (use_weights) {
+      OP_REQUIRES(
+          context, weights.shape() == values.shape(),
+          errors::InvalidArgument(
+              ""Weights and values must have the same shape. Weight shape: "",
+              weights.shape().DebugString(),
+              ""; values shape: "", values.shape().DebugString()));
+    }
+
     const auto splits_values = splits.flat<int64>();
     const auto values_values = values.flat<T>();
     const auto weight_values = weights.flat<W>();
     int num_batches = splits.NumElements() - 1;
     int num_values = values.NumElements();
 
+    OP_REQUIRES(
+        context, num_batches > 0,
+        errors::InvalidArgument(
+            ""Must provide at least 2 elements for the splits argument""));
+    OP_REQUIRES(context, splits_values(0) == 0,
+                errors::InvalidArgument(""Splits must start with 0, not with "",
+                                        splits_values(0)));
+    OP_REQUIRES(context, splits_values(num_batches) == num_values,
+                errors::InvalidArgument(
+                    ""Splits must end with the number of values, got "",
+                    splits_values(num_batches), "" instead of "", num_values));
+
     auto per_batch_counts = BatchedMap<W>(num_batches);
     T max_value = 0;
     int batch_idx = 0;
",1,test
3cbb917b4714766030b28eba9fb41bb97ce9ee02,tensorflow/tensorflow,"Fix multiple vulnerabilities in `tf.raw_ops.*CountSparseOutput`.

Also add tests for these API points, both for the happy paths and for the vulnerable ones.

PiperOrigin-RevId: 332563222
Change-Id: Ib3b52116a83a134c2e742a7c66e5e956db8fba05",bincount_ops_test.py,"@@ -25,7 +25,9 @@ from tensorflow.python.eager import context
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import bincount_ops
+from tensorflow.python.ops import gen_count_ops
 from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
@@ -834,5 +836,121 @@ class TestSparseCountFailureModes(test.TestCase):
       self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
 
 
+@test_util.run_all_in_graph_and_eager_modes
+@test_util.disable_tfrt
+class RawOpsTest(test.TestCase, parameterized.TestCase):
+
+  def testSparseCountSparseOutputBadIndicesShape(self):
+    indices = [[[0], [0]], [[0], [1]], [[1], [0]], [[1], [2]]]
+    values = [1, 1, 1, 10]
+    weights = [1, 2, 4, 6]
+    dense_shape = [2, 3]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Input indices must be a 2-dimensional tensor""):
+      self.evaluate(
+          gen_count_ops.SparseCountSparseOutput(
+              indices=indices,
+              values=values,
+              dense_shape=dense_shape,
+              weights=weights,
+              binary_output=False))
+
+  def testSparseCountSparseOutputBadWeightsShape(self):
+    indices = [[0, 0], [0, 1], [1, 0], [1, 2]]
+    values = [1, 1, 1, 10]
+    weights = [1, 2, 4]
+    dense_shape = [2, 3]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Weights and values must have the same shape""):
+      self.evaluate(
+          gen_count_ops.SparseCountSparseOutput(
+              indices=indices,
+              values=values,
+              dense_shape=dense_shape,
+              weights=weights,
+              binary_output=False))
+
+  def testSparseCountSparseOutputBadNumberOfValues(self):
+    indices = [[0, 0], [0, 1], [1, 0]]
+    values = [1, 1, 1, 10]
+    weights = [1, 2, 4, 6]
+    dense_shape = [2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Number of values must match first dimension of indices""):
+      self.evaluate(
+          gen_count_ops.SparseCountSparseOutput(
+              indices=indices,
+              values=values,
+              dense_shape=dense_shape,
+              weights=weights,
+              binary_output=False))
+
+  def testRaggedCountSparseOutput(self):
+    splits = [0, 4, 7]
+    values = [1, 1, 2, 1, 2, 10, 5]
+    weights = [1, 2, 3, 4, 5, 6, 7]
+    output_indices, output_values, output_shape = self.evaluate(
+        gen_count_ops.RaggedCountSparseOutput(
+            splits=splits, values=values, weights=weights, binary_output=False))
+    self.assertAllEqual([[0, 1], [0, 2], [1, 2], [1, 5], [1, 10]],
+                        output_indices)
+    self.assertAllEqual([7, 3, 5, 7, 6], output_values)
+    self.assertAllEqual([2, 11], output_shape)
+
+  def testRaggedCountSparseOutputBadWeightsShape(self):
+    splits = [0, 4, 7]
+    values = [1, 1, 2, 1, 2, 10, 5]
+    weights = [1, 2, 3, 4, 5, 6]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Weights and values must have the same shape""):
+      self.evaluate(
+          gen_count_ops.RaggedCountSparseOutput(
+              splits=splits,
+              values=values,
+              weights=weights,
+              binary_output=False))
+
+  def testRaggedCountSparseOutputEmptySplits(self):
+    splits = []
+    values = [1, 1, 2, 1, 2, 10, 5]
+    weights = [1, 2, 3, 4, 5, 6, 7]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Must provide at least 2 elements for the splits argument""):
+      self.evaluate(
+          gen_count_ops.RaggedCountSparseOutput(
+              splits=splits,
+              values=values,
+              weights=weights,
+              binary_output=False))
+
+  def testRaggedCountSparseOutputBadSplitsStart(self):
+    splits = [1, 7]
+    values = [1, 1, 2, 1, 2, 10, 5]
+    weights = [1, 2, 3, 4, 5, 6, 7]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Splits must start with 0""):
+      self.evaluate(
+          gen_count_ops.RaggedCountSparseOutput(
+              splits=splits,
+              values=values,
+              weights=weights,
+              binary_output=False))
+
+  def testRaggedCountSparseOutputBadSplitsEnd(self):
+    splits = [0, 5]
+    values = [1, 1, 2, 1, 2, 10, 5]
+    weights = [1, 2, 3, 4, 5, 6, 7]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Splits must end with the number of values""):
+      self.evaluate(
+          gen_count_ops.RaggedCountSparseOutput(
+              splits=splits,
+              values=values,
+              weights=weights,
+              binary_output=False))
+
+
 if __name__ == ""__main__"":
   test.main()
",1,test
27b417360cbd671ef55915e4bb6bb06af8b8a832,tensorflow/tensorflow,"Prevent `int64` to `int` truncation in `Shard` API usage.

The function argument in `Shard` must be a function of two `int64` arguments. However, we are passing in a function with two `int` arguments. Thus, for large workloads, these arguments get truncated from positive `int64` values to negative `int` ones, resulting in an out-of-bounds buffer write.

PiperOrigin-RevId: 332557334
Change-Id: I236c9a2e7f53580e520571da8ba941a3aa9fa0b5",random_op.cc,"@@ -202,7 +202,7 @@ class RandomGammaOp : public OpKernel {
     // avoid a couple flops which can be done on a per-alpha basis.
 
     auto DoWork = [samples_per_alpha, num_alphas, &rng, samples_flat,
-                   alpha_flat](int start_output, int limit_output) {
+                   alpha_flat](int64 start_output, int64 limit_output) {
       using Eigen::numext::exp;
       using Eigen::numext::log;
       using Eigen::numext::log1p;
",1,train
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",prediction_ops.cc,"@@ -121,7 +121,7 @@ class BoostedTreesTrainingPredictOp : public OpKernel {
       auto do_work = [&resource, &bucketized_features, &cached_tree_ids,
                       &cached_node_ids, &output_partial_logits,
                       &output_node_ids, latest_tree,
-                      this](int32 start, int32 end) {
+                      this](int64 start, int64 end) {
         for (int32 i = start; i < end; ++i) {
           int32 tree_id = cached_tree_ids(i);
           int32 node_id = cached_node_ids(i);
@@ -237,7 +237,7 @@ class BoostedTreesPredictOp : public OpKernel {
 
     const int32 last_tree = resource->num_trees() - 1;
     auto do_work = [&resource, &bucketized_features, &output_logits, last_tree,
-                    this](int32 start, int32 end) {
+                    this](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         std::vector<float> tree_logits(logits_dimension_, 0.0);
         int32 tree_id = 0;
@@ -340,7 +340,7 @@ class BoostedTreesExampleDebugOutputsOp : public OpKernel {
     // path. Note: feature_ids has one less value than logits_path because the
     // first value of each logit path will be the bias.
     auto do_work = [&resource, &bucketized_features, &output_debug_info,
-                    last_tree](int32 start, int32 end) {
+                    last_tree](int64 start, int64 end) {
       for (int32 i = start; i < end; ++i) {
         // Proto to store debug outputs, per example.
         boosted_trees::DebugOutput example_debug_info;
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",crop_and_resize_op.cc,"@@ -223,7 +223,7 @@ struct CropAndResize<CPUDevice, T> {
     const int depth = crops.dimension(3);
 
     // Sharding across boxes.
-    auto CropAndResizePerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizePerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);
@@ -449,7 +449,7 @@ struct CropAndResizeBackpropImage<CPUDevice, T> {
 
     grads_image.setZero();
 
-    auto CropAndResizeBackImgPerBox = [&](int start_box, int limit_box) {
+    auto CropAndResizeBackImgPerBox = [&](int64 start_box, int64 limit_box) {
       for (int b = start_box; b < limit_box; ++b) {
         const float y1 = boxes(b, 0);
         const float x1 = boxes(b, 1);
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",banded_triangular_solve_op.cc,"@@ -193,7 +193,8 @@ struct LaunchBatchBandedTriangularSolve {
 
     Shard(worker_threads.num_threads, worker_threads.workers, batch_size,
           cost_per_unit,
-          [&in_x, &in_y, adjoint, lower, &bcast, out](int start, int limit) {
+          [&in_x, &in_y, adjoint, lower, &bcast, out](int64 start,
+                                                      int64 limit) {
             SequentialBandedTriangularSolveKernel<Scalar>::Run(
                 in_x, in_y, lower, adjoint, bcast, out, start, limit);
           });
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",nth_element_op.cc,"@@ -95,7 +95,8 @@ struct NthElementFunctor<CPUDevice, T> {
     const int last_dim = input_tensor.dim_size(input_tensor.dims() - 1);
 
     // Allocate each row to different shard.
-    auto SubNthElement = [&, input, output, last_dim, n](int start, int limit) {
+    auto SubNthElement = [&, input, output, last_dim, n](int64 start,
+                                                         int64 limit) {
       // std::nth_element would rearrange the array, so we need a new buffer.
       std::vector<T> buf(last_dim);
 
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",parameterized_truncated_normal_op.cc,"@@ -70,8 +70,8 @@ struct TruncatedNormalFunctor<CPUDevice, T> {
 
     auto do_work = [samples_per_batch, num_elements, &ctx, &means, &stddevs,
                     &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_batch,
-                                                           int limit_batch) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_batch,
+                                                           int64 limit_batch) {
       // Capturing ""gen"" by-value would only make a copy for the _shared_
       // lambda.  Since we want to let each worker have its own copy, we pass
       // ""gen"" by reference and explicitly do a copy assignment here.
@@ -333,8 +333,8 @@ struct TruncatedNormalFunctorV2<CPUDevice, T> {
 
     auto do_work = [num_batches, samples_per_batch, &ctx, &bcast, &means,
                     &stddevs, &minvals, &maxvals, &gen, &output,
-                    kStdDevsInsideBoundsToUseRandnSampler](int start_output,
-                                                           int limit_output) {
+                    kStdDevsInsideBoundsToUseRandnSampler](int64 start_output,
+                                                           int64 limit_output) {
       // Capturing ""gen"" by-value would only make a copy for the _shared_
       // lambda.  Since we want to let each worker have its own copy, we pass
       // ""gen"" by reference and explicitly do a copy assignment here.
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",random_binomial_op.cc,"@@ -184,7 +184,7 @@ struct RandomBinomialFunctor<CPUDevice, T, U> {
     // the sample shape and [H1, ... Hm] for the batch shape of the samples.
     // We have B1 * ... * Bk samples per batch member we need.
     auto DoWork = [num_batches, samples_per_batch, &bcast, &counts, &probs,
-                   &gen, &output](int start_output, int limit_output) {
+                   &gen, &output](int64 start_output, int64 limit_output) {
       // Vectorized intermediate calculations for uniform rejection sampling.
       // We always generate at most 4 samples.
       Eigen::array<T, 4> z;
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",random_poisson_op.cc,"@@ -97,7 +97,7 @@ struct PoissonFunctor<CPUDevice, T, U> {
     typedef random::UniformDistribution<random::PhiloxRandom, CT> Uniform;
 
     auto DoWork = [num_samples, num_rate, &rng, samples_flat, rate_flat](
-                      int start_output, int limit_output) {
+                      int64 start_output, int64 limit_output) {
       // Capturing ""rng"" by value would only make a copy for the _shared_
       // lambda.  Since we want to let each worker have its own copy, we pass
       // ""rng"" by reference and explicitly do a copy assignment.
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",stateless_random_ops.cc,"@@ -252,7 +252,7 @@ class StatelessRandomGammaOp : public StatelessRandomOpBase {
     // avoid a couple flops which can be done on a per-alpha basis.
 
     auto DoWork = [samples_per_alpha, num_alphas, &random, samples_flat,
-                   alpha_flat](int start_output, int limit_output) {
+                   alpha_flat](int64 start_output, int64 limit_output) {
       // Capturing ""random"" by-value would only make a copy for the _shared_
       // lambda.  Since we want to let each worker have its own copy, we pass
       // ""random"" by reference and explicitly do a copy assignment.
",1,test
ca8c013b5e97b1373b3bb1c97ea655e69f31a575,tensorflow/tensorflow,"Prevent integer truncation from 64 to 32 bits.

The `tensorflow::Shard` function's last argument must be a two-argument function where both arguments are `int64` (`long long`, 64 bits). However, there are usages where the code passes in a function whose arguments are `int` or `int32` (32 bits). In these cases, the integer truncation can later cause a segfault or other unexpected behavior.

PiperOrigin-RevId: 332560414
Change-Id: Ief649406babc8d4f60b3e7a9d573cbcc5ce5b767",topk_op.cc,"@@ -136,7 +136,7 @@ struct TopKFunctor<CPUDevice, T> {
       return Status::OK();
     }
 
-    auto SortIndices = [&](int start_batch, int limit_batch) {
+    auto SortIndices = [&](int64 start_batch, int64 limit_batch) {
       for (int32 b = start_batch; b < limit_batch; ++b) {
         const T* input_data = &input(b, 0);
         const auto stable_comp = [input_data](const int32 a, const int32 b) {
",1,test
33be22c65d86256e6826666662e40dbdfe70ee83,tensorflow/tensorflow,"Prevent format string vulnerability in `tf.strings.as_string`.

The `printf` format specifier only allows `#`, `0`, `-`, `+`, and space as flag characters. Other characters are interpreted as width/precision/length modifiers or conversion specifiers. If a character does not fit into any of these sets, `printf` just displays it.

Also add a test suite for `tf.strings.as_string`, and fix the issue where the flag character was used only if a width was specified.

PiperOrigin-RevId: 332553548
Change-Id: Ie57cf2a7c14d1a36097642794c14329db669bbba",as_string_op.cc,"@@ -65,9 +65,26 @@ class AsStringOp : public OpKernel {
     OP_REQUIRES(ctx, !(scientific && shortest),
                 errors::InvalidArgument(
                     ""Cannot select both scientific and shortest notation""));
+
     format_ = ""%"";
+    if (!fill_string.empty()) {
+      switch (fill_string[0]) {
+        case ' ':
+        case '+':
+        case '-':
+        case '0':
+        case '#':
+          strings::Appendf(&format_, ""%s"", fill_string.c_str());
+          break;
+        default:
+          bool fill_not_supported = true;
+          OP_REQUIRES(ctx, !fill_not_supported,
+                      errors::InvalidArgument(""Fill argument not supported: \"""",
+                                              fill_string, ""\""""));
+      }
+    }
     if (width > -1) {
-      strings::Appendf(&format_, ""%s%d"", fill_string.c_str(), width);
+      strings::Appendf(&format_, ""%d"", width);
     }
     if (precision > -1) {
       strings::Appendf(&format_, "".%d"", precision);
",1,train
33be22c65d86256e6826666662e40dbdfe70ee83,tensorflow/tensorflow,"Prevent format string vulnerability in `tf.strings.as_string`.

The `printf` format specifier only allows `#`, `0`, `-`, `+`, and space as flag characters. Other characters are interpreted as width/precision/length modifiers or conversion specifiers. If a character does not fit into any of these sets, `printf` just displays it.

Also add a test suite for `tf.strings.as_string`, and fix the issue where the flag character was used only if a width was specified.

PiperOrigin-RevId: 332553548
Change-Id: Ie57cf2a7c14d1a36097642794c14329db669bbba",as_string_op_test.cc,"@@ -0,0 +1,245 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/framework/fake_input.h""
+#include ""tensorflow/core/framework/node_def_builder.h""
+#include ""tensorflow/core/framework/tensor.h""
+#include ""tensorflow/core/framework/tensor_testutil.h""
+#include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/kernels/ops_testutil.h""
+#include ""tensorflow/core/kernels/ops_util.h""
+#include ""tensorflow/core/lib/core/status_test_util.h""
+
+namespace tensorflow {
+namespace {
+
+class AsStringGraphTest : public OpsTestBase {
+ protected:
+  Status Init(DataType input_type, const string& fill = """", int width = -1,
+              int precision = -1, bool scientific = false,
+              bool shortest = false) {
+    TF_CHECK_OK(NodeDefBuilder(""op"", ""AsString"")
+                    .Input(FakeInput(input_type))
+                    .Attr(""fill"", fill)
+                    .Attr(""precision"", precision)
+                    .Attr(""scientific"", scientific)
+                    .Attr(""shortest"", shortest)
+                    .Attr(""width"", width)
+                    .Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(AsStringGraphTest, Int8) {
+  TF_ASSERT_OK(Init(DT_INT8));
+
+  AddInputFromArray<int8>(TensorShape({3}), {-42, 0, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(&expected, {""-42"", ""0"", ""42""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Int64) {
+  TF_ASSERT_OK(Init(DT_INT64));
+
+  AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(&expected, {""-42"", ""0"", ""42""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatDefault) {
+  TF_ASSERT_OK(Init(DT_FLOAT));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(
+      &expected, {""-42.000000"", ""0.000000"", ""3.141590"", ""42.000000""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatScientific) {
+  TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1,
+                    /*scientific=*/true));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(&expected, {""-4.200000e+01"", ""0.000000e+00"",
+                                        ""3.141590e+00"", ""4.200000e+01""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatShortest) {
+  TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1,
+                    /*scientific=*/false, /*shortest=*/true));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(&expected, {""-42"", ""0"", ""3.14159"", ""42""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatPrecisionOnly) {
+  TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/2));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(&expected, {""-42.00"", ""0.00"", ""3.14"", ""42.00""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FloatWidthOnly) {
+  TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/5));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(
+      &expected, {""-42.000000"", ""0.000000"", ""3.141590"", ""42.000000""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Float_5_2_Format) {
+  TF_ASSERT_OK(Init(DT_FLOAT, /*fill=*/"""", /*width=*/5, /*precision=*/2));
+
+  AddInputFromArray<float>(TensorShape({4}), {-42, 0, 3.14159, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({4}));
+  test::FillValues<tstring>(&expected, {""-42.00"", "" 0.00"", "" 3.14"", ""42.00""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Complex) {
+  TF_ASSERT_OK(Init(DT_COMPLEX64, /*fill=*/"""", /*width=*/5, /*precision=*/2));
+
+  AddInputFromArray<complex64>(TensorShape({3}), {{-4, 2}, {0}, {3.14159, -1}});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(
+      &expected, {""(-4.00, 2.00)"", ""( 0.00, 0.00)"", ""( 3.14,-1.00)""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, Bool) {
+  TF_ASSERT_OK(Init(DT_BOOL));
+
+  AddInputFromArray<bool>(TensorShape({2}), {true, false});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({2}));
+  test::FillValues<tstring>(&expected, {""true"", ""false""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, String) {
+  Status s = Init(DT_STRING);
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(absl::StrContains(
+      s.error_message(),
+      ""Value for attr 'T' of string is not in the list of allowed values""));
+}
+
+TEST_F(AsStringGraphTest, OnlyOneOfScientificAndShortest) {
+  Status s = Init(DT_FLOAT, /*fill=*/"""", /*width=*/-1, /*precision=*/-1,
+                  /*scientific=*/true, /*shortest=*/true);
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(
+      absl::StrContains(s.error_message(),
+                        ""Cannot select both scientific and shortest notation""));
+}
+
+TEST_F(AsStringGraphTest, NoShortestForNonFloat) {
+  Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/-1,
+                  /*scientific=*/false, /*shortest=*/true);
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(absl::StrContains(
+      s.error_message(),
+      ""scientific and shortest format not supported for datatype""));
+}
+
+TEST_F(AsStringGraphTest, NoScientificForNonFloat) {
+  Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/-1,
+                  /*scientific=*/true);
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(absl::StrContains(
+      s.error_message(),
+      ""scientific and shortest format not supported for datatype""));
+}
+
+TEST_F(AsStringGraphTest, NoPrecisionForNonFloat) {
+  Status s = Init(DT_INT32, /*fill=*/"""", /*width=*/-1, /*precision=*/5);
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(absl::StrContains(s.error_message(),
+                                ""precision not supported for datatype""));
+}
+
+TEST_F(AsStringGraphTest, LongFill) {
+  Status s = Init(DT_INT32, /*fill=*/""asdf"");
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(absl::StrContains(s.error_message(),
+                                ""Fill string must be one or fewer characters""));
+}
+
+TEST_F(AsStringGraphTest, FillWithZero) {
+  TF_ASSERT_OK(Init(DT_INT64, /*fill=*/""0"", /*width=*/4));
+
+  AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(&expected, {""-042"", ""0000"", ""0042""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithSpace) {
+  TF_ASSERT_OK(Init(DT_INT64, /*fill=*/"" "", /*width=*/4));
+
+  AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(&expected, {"" -42"", ""   0"", ""  42""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar1) {
+  TF_ASSERT_OK(Init(DT_INT64, /*fill=*/""-"", /*width=*/4));
+
+  AddInputFromArray<int64>(TensorShape({3}), {-42, 0, 42});
+  TF_ASSERT_OK(RunOpKernel());
+  Tensor expected(allocator(), DT_STRING, TensorShape({3}));
+  test::FillValues<tstring>(&expected, {""-42 "", ""0   "", ""42  ""});
+  test::ExpectTensorEqual<tstring>(expected, *GetOutput(0));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar3) {
+  Status s = Init(DT_INT32, /*fill=*/""s"");
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(
+      absl::StrContains(s.error_message(), ""Fill argument not supported""));
+}
+
+TEST_F(AsStringGraphTest, FillWithChar4) {
+  Status s = Init(DT_INT32, /*fill=*/""n"");
+  ASSERT_EQ(error::INVALID_ARGUMENT, s.code());
+  ASSERT_TRUE(
+      absl::StrContains(s.error_message(), ""Fill argument not supported""));
+}
+
+}  // end namespace
+}  // end namespace tensorflow
",1,train
9a133d73ae4b4664d22bd1aa6d654fec13c52ee1,tensorflow/tensorflow,"Prevent segfault in `GetSessionHandle{,V2}`.

In eager mode, session state is null.

PiperOrigin-RevId: 332548597
Change-Id: If094812c2e094044220b9ba28f7d7601be042f38",session_ops.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 // See docs in ../ops/data_flow_ops.cc.
 
 #include <limits.h>
+
 #include <vector>
 
 #include ""tensorflow/core/common_runtime/device.h""
@@ -27,6 +28,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/gtl/map_util.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/mutex.h""
@@ -42,7 +44,11 @@ class GetSessionHandleOp : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& val = ctx->input(0);
-    int64 id = ctx->session_state()->GetNewId();
+    auto session_state = ctx->session_state();
+    OP_REQUIRES(ctx, session_state != nullptr,
+                errors::FailedPrecondition(
+                    ""GetSessionHandle called on null session state""));
+    int64 id = session_state->GetNewId();
     TensorStore::TensorAndKey tk{val, id, requested_device()};
     OP_REQUIRES_OK(ctx, ctx->tensor_store()->AddTensor(name(), tk));
 
",1,train
9a133d73ae4b4664d22bd1aa6d654fec13c52ee1,tensorflow/tensorflow,"Prevent segfault in `GetSessionHandle{,V2}`.

In eager mode, session state is null.

PiperOrigin-RevId: 332548597
Change-Id: If094812c2e094044220b9ba28f7d7601be042f38",raw_ops_test.py,"@@ -25,6 +25,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gen_data_flow_ops
 from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import gen_string_ops
 from tensorflow.python.platform import test
@@ -79,6 +80,13 @@ class RawOpsTest(test.TestCase, parameterized.TestCase):
               pad_width=0,
               preserve_short_sequences=False))
 
+  def testGetSessionHandle(self):
+    if context.executing_eagerly():
+      with self.assertRaisesRegex(
+          errors.FailedPreconditionError,
+          ""GetSessionHandle called on null session state""):
+        gen_data_flow_ops.GetSessionHandle(value=[1])
+
 
 if __name__ == ""__main__"":
   ops.enable_eager_execution()
",1,train
0462de5b544ed4731aa2fb23946ac22c01856b80,tensorflow/tensorflow,"Validate `data_splits` for `tf.StringNGrams`.

Without validation, we can cause a heap buffer overflow which results in data leakage and/or segfaults.

PiperOrigin-RevId: 332543478
Change-Id: Iee5bda24497a195d09d122355502480830b1b317",string_ngrams_op.cc,"@@ -19,6 +19,7 @@ limitations under the License.
 #include ""absl/strings/ascii.h""
 #include ""absl/strings/str_cat.h""
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 namespace text {
@@ -60,6 +61,18 @@ class StringNGramsOp : public tensorflow::OpKernel {
     OP_REQUIRES_OK(context, context->input(""data_splits"", &splits));
     const auto& splits_vec = splits->flat<SPLITS_TYPE>();
 
+    // Validate that the splits are valid indices into data
+    const int input_data_size = data->flat<tstring>().size();
+    const int splits_vec_size = splits_vec.size();
+    for (int i = 0; i < splits_vec_size; ++i) {
+      bool valid_splits = splits_vec(i) >= 0;
+      valid_splits = valid_splits && (splits_vec(i) <= input_data_size);
+      OP_REQUIRES(
+          context, valid_splits,
+          errors::InvalidArgument(""Invalid split value "", splits_vec(i),
+                                  "", must be in [0,"", input_data_size, ""]""));
+    }
+
     int num_batch_items = splits_vec.size() - 1;
     tensorflow::Tensor* ngrams_splits;
     OP_REQUIRES_OK(
",1,test
0462de5b544ed4731aa2fb23946ac22c01856b80,tensorflow/tensorflow,"Validate `data_splits` for `tf.StringNGrams`.

Without validation, we can cause a heap buffer overflow which results in data leakage and/or segfaults.

PiperOrigin-RevId: 332543478
Change-Id: Iee5bda24497a195d09d122355502480830b1b317",raw_ops_test.py,"@@ -18,16 +18,21 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from absl.testing import parameterized
+
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import gen_math_ops
+from tensorflow.python.ops import gen_string_ops
 from tensorflow.python.platform import test
 
 
 @test_util.run_all_in_graph_and_eager_modes
-class RawOpsTest(test.TestCase):
+@test_util.disable_tfrt
+class RawOpsTest(test.TestCase, parameterized.TestCase):
 
   def testSimple(self):
     x = constant_op.constant(1)
@@ -58,6 +63,22 @@ class RawOpsTest(test.TestCase):
         gen_math_ops.Any(input=x, axis=0),
         gen_math_ops.Any(input=x, axis=0, keep_dims=False))
 
+  @parameterized.parameters([[0, 8]], [[-1, 6]])
+  def testStringNGramsBadDataSplits(self, splits):
+    data = [""aa"", ""bb"", ""cc"", ""dd"", ""ee"", ""ff""]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Invalid split value""):
+      self.evaluate(
+          gen_string_ops.string_n_grams(
+              data=data,
+              data_splits=splits,
+              separator="""",
+              ngram_widths=[2],
+              left_pad="""",
+              right_pad="""",
+              pad_width=0,
+              preserve_short_sequences=False))
+
 
 if __name__ == ""__main__"":
   ops.enable_eager_execution()
",1,test
adf095206f25471e864a8e63a0f1caef53a0e3a6,tensorflow/tensorflow,"Validate `NodeDef`s from `FunctionDefLibrary` of a `GraphDef`.

We already validated `NodeDef`s from a `GraphDef` but missed validating those from the `FunctionDefLibrary`. Thus, some maliciously crafted models could evade detection and cause denial of service due to a `CHECK`-fail.

PiperOrigin-RevId: 332536309
Change-Id: I052efe919ff1fe2f90815e286a1aa4c54c7b94ff",loader.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/cc/saved_model/loader_util.h""
 #include ""tensorflow/cc/saved_model/reader.h""
 #include ""tensorflow/core/framework/attr_value.pb.h""
+#include ""tensorflow/core/framework/function.pb.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
 #include ""tensorflow/core/framework/tensor.pb.h""
 #include ""tensorflow/core/lib/io/path.h""
@@ -73,26 +74,41 @@ uint64 GetLatencyMicroseconds(const uint64 start_microseconds) {
 // Ensure that constant tensors loaded from the saved model have valid shape.
 // Also ensure that constant nodes have a value assigned to them.
 // TODO(b/154763635): this is temporary and will be replaced with a better audit
+static Status ValidateNode(const NodeDef& node) {
+  const auto node_iterator = node.attr().find(""value"");
+  if (node_iterator != node.attr().end()) {
+    AttrValue node_value = node_iterator->second;
+    if (node_value.has_tensor()) {
+      const PartialTensorShape node_shape(node_value.tensor().tensor_shape());
+      if (node_shape.num_elements() < 0) {
+        return errors::FailedPrecondition(
+            ""Saved model contains node \"""", node.name(), ""\"" (op \"""", node.op(),
+            ""\"") which initializes from a tensor with "",
+            node_shape.num_elements(), "" elements"");
+      }
+    }
+  } else if (node.op() == ""Const"") {
+    return errors::FailedPrecondition(
+        ""Saved model contains node \"""", node.name(),
+        ""\"" which is a constant tensor but no value has been provided"");
+  }
+  return Status::OK();
+}
+
 static Status ValidateSavedTensors(const GraphDef& graph_def) {
   for (const auto& node : graph_def.node()) {
-    const auto node_iterator = node.attr().find(""value"");
-    if (node_iterator != node.attr().end()) {
-      AttrValue node_value = node_iterator->second;
-      if (node_value.has_tensor()) {
-        const PartialTensorShape node_shape(node_value.tensor().tensor_shape());
-        if (node_shape.num_elements() < 0) {
-          return errors::FailedPrecondition(
-              ""Saved model contains node \"""", node.name(), ""\"" (op \"""",
-              node.op(), ""\"") which initializes from a tensor with "",
-              node_shape.num_elements(), "" elements"");
-        }
+    TF_RETURN_IF_ERROR(ValidateNode(node));
+  }
+
+  if (graph_def.has_library()) {
+    const FunctionDefLibrary& library = graph_def.library();
+    for (const auto& function : library.function()) {
+      for (const auto& node : function.node_def()) {
+        TF_RETURN_IF_ERROR(ValidateNode(node));
       }
-    } else if (node.op() == ""Const"") {
-      return errors::FailedPrecondition(
-          ""Saved model contains node \"""", node.name(),
-          ""\"" which is a constant tensor but no value has been provided"");
     }
   }
+
   return Status::OK();
 }
 
",1,train
adf095206f25471e864a8e63a0f1caef53a0e3a6,tensorflow/tensorflow,"Validate `NodeDef`s from `FunctionDefLibrary` of a `GraphDef`.

We already validated `NodeDef`s from a `GraphDef` but missed validating those from the `FunctionDefLibrary`. Thus, some maliciously crafted models could evade detection and cause denial of service due to a `CHECK`-fail.

PiperOrigin-RevId: 332536309
Change-Id: I052efe919ff1fe2f90815e286a1aa4c54c7b94ff",saved_model_bundle_test.cc,"@@ -45,6 +45,8 @@ constexpr char kTestFuzzGeneratedNegativeShape[] =
     ""cc/saved_model/testdata/fuzz_generated/negative_shape"";
 constexpr char kTestFuzzGeneratedConstWithNoValue[] =
     ""cc/saved_model/testdata/fuzz_generated/const_with_no_value"";
+constexpr char kTestFuzzGeneratedBadNodeAttr[] =
+    ""cc/saved_model/testdata/fuzz_generated/bad_node_attr"";
 
 class LoaderTest : public ::testing::Test {
  protected:
@@ -328,5 +330,20 @@ TEST_F(LoaderTest, ConstNoValue) {
       std::string::npos);
 }
 
+TEST_F(LoaderTest, BadNodeAttr) {
+  SavedModelBundle bundle;
+  RunOptions run_options;
+  SessionOptions session_options;
+
+  const string export_dir =
+      io::JoinPath(testing::TensorFlowSrcRoot(), kTestFuzzGeneratedBadNodeAttr);
+  Status st = LoadSavedModel(session_options, run_options, export_dir,
+                             {kSavedModelTagServe}, &bundle);
+  EXPECT_FALSE(st.ok());
+  EXPECT_NE(
+      st.error_message().find(""constant tensor but no value has been provided""),
+      std::string::npos);
+}
+
 }  // namespace
 }  // namespace tensorflow
",1,train
2d88f470dea2671b430884260f3626b1fe99830a,tensorflow/tensorflow,"[tflite] Ensure `ResolveAxis` properly handles negative inputs.

In Python, a list `l` of length `n` allows indexing with negative indices, `l[i]`. The only constraint is that `n + i` is non-negative. Code in `ResolveAxis` assumes this constraint holds and only checks it with a `DCHECK`. But the macro is a no-op in non-debug builds, so out-of-range axes can result in reads from negative offsets (buffer underflows).

PiperOrigin-RevId: 332530683
Change-Id: I464e073fee618054ae3719a3679739007bb3f3bc",reduce.h,"@@ -70,6 +70,9 @@ inline bool ResolveAxis(const int num_dims, const int* axis,
     // eg: For num_dims=3, [0, 1, 2] is the same as [-3, -2, -1]  */
     int current = axis[idx] < 0 ? (axis[idx] + num_dims) : axis[idx];
     TFLITE_DCHECK(current >= 0 && current < num_dims);
+    if (current < 0 || current >= num_dims) {
+      return false;
+    }
     bool is_dup = false;
     for (int j = 0; j < *out_num_axis; ++j) {
       if (out_axis[j] == current) {
",1,train
8ee24e7949a203d234489f9da2c5bf45a7d5157d,tensorflow/tensorflow,"[tflite] Ensure `MatchingDim` does not allow buffer overflow.

We check in `MatchingDim` that both arguments have the same dimensionality; however, that check is a `DCHECK`, only enabled when building in debug mode. Hence, it is possible to cause buffer overflows by passing in a tensor with larger dimensions as the second argument. To fix this, `MatchingDim` now returns the minimum of the two sizes.

A much better fix would be to return a status object but that requires refactoring a large part of the codebase for minor benefits.

PiperOrigin-RevId: 332526127
Change-Id: If627d0d2c80a685217b6e0d1e64b0872dbf1c5e4",types.h,"@@ -438,7 +438,7 @@ int MatchingArraySize(const ArrayType1& array1, int index1,
 inline int MatchingDim(const RuntimeShape& shape1, int index1,
                        const RuntimeShape& shape2, int index2) {
   TFLITE_DCHECK_EQ(shape1.Dims(index1), shape2.Dims(index2));
-  return shape1.Dims(index1);
+  return std::min(shape1.Dims(index1), shape2.Dims(index2));
 }
 
 template <typename... Args>
",1,train
0b5662bc2be13a8c8f044d925d87fb6e56247cd8,tensorflow/tensorflow,"[tflite] Ensure input tensors don't have `nullptr` buffers.

A crafted TFLite model can force a node to have as input a tensor backed by a `nullptr` buffer. That is, by carefully changing the buffer index in the flatbuffer serialization, we can force the TFLite interpreter to consider a read-only tensor to be a read-write one and to assume that some operator produces this tensor as an output, allocating memory for it and writing to it before it is used as an input. If no operator actually does so, we get memory corruption.

PiperOrigin-RevId: 332524692
Change-Id: I57ef175152a29020af9ab041dc959e5631dce40f",subgraph.cc,"@@ -19,6 +19,7 @@ limitations under the License.
 #include <cstdint>
 
 #include ""tensorflow/lite/arena_planner.h""
+#include ""tensorflow/lite/builtin_ops.h""
 #include ""tensorflow/lite/c/common.h""
 #include ""tensorflow/lite/context_util.h""
 #include ""tensorflow/lite/core/api/tensor_utils.h""
@@ -1030,6 +1031,19 @@ TfLiteStatus Subgraph::Invoke() {
           tensor->data_is_stale) {
         TF_LITE_ENSURE_STATUS(EnsureTensorDataIsReadable(tensor_index));
       }
+      if (tensor->data.raw == nullptr && tensor->bytes > 0) {
+        if (registration.builtin_code == kTfLiteBuiltinReshape && i == 1) {
+          // In general, having a tensor here with no buffer will be an error.
+          // However, for the reshape operator, the second input tensor is only
+          // used for the shape, not for the data. Thus, null buffer is ok.
+          continue;
+        } else {
+          // In all other cases, we need to return an error as otherwise we will
+          // trigger a null pointer dereference (likely).
+          ReportError(""Input tensor %d lacks data"", tensor_index);
+          return kTfLiteError;
+        }
+      }
     }
 
     if (check_cancelled_func_ != nullptr &&
",1,train
0b5662bc2be13a8c8f044d925d87fb6e56247cd8,tensorflow/tensorflow,"[tflite] Ensure input tensors don't have `nullptr` buffers.

A crafted TFLite model can force a node to have as input a tensor backed by a `nullptr` buffer. That is, by carefully changing the buffer index in the flatbuffer serialization, we can force the TFLite interpreter to consider a read-only tensor to be a read-write one and to assume that some operator produces this tensor as an output, allocating memory for it and writing to it before it is used as an input. If no operator actually does so, we get memory corruption.

PiperOrigin-RevId: 332524692
Change-Id: I57ef175152a29020af9ab041dc959e5631dce40f",model_test.cc,"@@ -438,24 +438,48 @@ TEST(BasicFlatBufferModel, TestParseModelWithSparseTensor) {
 }
 
 // TODO(b/150072943): Add malformed model with sparse tensor tests.
-TEST(BasicFlatBufferModel, TestHandleMalformedModel) {
-  const auto model_paths = {
-      // These models use the same tensor as both input and ouput of a node
-      ""tensorflow/lite/testdata/add_shared_tensors.bin"",
-  };
-
-  for (const auto& model_path : model_paths) {
-    std::unique_ptr<tflite::FlatBufferModel> model =
-        FlatBufferModel::BuildFromFile(model_path);
-    ASSERT_NE(model, nullptr);
-
-    tflite::ops::builtin::BuiltinOpResolver resolver;
-    InterpreterBuilder builder(*model, resolver);
-    std::unique_ptr<Interpreter> interpreter;
-    ASSERT_EQ(builder(&interpreter), kTfLiteOk);
-    ASSERT_NE(interpreter, nullptr);
-    ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk);
-  }
+
+// The models here have at least a node that uses the same tensor as input and
+// output. This causes segfaults when trying to eval the operator, hence we try
+// to prevent this scenario. The earliest place we can check this is in
+// `AllocateTensors`, hence the test checks that `interpreter->AllocateTensors`
+// detects these bad models.
+TEST(BasicFlatBufferModel, TestHandleMalformedModelReuseTensor) {
+  const auto model_path =
+      ""tensorflow/lite/testdata/add_shared_tensors.bin"";
+
+  std::unique_ptr<tflite::FlatBufferModel> model =
+      FlatBufferModel::BuildFromFile(model_path);
+  ASSERT_NE(model, nullptr);
+
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  InterpreterBuilder builder(*model, resolver);
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(builder(&interpreter), kTfLiteOk);
+  ASSERT_NE(interpreter, nullptr);
+  ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk);
+}
+
+// The models here have a buffer index for a tensor pointing to a null buffer.
+// This results in the tensor being interpreted as read-write, but the model
+// assumes the tensor is read-only. As such, `interpreter->Invoke()` would
+// segfault if no precondition check is added. The test checks that the
+// precondition check exists.
+TEST(BasicFlatBufferModel, TestHandleMalformedModelInvalidBuffer) {
+  const auto model_path =
+      ""tensorflow/lite/testdata/segment_sum_invalid_buffer.bin"";
+
+  std::unique_ptr<tflite::FlatBufferModel> model =
+      FlatBufferModel::BuildFromFile(model_path);
+  ASSERT_NE(model, nullptr);
+
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  InterpreterBuilder builder(*model, resolver);
+  std::unique_ptr<Interpreter> interpreter;
+  ASSERT_EQ(builder(&interpreter), kTfLiteOk);
+  ASSERT_NE(interpreter, nullptr);
+  ASSERT_EQ(interpreter->AllocateTensors(), kTfLiteOk);
+  ASSERT_NE(interpreter->Invoke(), kTfLiteOk);
 }
 
 // TODO(aselle): Add tests for serialization of builtin op data types.
",1,train
d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap.

If a model uses the same tensor as both an input and an output of a node, this can result in data loss and memory corruption. This should not happen.

PiperOrigin-RevId: 332522916
Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",subgraph.cc,"@@ -581,6 +581,33 @@ TfLiteStatus Subgraph::CheckTensorIndices(const char* label, const int* indices,
   return kTfLiteOk;
 }
 
+// We have two arrays and we need to check that elements from one array don't
+// show up in the other. We could sort both arrays and then iterate with two
+// pointers from start to finish always increasing the smaller one but since
+// these arrays are usually short (<25 elements for inputs, usually <3 for
+// outputs), this might be slower than the naive approach (if arrays have size n
+// and m, with n >> m ~ O(1), first approach is O(nlogn) whereas the other is
+// O(n)). Plus, sorting the input and output arrays might not be something we
+// want as it destroys ordering of elements.
+//
+// If it turns out that this is an issue, we can switch to the other algorithm.
+TfLiteStatus Subgraph::CheckInputAndOutputForOverlap(const int* input_indices,
+                                                     int num_inputs,
+                                                     const int* output_indices,
+                                                     int num_outputs) {
+  for (int i = 0; i < num_inputs; i++) {
+    for (int j = 0; j < num_outputs; j++) {
+      if (input_indices[i] == output_indices[j]) {
+        ReportError(""Tensor %d is both input %d and output %d\n"",
+                    input_indices[i], i, j);
+        consistent_ = false;
+        return kTfLiteError;
+      }
+    }
+  }
+  return kTfLiteOk;
+}
+
 namespace {
 // Multiply two sizes and return true if overflow occurred;
 // This is based off tensorflow/overflow.h but is simpler as we already
@@ -707,6 +734,16 @@ TfLiteStatus Subgraph::AddNodeWithParameters(
       &context_,
       CheckTensorIndices(""node outputs"", outputs.data(), outputs.size()));
 
+  // For builtin ops, inputs and outputs must not overlap. Custom ops must do
+  // this check by themselves if they don't support overlapping tensors. This
+  // distinction is to allow custom ops to just forward a tensor, reusing it as
+  // both input and output.
+  if (builtin_data != nullptr) {
+    TF_LITE_ENSURE_OK(&context_, CheckInputAndOutputForOverlap(
+                                     inputs.data(), inputs.size(),
+                                     outputs.data(), outputs.size()));
+  }
+
   int new_node_index = nodes_and_registration_.size();
   if (node_index) *node_index = new_node_index;
   nodes_and_registration_.resize(nodes_and_registration_.size() + 1);
",1,test
d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap.

If a model uses the same tensor as both an input and an output of a node, this can result in data loss and memory corruption. This should not happen.

PiperOrigin-RevId: 332522916
Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",subgraph.h,"@@ -451,6 +451,15 @@ class Subgraph {
   TfLiteStatus CheckTensorIndices(const char* label, const int* indices,
                                   int length);
 
+  // Check that the input indices and the output indices don't overlap.
+  // This is needed because same tensor must not be used both as input and
+  // output for an operator.
+  // NOTE: this changes consistent_ to be false if indices are out of bounds.
+  TfLiteStatus CheckInputAndOutputForOverlap(const int* input_indices,
+                                             int num_inputs,
+                                             const int* output_indices,
+                                             int num_outputs);
+
   // Compute the number of bytes required to represent a tensor with dimensions
   // specified by the array dims (of length dims_size). Returns the status code
   // and bytes.
",1,test
d58c96946b2880991d63d1dacacb32f0a4dfa453,tensorflow/tensorflow,"[tflite] Ensure inputs and outputs don't overlap.

If a model uses the same tensor as both an input and an output of a node, this can result in data loss and memory corruption. This should not happen.

PiperOrigin-RevId: 332522916
Change-Id: If0905b142415a9dfceaf2d181872f2a8fb88f48a",model_test.cc,"@@ -438,6 +438,25 @@ TEST(BasicFlatBufferModel, TestParseModelWithSparseTensor) {
 }
 
 // TODO(b/150072943): Add malformed model with sparse tensor tests.
+TEST(BasicFlatBufferModel, TestHandleMalformedModel) {
+  const auto model_paths = {
+      // These models use the same tensor as both input and ouput of a node
+      ""tensorflow/lite/testdata/add_shared_tensors.bin"",
+  };
+
+  for (const auto& model_path : model_paths) {
+    std::unique_ptr<tflite::FlatBufferModel> model =
+        FlatBufferModel::BuildFromFile(model_path);
+    ASSERT_NE(model, nullptr);
+
+    tflite::ops::builtin::BuiltinOpResolver resolver;
+    InterpreterBuilder builder(*model, resolver);
+    std::unique_ptr<Interpreter> interpreter;
+    ASSERT_EQ(builder(&interpreter), kTfLiteOk);
+    ASSERT_NE(interpreter, nullptr);
+    ASSERT_NE(interpreter->AllocateTensors(), kTfLiteOk);
+  }
+}
 
 // TODO(aselle): Add tests for serialization of builtin op data types.
 // These tests will occur with the evaluation tests of individual operators,
",1,test
00302787b788c5ff04cb6f62aed5a74d936e86c0,tensorflow/tensorflow,"[tflite] Make `GetOptionalInputTensor` the same as `GetInput`.

With the previous change, there is no longer a need for two separate APIs. We will deprecate `GetOptionalInputTensor` in the future.

PiperOrigin-RevId: 332513386
Change-Id: Id7110271c25ebd6126ad8c82a493e37e0e0756b3",kernel_util.cc,"@@ -75,12 +75,7 @@ TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
 
 const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
                                            const TfLiteNode* node, int index) {
-  const bool use_tensor = index < node->inputs->size &&
-                          node->inputs->data[index] != kTfLiteOptionalTensor;
-  if (use_tensor) {
-    return GetMutableInput(context, node, index);
-  }
-  return nullptr;
+  return GetInput(context, node, index);
 }
 
 // Per-axis
",1,test
46d5b0852528ddfd614ded79bccc75589f801bd9,tensorflow/tensorflow,"[tflite] Test for `kTfLiteOptionalTensor` in `GetInput`.

`GetInput`, `GetVariableInput` and `GetOutput` all fail to check for the case where `node->inputs->data[index]` is the special `kTfLiteOptionalTensor` value (-1), which then causes `context->tensors[node->inputs->data[index]]` to read from an invalid memory location.

This fix makes `GetInput` and related functions return `nullptr` in those cases, asking the caller to check for `nullptr`. This is better than relying on `GetOptionalInputTensor` and `GetOptionalOutputTensor` (which does not exist but could be added), because misusing the patched `GetInput` would be caught by a sanitizer test in the default optimized build (due to the `-fsanitize=null` option).

PiperOrigin-RevId: 332512190
Change-Id: Iabca54da2f2de02b6ece3c38b54f76d4277d689e",kernel_util.cc,"@@ -32,11 +32,17 @@ namespace {
 
 inline TfLiteTensor* GetMutableInput(const TfLiteContext* context,
                                      const TfLiteNode* node, int index) {
-  if (context->tensors != nullptr) {
-    return &context->tensors[node->inputs->data[index]];
-  } else {
-    return context->GetTensor(context, node->inputs->data[index]);
+  if (index >= 0 && index < node->inputs->size) {
+    const int tensor_index = node->inputs->data[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      if (context->tensors != nullptr) {
+        return &context->tensors[tensor_index];
+      } else {
+        return context->GetTensor(context, tensor_index);
+      }
+    }
   }
+  return nullptr;
 }
 
 }  // anonymous namespace.
@@ -54,11 +60,17 @@ TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
 
 TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
                         int index) {
-  if (context->tensors != nullptr) {
-    return &context->tensors[node->outputs->data[index]];
-  } else {
-    return context->GetTensor(context, node->outputs->data[index]);
+  if (index >= 0 && index < node->outputs->size) {
+    const int tensor_index = node->outputs->data[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      if (context->tensors != nullptr) {
+        return &context->tensors[tensor_index];
+      } else {
+        return context->GetTensor(context, tensor_index);
+      }
+    }
   }
+  return nullptr;
 }
 
 const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
",1,train
46d5b0852528ddfd614ded79bccc75589f801bd9,tensorflow/tensorflow,"[tflite] Test for `kTfLiteOptionalTensor` in `GetInput`.

`GetInput`, `GetVariableInput` and `GetOutput` all fail to check for the case where `node->inputs->data[index]` is the special `kTfLiteOptionalTensor` value (-1), which then causes `context->tensors[node->inputs->data[index]]` to read from an invalid memory location.

This fix makes `GetInput` and related functions return `nullptr` in those cases, asking the caller to check for `nullptr`. This is better than having `GetOptionalInputTensor` and `GetOptionalOutputTensor` (which does not exist but could be added), as an erroneous use of the patched `GetInput` would be caught by a sanitizer test in the default optimized build (due to the `-fsanitize=null` option).

PiperOrigin-RevId: 332512190
Change-Id: Iabca54da2f2de02b6ece3c38b54f76d4277d689e",kernel_util.h,"@@ -29,18 +29,46 @@ namespace tflite {
 // benchmark_model for MobileNet + MobileBERT is unaffected. If such a change is
 // made, move the newly non-inlined function declarations to the top of this
 // header file.
+
+// Note: You must check if result is not null:
+//
+//   TfLiteTensor* my_tensor = GetInput(context, node, kMyTensorIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
 const TfLiteTensor* GetInput(const TfLiteContext* context,
                              const TfLiteNode* node, int index);
 
 // Note: You must check if result is not null:
-// TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
-// TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+//   TfLiteTensor* my_tensor = GetVariableInput(context, node, kMyTensorIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
 TfLiteTensor* GetVariableInput(TfLiteContext* context, const TfLiteNode* node,
                                int index);
 
+// Note: You must check if result is not null:
+//
+//   TfLiteTensor* my_tensor = GetOutput(context, node, kMyTensorIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
 TfLiteTensor* GetOutput(TfLiteContext* context, const TfLiteNode* node,
                         int index);
 
+// Note: You must check if result is not null:
+//
+//   TfLiteTensor* my_tensor = GetOptionalInputTensor(context, node, kIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
+//
+// Deprecated. GetInput has the same functionality.
 const TfLiteTensor* GetOptionalInputTensor(const TfLiteContext* context,
                                            const TfLiteNode* node, int index);
 
@@ -50,14 +78,46 @@ inline int SizeOfDimension(const TfLiteTensor* t, int dim) {
 }
 
 #ifndef TF_LITE_STATIC_MEMORY
+// Note: You must check if result is not null:
+//
+//   TfLiteTensor* my_tensor = GetTemporary(context, node, kMyTensorIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
 inline TfLiteTensor* GetTemporary(TfLiteContext* context,
                                   const TfLiteNode* node, int index) {
-  return &context->tensors[node->temporaries->data[index]];
+  if (index >= 0 && index < node->temporaries->size) {
+    const int tensor_index = node->temporaries->data[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      if (context->tensors != nullptr) {
+        return &context->tensors[tensor_index];
+      }
+    }
+  }
+  return nullptr;
 }
+
+// Note: You must check if result is not null:
+//
+//   TfLiteTensor* my_tensor = GetIntermediates(context, node, kMyTensorIdx);
+//   TF_LITE_ENSURE(context, my_tensor != nullptr);
+//
+// This is because the index might point to the optional tensor constant
+// (kTfLiteOptionalTensor) in which case there is no tensor to return.
 inline const TfLiteTensor* GetIntermediates(TfLiteContext* context,
                                             const TfLiteNode* node, int index) {
-  return &context->tensors[node->intermediates->data[index]];
+  if (index >= 0 && index < node->intermediates->size) {
+    const int tensor_index = node->intermediates->data[index];
+    if (tensor_index != kTfLiteOptionalTensor) {
+      if (context->tensors != nullptr) {
+        return &context->tensors[tensor_index];
+      }
+    }
+  }
+  return nullptr;
 }
+
 inline int NumIntermediates(const TfLiteNode* node) {
   return node->intermediates->size;
 }
",1,train
cd31fd0ce0449a9e0f83dcad08d6ed7f1d6bef3f,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332518902
Change-Id: I92eb164a6101ac3cca66090061a9b56a97288236",test_helpers.cc,"@@ -601,7 +601,8 @@ TfLiteStatus SimpleStatefulOp::Prepare(TfLiteContext* context,
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
 
   // Make sure that the input is in uint8_t with at least 1 data entry.
-  const TfLiteTensor* input = tflite::GetInput(context, node, kInputTensor);
+  const TfLiteTensor* input;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
   if (input->type != kTfLiteUInt8) return kTfLiteError;
   if (NumElements(input->dims) == 0) return kTfLiteError;
 
@@ -622,7 +623,8 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
   *data->invoke_count += 1;
 
-  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  const TfLiteTensor* input;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kInputTensor, &input));
   const uint8_t* input_data = GetTensorData<uint8_t>(input);
   int size = NumElements(input->dims);
 
@@ -641,9 +643,13 @@ TfLiteStatus SimpleStatefulOp::Invoke(TfLiteContext* context,
     }
   }
 
-  TfLiteTensor* median = GetOutput(context, node, kMedianTensor);
+  TfLiteTensor* median;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kMedianTensor, &median));
   uint8_t* median_data = GetTensorData<uint8_t>(median);
-  TfLiteTensor* invoke_count = GetOutput(context, node, kInvokeCount);
+  TfLiteTensor* invoke_count;
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kInvokeCount, &invoke_count));
   int32_t* invoke_count_data = GetTensorData<int32_t>(invoke_count);
 
   median_data[0] = sorting_buffer[size / 2];
@@ -681,11 +687,14 @@ TfLiteStatus MockCustom::Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus MockCustom::Invoke(TfLiteContext* context, TfLiteNode* node) {
-  const TfLiteTensor* input = tflite::GetInput(context, node, 0);
+  const TfLiteTensor* input;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 0, &input));
   const int32_t* input_data = input->data.i32;
-  const TfLiteTensor* weight = tflite::GetInput(context, node, 1);
+  const TfLiteTensor* weight;
+  TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 1, &weight));
   const uint8_t* weight_data = weight->data.uint8;
-  TfLiteTensor* output = GetOutput(context, node, 0);
+  TfLiteTensor* output;
+  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, 0, &output));
   int32_t* output_data = output->data.i32;
   output_data[0] =
       0;  // Catch output tensor sharing memory with an input tensor
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",activations.cc,"@@ -139,7 +139,9 @@ TfLiteStatus ReluPrepare(TfLiteContext* context, TfLiteNode* node) {
   ReluOpData* data = static_cast<ReluOpData*>(node->user_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   if (input->type == kTfLiteInt8) {
     CalculateReluOpData<int8_t>(input, output, data);
@@ -200,6 +202,7 @@ TfLiteStatus Relu6Prepare(TfLiteContext* context, TfLiteNode* node) {
   Relu6OpData* data = static_cast<Relu6OpData*>(node->user_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
 
   if (input->type == kTfLiteInt8) {
     data->six_int8 = FloatToAsymmetricQuantizedInt8(6.0f, input->params.scale,
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",add.cc,"@@ -201,8 +201,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TFLITE_DCHECK(node->builtin_data != nullptr);
 
   const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   OpData* data = static_cast<OpData*>(node->user_data);
   auto* params = reinterpret_cast<TfLiteAddParams*>(node->builtin_data);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",ceil.cc,"@@ -30,7 +30,9 @@ constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",circular_buffer.cc,"@@ -77,7 +77,9 @@ void Free(TfLiteContext* context, void* buffer) { op_data_counter = 0; }
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE(context, output != nullptr);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",comparisons.cc,"@@ -619,7 +619,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
 
   const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
 
   if (input1->type == kTfLiteUInt8 || input1->type == kTfLiteInt8) {
     auto input1_offset = -input1->params.zero_point;
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",concatenation.cc,"@@ -136,8 +136,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteConcatenationParams* params =
       reinterpret_cast<TfLiteConcatenationParams*>(node->builtin_data);
 
-  TfLiteType input_type = GetInput(context, node, 0)->type;
-  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
+  const TfLiteTensor* input_tensor = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input_tensor != nullptr);
+  TfLiteType input_type = input_tensor->type;
+  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output_tensor != nullptr);
+  TfLiteType output_type = output_tensor->type;
 
   // Check activation and input type
   TF_LITE_ENSURE_EQ(context, params->activation, kTfLiteActNone);
@@ -156,6 +160,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // Shapes with dimensions >4 are not yet supported with static allocation.
   for (int i = 0; i < num_inputs; ++i) {
     const TfLiteTensor* input = GetInput(context, node, i);
+    TF_LITE_ENSURE(context, input != nullptr);
     int num_dimensions = NumDimensions(input);
 
     if (num_dimensions > 4) {
@@ -173,6 +178,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   switch (output_type) {  // Already know in/outtypes are same.
     case kTfLiteFloat32:
@@ -199,6 +205,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       // Store input scale and zero point values in OpParams:
       for (int i = 0; i < node->inputs->size; ++i) {
         const TfLiteTensor* t = GetInput(context, node, i);
+        TF_LITE_ENSURE(context, t != nullptr);
         input_scales[i] = t->params.scale;
         input_zero_points[i] = t->params.zero_point;
       }
@@ -220,7 +227,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  TfLiteType output_type = GetOutput(context, node, kOutputTensor)->type;
+  const TfLiteTensor* output_tensor = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output_tensor != nullptr);
+  TfLiteType output_type = output_tensor->type;
 
   switch (output_type) {  // Already know in/outtypes are same.
     case kTfLiteFloat32:
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",conv.cc,"@@ -97,10 +97,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   // parameters set. This is usually done during quantized training.
   if (data_type != kTfLiteFloat32) {
     const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    TF_LITE_ENSURE(context, input != nullptr);
     const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    TF_LITE_ENSURE(context, filter != nullptr);
     const TfLiteTensor* bias =
         GetOptionalInputTensor(context, node, kBiasTensor);
     TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+    TF_LITE_ENSURE(context, output != nullptr);
     int output_channels = filter->dims->data[kConvQuantizedDimension];
 
     TF_LITE_ENSURE_STATUS(tflite::PopulateConvolutionQuantizationParams(
@@ -127,8 +130,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const auto params = static_cast<const TfLiteConvParams*>(node->builtin_data);
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TF_LITE_ENSURE(context, filter != nullptr);
 
   int input_width = input->dims->data[2];
   int input_height = input->dims->data[1];
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",depthwise_conv.cc,"@@ -82,10 +82,13 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
   // parameters set. This is usually done during quantized training.
   if (data_type != kTfLiteFloat32) {
     const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+    TF_LITE_ENSURE(context, input != nullptr);
     const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+    TF_LITE_ENSURE(context, filter != nullptr);
     const TfLiteTensor* bias =
         GetOptionalInputTensor(context, node, kBiasTensor);
     TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+    TF_LITE_ENSURE(context, output != nullptr);
     int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
 
     return tflite::PopulateConvolutionQuantizationParams(
@@ -114,8 +117,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* filter = GetInput(context, node, kFilterTensor);
+  TF_LITE_ENSURE(context, filter != nullptr);
 
   const TfLiteType data_type = input->type;
   int width = SizeOfDimension(input, 2);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",dequantize.cc,"@@ -52,7 +52,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   // TODO(b/140515557): Add cached dequant to improve hybrid model performance.
   const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE(context, input->type == kTfLiteUInt8 ||
                               input->type == kTfLiteInt8 ||
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",elementwise.cc,"@@ -41,7 +41,9 @@ TfLiteStatus GenericPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   if (!IsSupportedType(input->type)) {
     TF_LITE_KERNEL_LOG(context, ""Input data type %s (%d) is not supported."",
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",fully_connected.cc,"@@ -93,9 +93,12 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
       static_cast<const TfLiteFullyConnectedParams*>(node->builtin_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
+  TF_LITE_ENSURE(context, filter != nullptr);
   const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   TF_LITE_ENSURE_MSG(context, input->type == filter->type,
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",hard_swish.cc,"@@ -45,7 +45,9 @@ TfLiteStatus HardSwishPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   if (input->type == kTfLiteUInt8 || input->type == kTfLiteInt8) {
     HardSwishParams* params = static_cast<HardSwishParams*>(node->user_data);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",l2norm.cc,"@@ -50,7 +50,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE(context, NumDimensions(input) <= 4);
 
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",logistic.cc,"@@ -43,7 +43,9 @@ struct OpData {
 TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
                                        OpData* data) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
   if (input->type == kTfLiteInt8) {
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",mul.cc,"@@ -51,8 +51,11 @@ struct OpData {
 TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
                              TfLiteMulParams* params, OpData* data) {
   const TfLiteTensor* input1 = GetInput(context, node, kInput1Tensor);
+  TF_LITE_ENSURE(context, input1 != nullptr);
   const TfLiteTensor* input2 = GetInput(context, node, kInput2Tensor);
+  TF_LITE_ENSURE(context, input2 != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 2);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",pad.cc,"@@ -50,10 +50,13 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const TfLiteTensor* input = GetInput(context, node, /*index=*/0);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* paddings = GetInput(context, node, /*index=*/1);
+  TF_LITE_ENSURE(context, paddings != nullptr);
   const TfLiteTensor* constant_values =
       NumInputs(node) == 3 ? GetInput(context, node, /*index=*/2) : nullptr;
   TfLiteTensor* output = GetOutput(context, node, /*index=*/0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",pooling.cc,"@@ -222,7 +222,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_STATUS(CalculateOpData(context, params, input, output, data));
 
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",prelu.cc,"@@ -95,8 +95,11 @@ TfLiteStatus PreluPrepare(TfLiteContext* context, TfLiteNode* node) {
   PreluParams* params = static_cast<PreluParams*>(node->user_data);
 
   const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* alpha = GetInput(context, node, 1);
+  TF_LITE_ENSURE(context, alpha != nullptr);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   return CalculatePreluParams(input, alpha, output, params);
 }
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",quantize.cc,"@@ -50,7 +50,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
 
   const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   // TODO(b/128934713): Add support for fixed-point per-channel quantization.
   // Currently this only support affine per-layer quantization.
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",reduce.cc,"@@ -64,6 +64,7 @@ TfLiteStatus PrepareSimple(TfLiteContext* context, TfLiteNode* node) {
 
   // Validate axis type
   const TfLiteTensor* axis = GetInput(context, node, 1);
+  TF_LITE_ENSURE(context, axis != nullptr);
   TF_LITE_ENSURE_TYPES_EQ(context, axis->type, kTfLiteInt32);
 
   if (input->type == kTfLiteInt8) {
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",reshape.cc,"@@ -32,7 +32,9 @@ constexpr int kOutputTensor = 0;
 
 TfLiteStatus ReshapeOutput(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   // Tensorflow's Reshape allows one of the shape components to have the
   // special -1 value, meaning it will be calculated automatically based on the
   // input. Here we calculate what that dimension should be so that the number
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",round.cc,"@@ -30,7 +30,9 @@ constexpr int kOutputTensor = 0;
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, kTfLiteFloat32);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",softmax.cc,"@@ -119,9 +119,11 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   const TfLiteTensor* input = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, input != nullptr);
   TF_LITE_ENSURE(context, NumDimensions(input) >= 1);
 
   TfLiteTensor* output = GetOutput(context, node, 0);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TFLITE_DCHECK(node->user_data != nullptr);
   SoftmaxParams* data = static_cast<SoftmaxParams*>(node->user_data);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",split.cc,"@@ -69,6 +69,7 @@ TfLiteStatus SplitImpl(TfLiteContext* context, TfLiteNode* node,
 
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* axis = GetInput(context, node, 0);
+  TF_LITE_ENSURE(context, axis != nullptr);
 
   // Dynamic output tensors are needed if axis tensor is not constant.
   // But Micro doesn't support dynamic memory allocation, so we only support
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",sub.cc,"@@ -108,8 +108,11 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteSubParams*>(node->builtin_data);
 
   const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
+  TF_LITE_ENSURE(context, input1 != nullptr);
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
+  TF_LITE_ENSURE(context, input2 != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_STATUS(
       CalculateOpData(context, params, input1, input2, output, data));
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",svdf.cc,"@@ -366,13 +366,17 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // [4] = Activation State (variable),
   //         {2, batch_size, memory_size * num_filters}
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   const TfLiteTensor* weights_feature =
       GetInput(context, node, kWeightsFeatureTensor);
+  TF_LITE_ENSURE(context, weights_feature != nullptr);
   const TfLiteTensor* weights_time =
       GetInput(context, node, kWeightsTimeTensor);
+  TF_LITE_ENSURE(context, weights_time != nullptr);
   const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   const TfLiteTensor* activation_state =
       GetInput(context, node, kInputActivationStateTensor);
+  TF_LITE_ENSURE(context, activation_state != nullptr);
 
   // Define input constants based on input tensor definition above:
   const int rank = params->rank;
@@ -392,6 +396,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   // [0] = float/int8_t, {2, batch_size, num_units}
   TF_LITE_ENSURE_EQ(context, node->outputs->size, 1);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
   TF_LITE_ENSURE_EQ(context, NumDimensions(output), 2);
   TF_LITE_ENSURE_EQ(context, output->dims->data[0], batch_size);
   TF_LITE_ENSURE_EQ(context, output->dims->data[1], num_units);
",1,train
fff2c8326280c07733828f990548979bdc893859,tensorflow/tensorflow,"[tflite]: Insert `nullptr` checks when obtaining tensors.

As part of ongoing refactoring, `tflite::GetInput`, `tflite::GetOutput`, `tflite::GetTemporary` and `tflite::GetIntermediates` will return `nullptr` in some cases. Hence, we insert the `nullptr` checks on all usages.

We also insert `nullptr` checks on usages of `tflite::GetVariableInput` and `tflite::GetOptionalInputTensor` but only in the cases where there is no obvious check that `nullptr` is acceptable (that is, we only insert the check for the output of these two functions if the tensor is accessed as if it is always not `nullptr`).

PiperOrigin-RevId: 332520146
Change-Id: I405d986cfc653aaafcfdf4162c0acbd46220b921",tanh.cc,"@@ -51,7 +51,9 @@ TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
   TF_LITE_ENSURE_EQ(context, NumInputs(node), 1);
   TF_LITE_ENSURE_EQ(context, NumOutputs(node), 1);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+  TF_LITE_ENSURE(context, output != nullptr);
 
   TF_LITE_ENSURE_TYPES_EQ(context, input->type, output->type);
 
@@ -76,6 +78,7 @@ TfLiteStatus TanhPrepare(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = static_cast<OpData*>(node->user_data);
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TF_LITE_ENSURE(context, input != nullptr);
   data->input_zero_point = input->params.zero_point;
   return CalculateArithmeticOpData(context, node, data);
 }
",1,train
204945b19e44b57906c9344c0d00120eeeae178a,tensorflow/tensorflow,"[tflite] Validate segment ids for segment_sum.

Segment identifiers in segment_sum should be in a 1-D tensor of the same size as the first dimension of the input. The values of the tensor should be integers from {0, 1, 2, ..., k-1}, where k is the first dimension of the input. The segment identifiers must be non-decreasing and must not contain jumps.

See https://www.tensorflow.org/api_docs/python/tf/math#Segmentation as the source for these constraints.

PiperOrigin-RevId: 332510942
Change-Id: I898beaba00642c918bcd4b4d4ce893ebb190d869",segment_sum.cc,"@@ -34,11 +34,24 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context,
                                 const TfLiteTensor* data,
                                 const TfLiteTensor* segment_ids,
                                 TfLiteTensor* output) {
-  int max_index = -1;
+  // Segment ids should have the same cardinality as the first input dimension and
+  // they should increase by at most 1, starting from 0 (e.g., [0, 0, 1, 2, 3] is valid).
   const int segment_id_size = segment_ids->dims->data[0];
-  if (segment_id_size > 0) {
-    max_index = segment_ids->data.i32[segment_id_size - 1];
+  TF_LITE_ENSURE_EQ(context, segment_id_size, data->dims->data[0]);
+  int previous_segment_id = -1;
+  for (int i = 0; i < segment_id_size; i++) {
+    const int current_segment_id = GetTensorData<int32_t>(segment_ids)[i];
+    if (i == 0) {
+      TF_LITE_ENSURE_EQ(context, current_segment_id, 0);
+    } else {
+      int delta = current_segment_id - previous_segment_id;
+      TF_LITE_ENSURE(context, delta == 0 || delta == 1);
+    }
+    previous_segment_id = current_segment_id;
   }
+
+  const int max_index = previous_segment_id;
+
   const int data_rank = NumDimensions(data);
   TfLiteIntArray* output_shape = TfLiteIntArrayCreate(NumDimensions(data));
   output_shape->data[0] = max_index + 1;
",1,train
204945b19e44b57906c9344c0d00120eeeae178a,tensorflow/tensorflow,"[tflite] Validate segment ids for segment_sum.

Segment identifiers in segment_sum should be in a 1-D tensor of the same size as the first dimension of the input. The values of the tensor should be integers from {0, 1, 2, ..., k-1}, where k is the first dimension of the input. The segment identifiers must be non-decreasing and must not contain jumps.

See https://www.tensorflow.org/api_docs/python/tf/math#Segmentation as the source for these constraints.

PiperOrigin-RevId: 332510942
Change-Id: I898beaba00642c918bcd4b4d4ce893ebb190d869",segment_sum_test.cc,"@@ -110,5 +110,37 @@ TEST(SegmentSumOpModelTest, Float32Test_ThreeDimensions) {
   EXPECT_THAT(model.GetOutputShape(), ElementsAreArray({2, 2, 1}));
 }
 
+TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotSorted) {
+  SegmentSumOpModel<int32_t> model({TensorType_INT32, {3, 2}},
+                                   {TensorType_INT32, {3}});
+  model.PopulateTensor<int32_t>(model.data(), {1, 2, 3, 4, 5, 6});
+  model.PopulateTensor<int32_t>(model.segment_ids(), {0, 3, 1});
+  ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError);
+}
+
+TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotConsecutive) {
+  SegmentSumOpModel<int32_t> model({TensorType_INT32, {3, 2}},
+                                   {TensorType_INT32, {3}});
+  model.PopulateTensor<int32_t>(model.data(), {1, 2, 3, 4, 5, 6});
+  model.PopulateTensor<int32_t>(model.segment_ids(), {0, 3, 5});
+  ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError);
+}
+
+TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNegative) {
+  SegmentSumOpModel<int32_t> model({TensorType_INT32, {3, 2}},
+                                   {TensorType_INT32, {3}});
+  model.PopulateTensor<int32_t>(model.data(), {1, 2, 3, 4, 5, 6});
+  model.PopulateTensor<int32_t>(model.segment_ids(), {-1, 0, 1});
+  ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError);
+}
+
+TEST(SegmentSumOpModelTest, TestFailIfSegmentsAreNotTheRightCardinality) {
+  SegmentSumOpModel<int32_t> model({TensorType_INT32, {3, 2}},
+                                   {TensorType_INT32, {2}});
+  model.PopulateTensor<int32_t>(model.data(), {1, 2, 3, 4, 5, 6});
+  model.PopulateTensor<int32_t>(model.segment_ids(), {0, 1});
+  ASSERT_EQ(model.InvokeUnchecked(), kTfLiteError);
+}
+
 }  // namespace
 }  // namespace tflite
",1,train
eccb7ec454e6617738554a255d77f08e60ee0808,tensorflow/tensorflow,"Prevent segfault in `quantize_and_dequantize`

Fixes #42105.

If `tf.quantization.quantize_and_dequantize` is called with `axis` argument pointing to outside of the input tensor, we obtain a `CHECK` fail which then aborts the application/interpreter. This change adds a condition check and returns a `Status` instead of crashing.

PiperOrigin-RevId: 337972243
Change-Id: I71ec32c00a87266e364fb017f0ad5dfd3e23542f",quantize_and_dequantize_op.cc,"@@ -71,6 +71,10 @@ class QuantizeAndDequantizeV2Op : public OpKernel {
 
   void Compute(OpKernelContext* ctx) override {
     const Tensor& input = ctx->input(0);
+    OP_REQUIRES(
+        ctx, (axis_ == -1 || axis_ < input.shape().dims()),
+        errors::InvalidArgument(""Shape must be at least rank "", axis_ + 1,
+                                "" but is rank "", input.shape().dims()));
     const int depth = (axis_ == -1) ? 1 : input.dim_size(axis_);
     Tensor input_min_tensor;
     Tensor input_max_tensor;
",1,train
eccb7ec454e6617738554a255d77f08e60ee0808,tensorflow/tensorflow,"Prevent segfault in `quantize_and_dequantize`

Fixes #42105.

If `tf.quantization.quantize_and_dequantize` is called with `axis` argument pointing to outside of the input tensor, we obtain a `CHECK` fail which then aborts the application/interpreter. This change adds a condition check and returns a `Status` instead of crashing.

PiperOrigin-RevId: 337972243
Change-Id: I71ec32c00a87266e364fb017f0ad5dfd3e23542f",array_ops_test.py,"@@ -1628,6 +1628,22 @@ class QuantizeAndDequantizeTest(test_util.TensorFlowTestCase):
                   axis=(axis - 4)))
           self.assertAllClose(fake_quantized, expected)
 
+  def testBadAxis(self):
+    input_tensor = [2.5, 2.5]
+    input_min = [0, 0]
+    input_max = [1, 1]
+    error_message_pattern = ""Shape must be at least rank 11 but is rank 1""
+    # TODO(b/171260356): Eager mode and graph mode throw different error types
+    error = errors.InvalidArgumentError if context.executing_eagerly(
+    ) else ValueError
+    with self.assertRaisesRegex(error, error_message_pattern):
+      self.evaluate(
+          array_ops.quantize_and_dequantize_v2(
+              input=input_tensor,
+              input_min=input_min,
+              input_max=input_max,
+              axis=10))
+
   def testQuantizeDequantizeGrad(self):
     shape = (2, 2)
     max_threshold = 0
",1,train
ace0c15a22f7f054abcc1f53eabbcb0a1239a9e2,tensorflow/tensorflow,"Default initialize fixed point Eigen types.

In certain cases, tensors are filled with default values of the type. But for these fixed point types, those default values were uninitialized. Thus, we would have uninitialized memory access bugs, some of which were caught by MSAN.

PiperOrigin-RevId: 344101137
Change-Id: I14555fda74dca3b5f1582da9008901937e3f14e2",FixedPointTypes.h,"@@ -49,7 +49,7 @@ struct scalar_product_traits<QInt32, double> {
 // the compiler from silently type cast the mantissa into a bigger or a smaller
 // representation.
 struct QInt8 {
-  QInt8() {}
+  QInt8() : value(0) {}
   QInt8(const int8_t v) : value(v) {}
   QInt8(const QInt32 v);
 
@@ -59,7 +59,7 @@ struct QInt8 {
 };
 
 struct QUInt8 {
-  QUInt8() {}
+  QUInt8() : value(0) {}
   QUInt8(const uint8_t v) : value(v) {}
   QUInt8(const QInt32 v);
 
@@ -69,7 +69,7 @@ struct QUInt8 {
 };
 
 struct QInt16 {
-  QInt16() {}
+  QInt16() : value(0) {}
   QInt16(const int16_t v) : value(v) {}
   QInt16(const QInt32 v);
   operator int() const { return static_cast<int>(value); }
@@ -78,7 +78,7 @@ struct QInt16 {
 };
 
 struct QUInt16 {
-  QUInt16() {}
+  QUInt16() : value(0) {}
   QUInt16(const uint16_t v) : value(v) {}
   QUInt16(const QInt32 v);
   operator int() const { return static_cast<int>(value); }
@@ -87,7 +87,7 @@ struct QUInt16 {
 };
 
 struct QInt32 {
-  QInt32() {}
+  QInt32() : value(0) {}
   QInt32(const int8_t v) : value(v) {}
   QInt32(const int32_t v) : value(v) {}
   QInt32(const uint32_t v) : value(static_cast<int32_t>(v)) {}
",1,train
ebc70b7a592420d3d2f359e4b1694c236b82c7ae,tensorflow/tensorflow,"Validate that `DataFormat*` attributes form a permutation.

The `src_format` and `dst_format` attributes for the `DataFormatDimMap` and `DataFormatVecPermute` raw ops are supposed to determine a permutation. However, this was not validated and could result in uninitialized memory accesses as well as writes outside of bounds and potential crashes.

While here, we also test that the format attributes have the needed length, add tests for all validation failure cases, remove unnecessary calls to `strings::StrCat`, and fix a few grammar errors.

This will be cherry-picked on the supported release branches.

PiperOrigin-RevId: 346135579
Change-Id: I1c76392382c89ad8f072d5bc93d70669851eb404",data_format_ops.cc,"@@ -18,16 +18,52 @@ limitations under the License.
 #define EIGEN_USE_THREADS
 
 #include ""tensorflow/core/kernels/data_format_ops.h""
+
+#include <map>
+
 #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
+#include ""tensorflow/core/platform/errors.h""
 
 namespace tensorflow {
 
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
+// Ensure that `src` and `dst` define a valid permutation.
+// Ops defined in this file assume that the user specifies a permutation via two
+// string attributes. This check validates that these attributes properly define
+// it to prevent security vulnerabilities.
+static bool IsValidPermutation(const std::string& src, const std::string& dst) {
+  if (src.size() != dst.size()) {
+    return false;
+  }
+
+  std::map<char, bool> characters;
+
+  // Every character in `src` must be present only once
+  for (const auto c : src) {
+    if (characters[c]) {
+      return false;
+    }
+    characters[c] = true;
+  }
+
+  // Every character in `dst` must show up in `src` exactly once
+  for (const auto c : dst) {
+    if (!characters[c]) {
+      return false;
+    }
+    characters[c] = false;
+  }
+
+  // At this point, characters[] has been switched to true and false exactly
+  // once for every character in `src` (and `dst`), so we have a valid permutation
+  return true;
+}
+
 template <typename Device, typename T>
 class DataFormatDimMapOp : public OpKernel {
  public:
@@ -38,15 +74,19 @@ class DataFormatDimMapOp : public OpKernel {
     string dst_format;
     OP_REQUIRES_OK(context, context->GetAttr(""dst_format"", &dst_format));
     OP_REQUIRES(context, src_format.size() == 4 || src_format.size() == 5,
-                errors::InvalidArgument(strings::StrCat(
-                    ""Source format must of length 4 or 5, received ""
+                errors::InvalidArgument(
+                    ""Source format must be of length 4 or 5, received ""
                     ""src_format = "",
-                    src_format)));
+                    src_format));
+    OP_REQUIRES(context, dst_format.size() == 4 || dst_format.size() == 5,
+                errors::InvalidArgument(""Destination format must be of length ""
+                                        ""4 or 5, received dst_format = "",
+                                        dst_format));
     OP_REQUIRES(
-        context, dst_format.size() == 4 || dst_format.size() == 5,
-        errors::InvalidArgument(strings::StrCat(
-            ""Destination format must of length 4 or 5, received dst_format = "",
-            dst_format)));
+        context, IsValidPermutation(src_format, dst_format),
+        errors::InvalidArgument(
+            ""Destination and source format must determine a permutation, got "",
+            src_format, "" and "", dst_format));
     dst_idx_ = Tensor(DT_INT32, {static_cast<int64>(src_format.size())});
     for (int i = 0; i < src_format.size(); ++i) {
       for (int j = 0; j < dst_format.size(); ++j) {
@@ -78,8 +118,22 @@ class DataFormatVecPermuteOp : public OpKernel {
       : OpKernel(context) {
     string src_format;
     OP_REQUIRES_OK(context, context->GetAttr(""src_format"", &src_format));
+    OP_REQUIRES(context, src_format.size() == 4 || src_format.size() == 5,
+                errors::InvalidArgument(
+                    ""Source format must be of length 4 or 5, received ""
+                    ""src_format = "",
+                    src_format));
     string dst_format;
     OP_REQUIRES_OK(context, context->GetAttr(""dst_format"", &dst_format));
+    OP_REQUIRES(context, dst_format.size() == 4 || dst_format.size() == 5,
+                errors::InvalidArgument(""Destination format must be of length ""
+                                        ""4 or 5, received dst_format = "",
+                                        dst_format));
+    OP_REQUIRES(
+        context, IsValidPermutation(src_format, dst_format),
+        errors::InvalidArgument(
+            ""Destination and source format must determine a permutation, got "",
+            src_format, "" and "", dst_format));
     src_format_ = src_format;
     dst_format_ = dst_format;
   }
@@ -127,6 +181,10 @@ class DataFormatVecPermuteOp : public OpKernel {
       };
       keep_only_spatial_dimensions(&src_format_str);
       keep_only_spatial_dimensions(&dst_format_str);
+      OP_REQUIRES(context,
+                  src_format_str.size() == 2 && dst_format_str.size() == 2,
+                  errors::InvalidArgument(
+                      ""Format specifier must contain H and W for 2D case""));
     }
     ComputeDstIndex(src_format_str, dst_format_str, input.dims(), &dst_idx);
 
",1,train
ebc70b7a592420d3d2f359e4b1694c236b82c7ae,tensorflow/tensorflow,"Validate that `DataFormat*` attributes form a permutation.

The `src_format` and `dst_format` attributes for the `DataFormatDimMap` and `DataFormatVecPermute` raw ops are supposed to determine a permutation. However, this was not validated and could result in uninitialized memory accesses as well as writes outside of bounds and potential crashes.

While here, we also test that the format attributes have the needed length, add tests for all validation failure cases, remove unnecessary calls to `strings::StrCat`, and fix a few grammar errors.

This will be cherry-picked on the supported release branches.

PiperOrigin-RevId: 346135579
Change-Id: I1c76392382c89ad8f072d5bc93d70669851eb404",nn_test.py,"@@ -27,6 +27,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
@@ -1260,6 +1261,7 @@ class DataFormatDimMapTest(test_lib.TestCase):
       y_val = self.evaluate(y)
       self.assertAllEqual(y_val, y_val_expected)
 
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
   def testArbitraryASCII(self):
     x_val = [-4, -3, -2, -1, 0, 1, 2, 3]
     y_val_expected = [3, 2, 1, 0, 3, 2, 1, 0]
@@ -1269,6 +1271,46 @@ class DataFormatDimMapTest(test_lib.TestCase):
       y_val = self.evaluate(y)
       self.assertAllEqual(y_val, y_val_expected)
 
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testInvalidLength(self):
+    x = [-4, -3, -2, -1, 0, 1, 2, 3]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Source format must be of length 4 or 5""):
+      op = nn_ops.data_format_dim_map(
+          x, src_format=""12345678"", dst_format=""87654321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testDuplicateSrc(self):
+    x = [-4, -3, -2, -1, 0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_dim_map(x, src_format=""1233"", dst_format=""4321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testDuplicateDst(self):
+    x = [-4, -3, -2, -1, 0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_dim_map(x, src_format=""1234"", dst_format=""3321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testExtraSpecifiers(self):
+    x = [-4, -3, -2, -1, 0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_dim_map(x, src_format=""1234"", dst_format=""5321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
 
 class DataFormatVectorPermuteTest(test_lib.TestCase):
 
@@ -1370,6 +1412,60 @@ class DataFormatVectorPermuteTest(test_lib.TestCase):
       y_val = self.evaluate(y)
       self.assertAllEqual(y_val, [[7, 4], [4, 5], [5, 1], [9, 3]])
 
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testInvalidLength(self):
+    x = [0, 1, 2, 3]
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Source format must be of length 4 or 5""):
+      op = nn_ops.data_format_vec_permute(
+          x, src_format=""12345678"", dst_format=""87654321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testDuplicateSrc(self):
+    x = [0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_vec_permute(
+          x, src_format=""1233"", dst_format=""4321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testDuplicateDst(self):
+    x = [0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_vec_permute(
+          x, src_format=""1234"", dst_format=""3321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def testExtraSpecifiers(self):
+    x = [0, 1, 2, 3]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Destination and source format must determine a permutation""):
+      op = nn_ops.data_format_vec_permute(
+          x, src_format=""1234"", dst_format=""5321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
+  @test_util.disable_xla(""XLA catches the error and rethrows as different one"")
+  def test2DNoWH(self):
+    x = [[0, 1], [2, 3]]
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        ""Format specifier must contain H and W for 2D case""):
+      op = nn_ops.data_format_vec_permute(
+          x, src_format=""1234"", dst_format=""4321"")
+      with test_util.use_gpu():
+        self.evaluate(op)
+
 
 @test_util.run_all_in_graph_and_eager_modes
 class AvgPoolTest(test_lib.TestCase):
",1,train
c1e1fc899ad5f8c725dcbb6470069890b5060bc7,tensorflow/tensorflow,"Mark `MemmappedTensorAllocator` as returning opaque handle.

This allocator is used for `ImmutableConstantOp` and it returns a handle to the contents of a memory mapped file which is supposed to represent a tensor.

For tensors of complex types (resources, variables and strings), allocators which are not marked as returning opaque handles will call placement new to initialize each element. This means writing to the buffer. However, in our case, the buffer is immutable and already contains the tensor data. Hence, writing to it is both destructive and causes a crash.

PiperOrigin-RevId: 345786451
Change-Id: I46369c50fa60b3431709ffe068a728d3061f49c4",immutable_constant_op.cc,"@@ -62,6 +62,12 @@ class MemmappedTensorAllocator : public Allocator {
 
   void set_delete_on_deallocate() { delete_on_deallocate_ = true; }
 
+  // Make sure tensors of complex types (strings, variants, resources) don't get
+  // their constructor called via a placement new since that would require
+  // writing to immutable data.
+  // See also: tensorflow/core/framework/typed_allocator.h
+  bool AllocatesOpaqueHandle() const override { return true; }
+
  private:
   std::unique_ptr<ReadOnlyMemoryRegion> memory_region_;
   // If there is an error during allocation we keep it in this status.
",1,train
8b5b9dc96666a3a5d27fad7179ff215e3b74b67c,tensorflow/tensorflow,"Completely rewrite `GetMatchingPaths`.

The current parallel implementation is too complex (lambda inside lambda, two levels of parallelism) and has an out-of-bounds read issue.

The new implementation cleans up artifacts from the previous implementations that were left in the code as it evolved. We add multiple helper functions and document invariants and preconditions as well as every major step. This way, we fix the security issue and a potential new one which was not caught before.

PiperOrigin-RevId: 346146220
Change-Id: Iec0f44673f43349797bf9944dffe9b2f779137d8",file_system_helper.cc,"@@ -52,115 +52,217 @@ void ForEach(int first, int last, const std::function<void(int)>& f) {
 #endif
 }
 
+// A globbing pattern can only start with these characters:
+static const char kGlobbingChars[] = ""*?[\\"";
+
+static inline bool IsGlobbingPattern(const std::string& pattern) {
+  return (pattern.find_first_of(kGlobbingChars) != std::string::npos);
+}
+
+// Make sure that the first entry in `dirs` during glob expansion does not
+// contain a glob pattern. This is to prevent a corner-case bug where
+// `<pattern>` would be treated differently than `./<pattern>`.
+static std::string PatchPattern(const std::string& pattern) {
+  const std::string fixed_prefix =
+      pattern.substr(0, pattern.find_first_of(kGlobbingChars));
+
+  // Patching is needed when there is no directory part in `prefix`
+  if (io::Dirname(fixed_prefix).empty()) {
+    return io::JoinPath(""."", pattern);
+  }
+
+  // No patching needed
+  return pattern;
+}
+
+static std::vector<std::string> AllDirectoryPrefixes(const std::string& d) {
+  std::vector<std::string> dirs;
+  const std::string patched = PatchPattern(d);
+  StringPiece dir(patched);
+
+  // If the pattern ends with a `/` (or `\\` on Windows), we need to strip it
+  // otherwise we would have one additional matching step and the result set
+  // would be empty.
+  bool is_directory = d[d.size() - 1] == '/';
+#ifdef PLATFORM_WINDOWS
+  is_directory = is_directory || (d[d.size() - 1] == '\\');
+#endif
+  if (is_directory) {
+    dir = io::Dirname(dir);
+  }
+
+  while (!dir.empty()) {
+    dirs.emplace_back(dir);
+    StringPiece new_dir(io::Dirname(dir));
+    // io::Dirname(""/"") returns ""/"" so we need to break the loop.
+    // On Windows, io::Dirname(""C:\\"") would return ""C:\\"", so we check for
+    // identity of the result instead of checking for dir[0] == `/`.
+    if (dir == new_dir) break;
+    dir = new_dir;
+  }
+
+  // Order the array from parent to ancestor (reverse order).
+  std::reverse(dirs.begin(), dirs.end());
+
+  return dirs;
+}
+
+static inline int GetFirstGlobbingEntry(const std::vector<std::string>& dirs) {
+  int i = 0;
+  for (const auto& d : dirs) {
+    if (IsGlobbingPattern(d)) {
+      break;
+    }
+    i++;
+  }
+  return i;
+}
+
 }  // namespace
 
 Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern,
                         std::vector<string>* results) {
+  // Check that `fs`, `env` and `results` are non-null.
+  if (fs == nullptr || env == nullptr || results == nullptr) {
+    return Status(tensorflow::error::INVALID_ARGUMENT,
+                  ""Filesystem calls GetMatchingPaths with nullptr arguments"");
+  }
+
+  // By design, we don't match anything on empty pattern
   results->clear();
   if (pattern.empty()) {
     return Status::OK();
   }
 
-  string fixed_prefix = pattern.substr(0, pattern.find_first_of(""*?[\\""));
-  string eval_pattern = pattern;
-  string dir(io::Dirname(fixed_prefix));
-  // If dir is empty then we need to fix up fixed_prefix and eval_pattern to
-  // include . as the top level directory.
-  if (dir.empty()) {
-    dir = ""."";
-    fixed_prefix = io::JoinPath(dir, fixed_prefix);
-    eval_pattern = io::JoinPath(dir, eval_pattern);
-  }
-  bool is_directory = pattern[pattern.size() - 1] == '/';
-#ifdef PLATFORM_WINDOWS
-  is_directory = is_directory || pattern[pattern.size() - 1] == '\\';
-#endif
-  std::vector<string> dirs;
-  if (!is_directory) {
-    dirs.emplace_back(eval_pattern);
-  }
-  StringPiece tmp_dir(io::Dirname(eval_pattern));
-  while (tmp_dir.size() > dir.size()) {
-    dirs.emplace_back(string(tmp_dir));
-    tmp_dir = io::Dirname(tmp_dir);
+  // The pattern can contain globbing characters at multiple levels, e.g.:
+  //
+  //   foo/ba?/baz/f*r
+  //
+  // To match the full pattern, we must match every prefix subpattern and then
+  // operate on the children for each match. Thus, we separate all subpatterns
+  // in the `dirs` vector below.
+  std::vector<std::string> dirs = AllDirectoryPrefixes(pattern);
+
+  // We can have patterns that have several parents where no globbing is being
+  // done, for example, `foo/bar/baz/*`. We don't need to expand the directories
+  // which don't contain the globbing characters.
+  int matching_index = GetFirstGlobbingEntry(dirs);
+
+  // If we don't have globbing characters in the pattern then it specifies a
+  // path in the filesystem. We add it to the result set if it exists.
+  if (matching_index == dirs.size()) {
+    if (fs->FileExists(pattern).ok()) {
+      results->emplace_back(pattern);
+    }
+    return Status::OK();
   }
-  dirs.emplace_back(dir);
-  std::reverse(dirs.begin(), dirs.end());
-  // Setup a parallel BFS to explore everything under dir.
-  std::deque<std::pair<string, int>> dir_q;
-  std::deque<std::pair<string, int>> next_dir_q;
-  dir_q.emplace_back(std::make_pair(dirs[0], 0));
-  Status ret;  // Status to return.
-  mutex results_mutex;
-  condition_variable results_cond;
-  mutex next_que_mutex;
-  condition_variable next_que_cond;
-  while (!dir_q.empty()) {
-    next_dir_q.clear();
-    std::vector<Status> new_rets(dir_q.size());
-    auto handle_level = [fs, &results, &dir_q, &next_dir_q, &new_rets,
-                         &is_directory, &dirs, &results_mutex, &results_cond,
-                         &next_que_mutex, &next_que_cond](int i) {
-      string current_dir = dir_q.at(i).first;
-      int dir_index = dir_q.at(i).second;
-      dir_index++;
-      std::vector<string> children;
-      Status s = fs->GetChildren(current_dir, &children);
-      // In case PERMISSION_DENIED is encountered, we bail here.
+
+  // To expand the globbing, we do a BFS from `dirs[matching_index-1]`.
+  // At every step, we work on a pair `{dir, ix}` such that `dir` is a real
+  // directory, `ix < dirs.size() - 1` and `dirs[ix+1]` is a globbing pattern.
+  // To expand the pattern, we select from all the children of `dir` only those
+  // that match against `dirs[ix+1]`.
+  // If there are more entries in `dirs` after `dirs[ix+1]` this means we have
+  // more patterns to match. So, we add to the queue only those children that
+  // are also directories, paired with `ix+1`.
+  // If there are no more entries in `dirs`, we return all children as part of
+  // the answer.
+  // Since we can get into a combinatorial explosion issue (e.g., pattern
+  // `/*/*/*`), we process the queue in parallel. Each parallel processing takes
+  // elements from `expand_queue` and adds them to `next_expand_queue`, after
+  // which we swap these two queues (similar to double buffering algorithms).
+  // PRECONDITION: `IsGlobbingPattern(dirs[0]) == false`
+  // PRECONDITION: `matching_index > 0`
+  // INVARIANT: If `{d, ix}` is in queue, then `d` and `dirs[ix]` are at the
+  //            same level in the filesystem tree.
+  // INVARIANT: If `{d, _}` is in queue, then `IsGlobbingPattern(d) == false`.
+  // INVARIANT: If `{d, _}` is in queue, then `d` is a real directory.
+  // INVARIANT: If `{_, ix}` is in queue, then `ix < dirs.size() - 1`.
+  // INVARIANT: If `{_, ix}` is in queue, `IsGlobbingPattern(dirs[ix + 1])`.
+  std::deque<std::pair<string, int>> expand_queue;
+  std::deque<std::pair<string, int>> next_expand_queue;
+  expand_queue.emplace_back(dirs[matching_index - 1], matching_index - 1);
+
+  // Adding to `results` or `next_expand_queue` needs to be protected by
+  // mutexes since there are multiple threads writing to these.
+  mutex result_mutex;
+  mutex queue_mutex;
+
+  while (!expand_queue.empty()) {
+    next_expand_queue.clear();
+
+    // The work item for every item in `expand_queue`. Since all of these are
+    // at the same level in the pattern, we process them in parallel.
+    auto handle_level = [&fs, &results, &dirs, &expand_queue,
+                         &next_expand_queue, &result_mutex,
+                         &queue_mutex](int i) {
+      // See invariants above, all of these are valid accesses.
+      const auto& queue_item = expand_queue.at(i);
+      const std::string& parent = queue_item.first;
+      const int index = queue_item.second + 1;
+      const std::string& match_pattern = dirs[index];
+
+      // Get all children of `parent`. If this fails, return early.
+      std::vector<std::string> children;
+      Status s = fs->GetChildren(parent, &children);
       if (s.code() == tensorflow::error::PERMISSION_DENIED) {
         return;
       }
-      new_rets[i] = s;
-      if (children.empty()) return;
-
-      // children_dir_status holds is_dir status for children. It can have three
-      // possible values: OK for true; FAILED_PRECONDITION for false; CANCELLED
-      // if we don't calculate IsDirectory (we might do that because there isn't
-      // any point in exploring that child path).
-      std::vector<Status> children_dir_status;
-
-      // This IsDirectory call can be expensive for some FS. Parallelizing it.
-      children_dir_status.resize(children.size());
-      auto handle_children = [fs, &current_dir, &children, &dirs, dir_index,
-                              is_directory, &children_dir_status](int j) {
-        const string child_path = io::JoinPath(current_dir, children[j]);
-        if (!fs->Match(child_path, dirs[dir_index])) {
-          children_dir_status[j] =
+
+      // Also return early if we don't have any children
+      if (children.empty()) {
+        return;
+      }
+
+      // Since we can get extremely many children here and on some filesystems
+      // `IsDirectory` is expensive, we process the children in parallel.
+      // We also check that children match the pattern in parallel, for speedup.
+      // We store the status of the match and `IsDirectory` in
+      // the `children_status` array, one element for each child.
+      std::vector<Status> children_status(children.size());
+      auto handle_children = [&fs, &match_pattern, &parent, &children,
+                              &children_status](int j) {
+        const std::string path = io::JoinPath(parent, children[j]);
+        if (!fs->Match(path, match_pattern)) {
+          children_status[j] =
               Status(tensorflow::error::CANCELLED, ""Operation not needed"");
-        } else if (dir_index != dirs.size() - 1) {
-          children_dir_status[j] = fs->IsDirectory(child_path);
         } else {
-          children_dir_status[j] =
-              is_directory ? fs->IsDirectory(child_path) : Status::OK();
+          children_status[j] = fs->IsDirectory(path);
         }
       };
       ForEach(0, children.size(), handle_children);
 
-      for (size_t j = 0; j < children.size(); ++j) {
-        const string child_path = io::JoinPath(current_dir, children[j]);
-        // If the IsDirectory call was cancelled we bail.
-        if (children_dir_status[j].code() == tensorflow::error::CANCELLED) {
+      // At this point, pairing `children` with `children_status` will tell us
+      // whether each child:
+      //   * does not match the pattern
+      //   * matches the pattern and is a directory
+      //   * matches the pattern and is not a directory
+      // We fully ignore the first case.
+      // If we matched the last pattern (`index == dirs.size() - 1`) then all
+      // remaining children get added to the result.
+      // Otherwise, only the directories get added to the next queue.
+      for (size_t j = 0; j < children.size(); j++) {
+        if (children_status[j].code() == tensorflow::error::CANCELLED) {
           continue;
         }
-        if (children_dir_status[j].ok()) {
-          if (dir_index != dirs.size() - 1) {
-            mutex_lock lk(next_que_mutex);
-            next_dir_q.emplace_back(std::make_pair(child_path, dir_index));
-            next_que_cond.notify_one();
-          } else {
-            mutex_lock lk(results_mutex);
-            results->emplace_back(child_path);
-            results_cond.notify_one();
-          }
+
+        const std::string path = io::JoinPath(parent, children[j]);
+        if (index == dirs.size() - 1) {
+          mutex_lock l(result_mutex);
+          results->emplace_back(path);
+        } else if (children_status[j].ok()) {
+          mutex_lock l(queue_mutex);
+          next_expand_queue.emplace_back(path, index);
         }
       }
     };
-    ForEach(0, dir_q.size(), handle_level);
+    ForEach(0, expand_queue.size(), handle_level);
 
-    ret.Update(new_rets[dir_q.size() - 1]);
-    std::swap(dir_q, next_dir_q);
+    // After evaluating one level, swap the ""buffers""
+    std::swap(expand_queue, next_expand_queue);
   }
-  return ret;
+
+  return Status::OK();
 }
 
 }  // namespace internal
",1,train
14755416e364f17fb1870882fa778c7fec7f16e3,tensorflow/tensorflow,"Prevent CHECK-fail in LSTM/GRU with zero-length input.

PiperOrigin-RevId: 346239181
Change-Id: I5f233dbc076aab7bb4e31ba24f5abd4eaf99ea4f",cuda_dnn.cc,"@@ -1468,7 +1468,9 @@ class CudnnRnnSequenceTensorDescriptor
   static port::StatusOr<CudnnRnnSequenceTensorDescriptor> Create(
       GpuExecutor* parent, int max_seq_length, int batch_size, int data_size,
       cudnnDataType_t data_type) {
-    CHECK_GT(max_seq_length, 0);
+    if (max_seq_length <= 0) {
+      return port::Status(port::error::INVALID_ARGUMENT, ""max_seq_length <= 0"");
+    }
     int dims[] = {batch_size, data_size, 1};
     int strides[] = {dims[1] * dims[2], dims[2], 1};
     TensorDescriptor tensor_desc = CreateTensorDescriptor();
@@ -1486,7 +1488,9 @@ class CudnnRnnSequenceTensorDescriptor
       GpuExecutor* parent, int max_seq_length, int batch_size, int data_size,
       const absl::Span<const int>& seq_lengths, bool time_major,
       cudnnDataType_t data_type) {
-    CHECK_GT(max_seq_length, 0);
+    if (max_seq_length <= 0) {
+      return port::Status(port::error::INVALID_ARGUMENT, ""max_seq_length <= 0"");
+    }
     int dims[] = {batch_size, data_size, 1};
     int strides[] = {dims[1] * dims[2], dims[2], 1};
     TensorDescriptor tensor_desc = CreateTensorDescriptor();
",1,train
0cc38aaa4064fd9e79101994ce9872c6d91f816b,tensorflow/tensorflow,"Prevent uninitialized memory access in `GraphConstructor::MakeEdge`

The `MakeEdge` implementation assumes that there exists an output at `output_index` of the `src` node and an input at `input_index` of the `dst` node. However, if this is not the case, this results in accessing data out of bounds. Because we are accessing an array that is a private member of a class and only in read-only mode, this usually results only in uninitialized memory access. However, it is reasonable to think that malicious users could manipulate these indexes to actually read data outside the class, thus resulting in information leakage and further exploits.

PiperOrigin-RevId: 346343288
Change-Id: I2127da27c2023d27f26efd39afa6c853385cab6f",graph_constructor.cc,"@@ -44,6 +44,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/lib/strings/scanner.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
+#include ""tensorflow/core/platform/errors.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/public/version.h""
@@ -1425,6 +1426,17 @@ void GraphConstructor::Undo() {
 
 Status GraphConstructor::MakeEdge(Node* src, int output_index, Node* dst,
                                   int input_index) {
+  if (output_index >= src->num_outputs()) {
+    return errors::InvalidArgument(
+        ""Output "", output_index, "" of node "", src->name(),
+        "" does not exist. Node only has "", src->num_outputs(), "" outputs."");
+  }
+  if (input_index >= dst->num_inputs()) {
+    return errors::InvalidArgument(
+        ""Input "", input_index, "" of node "", dst->name(),
+        "" does not exist. Node only has "", dst->num_inputs(), "" inputs."");
+  }
+
   DataType src_out = src->output_type(output_index);
   DataType dst_in = dst->input_type(input_index);
   if (!TypesCompatible(dst_in, src_out)) {
",1,train
5ac1b9e24ff6afc465756edf845d2e9660bd34bf,tensorflow/tensorflow,"Fix segfault when attempting to convert string to float16.

To make sure this gets fixed, add test for converting string to any numeric type.

PiperOrigin-RevId: 286650886
Change-Id: I81f770ec2bbd33a863e8057ce198c679912fa8e0",constant_op_test.py,"@@ -0,0 +1,61 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Tests for tensorflow.python.framework.constant_op.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.platform import test
+
+
+class ConstantOpTest(test.TestCase, parameterized.TestCase):
+
+  @parameterized.parameters(
+      dtypes.bfloat16,
+      dtypes.complex128,
+      dtypes.complex64,
+      dtypes.double,
+      dtypes.float16,
+      dtypes.float32,
+      dtypes.float64,
+      dtypes.half,
+      dtypes.int16,
+      dtypes.int32,
+      dtypes.int64,
+      dtypes.int8,
+      dtypes.qint16,
+      dtypes.qint32,
+      dtypes.qint8,
+      dtypes.quint16,
+      dtypes.quint8,
+      dtypes.uint16,
+      dtypes.uint32,
+      dtypes.uint64,
+      dtypes.uint8,
+  )
+  def test_convert_string_to_number(self, dtype):
+    with self.assertRaises(TypeError):
+      constant_op.constant(""hello"", dtype)
+
+
+if __name__ == ""__main__"":
+  ops.enable_eager_execution()
+  test.main()
",1,train
5ac1b9e24ff6afc465756edf845d2e9660bd34bf,tensorflow/tensorflow,"Fix segfault when attempting to convert string to float16.

To make sure this gets fixed, add test for converting string to any numeric type.

PiperOrigin-RevId: 286650886
Change-Id: I81f770ec2bbd33a863e8057ce198c679912fa8e0",py_seq_tensor.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/core/stringpiece.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
+#include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/python/lib/core/numpy.h""
 #include ""tensorflow/python/lib/core/py_util.h""
@@ -396,6 +397,21 @@ typedef Converter<int32> Int32Converter;
 
 // Floating-point support
 
+// Returns `true` if `out` overflows when converted from `as_double`.
+template <class T>
+static inline bool CheckForOverflow(double as_double, T* out) {
+  return (sizeof(T) < sizeof(double) && std::isinf(*out) &&
+          std::isfinite(as_double));
+}
+
+// There is no `std::isinf` that takes `Eigen::half` as argument but Eigen
+// provides `Eigen::half_impl::isinf` instead.
+template <>
+inline bool CheckForOverflow<Eigen::half>(double as_double, Eigen::half* out) {
+  return (sizeof(Eigen::half) < sizeof(double) &&
+          Eigen::half_impl::isinf(*out) && std::isfinite(as_double));
+}
+
 template <class T>
 static const char* ConvertOneFloat(PyObject* v, T* out) {
   if (PyErr_Occurred()) {
@@ -405,20 +421,19 @@ static const char* ConvertOneFloat(PyObject* v, T* out) {
     const double as_double = PyFloat_AS_DOUBLE(v);
     *out = static_cast<T>(as_double);
     // Check for overflow
-    if (TF_PREDICT_FALSE(sizeof(T) < sizeof(double) && std::isinf(*out) &&
-                         std::isfinite(as_double))) {
+    if (TF_PREDICT_FALSE(CheckForOverflow<T>(as_double, out))) {
       return ErrorOutOfRangeDouble;
     }
     return nullptr;
   }
 #if PY_MAJOR_VERSION < 3
   if (PyInt_Check(v)) {
-    *out = PyInt_AS_LONG(v);
+    *out = static_cast<T>(PyInt_AS_LONG(v));
     return nullptr;
   }
 #endif
   if (PyLong_Check(v)) {
-    *out = PyLong_AsDouble(v);
+    *out = static_cast<T>(PyLong_AsDouble(v));
     if (PyErr_Occurred()) return ErrorOutOfRangeDouble;
     return nullptr;
   }
@@ -467,13 +482,7 @@ struct ConverterTraits<Eigen::half> {
   static const tensorflow::DataType kTypeEnum = DT_HALF;
 
   static const char* ConvertScalar(PyObject* v, Eigen::half* out) {
-    // NOTE(nareshmodi): Is there a way to convert to C double without the
-    // intermediate Python double? This will help with ConvertOneFloat as well.
-    Safe_PyObjectPtr as_float = make_safe(PyNumber_Float(v));
-    double v_double = PyFloat_AS_DOUBLE(as_float.get());
-    *out = Eigen::half(v_double);
-
-    return nullptr;
+    return ConvertOneFloat<Eigen::half>(v, out);
   }
 };
 
@@ -613,7 +622,9 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) {
       break;
 
     case DT_HALF:
-      RETURN_STRING_AS_STATUS(NumpyHalfConverter::Convert(obj, &state, ret));
+      if (NumpyHalfConverter::Convert(obj, &state, ret) == nullptr)
+        return Status::OK();
+      break;
 
     case DT_INT64:
       if (Int64Converter::Convert(obj, &state, ret) == nullptr)
",1,train
db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum.

When Index=int32, data_size and num_segments were truncated from int64 to int32. This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory.

Also:
- Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements.
- Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments.
PiperOrigin-RevId: 256451663",segment_reduction_ops.cc,"@@ -376,18 +376,17 @@ namespace functor {
 template <typename T, typename Index, typename InitialValueF,
           typename ReductionF>
 struct UnsortedSegmentFunctor<CPUDevice, T, Index, InitialValueF, ReductionF> {
-  void operator()(OpKernelContext* ctx, const Index num_segments,
-                  const TensorShape& segment_ids_shape,
+  void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape,
                   typename TTypes<Index>::ConstFlat segment_ids,
-                  const Index data_size, const T* data,
+                  typename TTypes<T, 2>::ConstTensor data,
                   typename TTypes<T, 2>::Tensor output) {
     output.setConstant(InitialValueF()());
-    if (data_size == 0) {
+    if (data.size() == 0) {
       return;
     }
     const int64 N = segment_ids.dimension(0);
+    const int64 num_segments = output.dimension(0);
     ReductionF reduction;
-    auto data_flat = typename TTypes<T, 2>::ConstTensor(data, N, data_size / N);
     for (int64 i = 0; i < N; ++i) {
       Index j = internal::SubtleMustCopy(segment_ids(i));
       if (j < 0) {
@@ -397,7 +396,7 @@ struct UnsortedSegmentFunctor<CPUDevice, T, Index, InitialValueF, ReductionF> {
                   errors::InvalidArgument(
                       ""segment_ids"", SliceDebugString(segment_ids_shape, i),
                       "" = "", j, "" is out of range [0, "", num_segments, "")""));
-      reduction(data_flat.template chip<0>(i), output.template chip<0>(j));
+      reduction(data.template chip<0>(i), output.template chip<0>(j));
     }
   }
 };
@@ -485,7 +484,7 @@ class UnsortedSegmentReductionOp : public OpKernel {
       return;
     }
     const auto segment_flat = segment_ids.flat<Index>();
-    const Index output_rows = internal::SubtleMustCopy(static_cast<Index>(
+    const int64 output_rows = internal::SubtleMustCopy(static_cast<int64>(
         num_segments.dtype() == DT_INT32 ? num_segments.scalar<int32>()()
                                          : num_segments.scalar<int64>()()));
     OP_REQUIRES(context, output_rows >= 0,
@@ -499,9 +498,9 @@ class UnsortedSegmentReductionOp : public OpKernel {
     Tensor* output = nullptr;
     OP_REQUIRES_OK(context, context->allocate_output(0, output_shape, &output));
     auto output_flat = output->flat_outer_dims<T>();
-    auto data_ptr = data.template flat<T>().data();
-    reduction_functor_(context, output_rows, segment_ids.shape(), segment_flat,
-                       data.NumElements(), data_ptr, output_flat);
+    auto data_flat = data.flat_inner_outer_dims<T, 2>(segment_ids.dims() - 1);
+    reduction_functor_(context, segment_ids.shape(), segment_flat, data_flat,
+                       output_flat);
   }
 
  protected:
",1,train
db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum.

When Index=int32, data_size and num_segments were truncated from int64 to int32. This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory.

Also:
- Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements.
- Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments.
PiperOrigin-RevId: 256451663",segment_reduction_ops.h,"@@ -59,10 +59,9 @@ struct SegmentSumFunctor {
 template <typename Device, typename T, typename Index, typename InitialValueF,
           typename ReductionF>
 struct UnsortedSegmentFunctor {
-  void operator()(OpKernelContext* ctx, const Index num_segments,
-                  const TensorShape& segment_ids_shape,
+  void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape,
                   typename TTypes<Index>::ConstFlat segment_ids,
-                  const Index data_size, const T* data,
+                  typename TTypes<T, 2>::ConstTensor data,
                   typename TTypes<T, 2>::Tensor output);
 };
 
",1,train
db4f9717c41bccc3ce10099ab61996b246099892,tensorflow/tensorflow,"Fix heap buffer overflow in UnsortedSegmentSum.

When Index=int32, data_size and num_segments were truncated from int64 to int32. This truncation can produce negative numbers, which causes UnsortedSegmentFunctor to access out of bounds memory.

Also:
- Switches some indexing calculations to int64 to avoid signed integer overflow when either the input or output tensors have more than 2**31 - 1 elements.
- Fixes a range check error in the GPU kernel. The segment ID was checked against an upper bound measured in elements, not segments.
PiperOrigin-RevId: 256451663",segment_reduction_ops_gpu.cu.cc,"@@ -106,21 +106,21 @@ __global__ void SortedSegmentSumCustomKernel(const Index input_outer_dim_size,
 // Each element is mapped from input to output by a combination of its
 // 'segment_ids' mapping and 'inner_dim_size'.
 template <typename T, typename Index, typename KernelReductionFunctor>
-__global__ void UnsortedSegmentCustomKernel(const Index input_outer_dim_size,
-                                            const Index inner_dim_size,
-                                            const Index output_outer_dim_size,
+__global__ void UnsortedSegmentCustomKernel(const int64 input_outer_dim_size,
+                                            const int64 inner_dim_size,
+                                            const int64 output_outer_dim_size,
                                             const Index* segment_ids,
                                             const T* input, T* output) {
-  const Index input_total_size = input_outer_dim_size * inner_dim_size;
-  const Index output_total_size = output_outer_dim_size * inner_dim_size;
-  for (int input_index : GpuGridRangeX(input_total_size)) {
-    const Index input_segment_index = input_index / inner_dim_size;
-    const Index segment_offset = input_index % inner_dim_size;
+  const int64 input_total_size = input_outer_dim_size * inner_dim_size;
+  for (int64 input_index : GpuGridRangeX(input_total_size)) {
+    const int64 input_segment_index = input_index / inner_dim_size;
+    const int64 segment_offset = input_index % inner_dim_size;
     const Index output_segment_index = segment_ids[input_segment_index];
-    if (output_segment_index < 0 || output_segment_index >= output_total_size) {
+    if (output_segment_index < 0 ||
+        output_segment_index >= output_outer_dim_size) {
       continue;
     }
-    const Index output_index =
+    const int64 output_index =
         output_segment_index * inner_dim_size + segment_offset;
     KernelReductionFunctor()(output + output_index, ldg(input + input_index));
   }
@@ -174,10 +174,9 @@ void SegmentSumFunctor<T, Index>::operator()(
 template <typename T, typename Index, typename InitialValueF,
           typename ReductionF>
 struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
-  void operator()(OpKernelContext* ctx, const Index num_segments,
-                  const TensorShape& segment_ids_shape,
+  void operator()(OpKernelContext* ctx, const TensorShape& segment_ids_shape,
                   typename TTypes<Index>::ConstFlat segment_ids,
-                  const Index data_size, const T* data,
+                  typename TTypes<T, 2>::ConstTensor data,
                   typename TTypes<T, 2>::Tensor output) {
     if (output.size() == 0) {
       return;
@@ -188,6 +187,7 @@ struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
     TF_CHECK_OK(GpuLaunchKernel(
         SetToValue<T>, config.block_count, config.thread_per_block, 0,
         d.stream(), output.size(), output.data(), InitialValueF()()));
+    const int64 data_size = data.size();
     if (data_size == 0 || segment_ids_shape.num_elements() == 0) {
       return;
     }
@@ -196,15 +196,16 @@ struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
     // *) 'data_size' is the total number of elements to process.
     // *) 'segment_ids.shape' is a prefix of data's shape.
     // *) 'input_outer_dim_size' is the total number of segments to process.
-    const Index input_outer_dim_size = segment_ids.dimension(0);
-    const Index input_inner_dim_size = data_size / input_outer_dim_size;
+    const int64 input_outer_dim_size = segment_ids.dimension(0);
+    const int64 input_inner_dim_size = data.dimension(1);
+    const int64 output_outer_dim_size = output.dimension(0);
     config = GetGpuLaunchConfig(data_size, d);
 
-    TF_CHECK_OK(
-        GpuLaunchKernel(UnsortedSegmentCustomKernel<T, Index, ReductionF>,
-                        config.block_count, config.thread_per_block, 0,
-                        d.stream(), input_outer_dim_size, input_inner_dim_size,
-                        num_segments, segment_ids.data(), data, output.data()));
+    TF_CHECK_OK(GpuLaunchKernel(
+        UnsortedSegmentCustomKernel<T, Index, ReductionF>, config.block_count,
+        config.thread_per_block, 0, d.stream(), input_outer_dim_size,
+        input_inner_dim_size, output_outer_dim_size, segment_ids.data(),
+        data.data(), output.data()));
   }
 };
 
",1,train
49f73c55d56edffebde4bca4a407ad69c1cae433,tensorflow/tensorflow,"Fix integer overflow in BMP decoder by making the checks in DecodeBmp
more stringent.  Add fuzzer to improve the robustness of the decoder
in the future.

PiperOrigin-RevId: 185780111",decode_bmp_op.cc,"@@ -91,15 +91,32 @@ class DecodeBmpOp : public OpKernel {
                 errors::InvalidArgument(
                     ""Number of channels must be 1, 3 or 4, was "", channels_));
 
+    OP_REQUIRES(context, width > 0 && header_size >= 0,
+                errors::InvalidArgument(""Width must be positive""));
+    OP_REQUIRES(context, header_size >= 0,
+                errors::InvalidArgument(""header size must be nonnegative""));
+
+    // The real requirement is < 2^31 minus some headers and channel data,
+    // so rounding down to something that's still ridiculously big.
+    OP_REQUIRES(
+        context,
+        (static_cast<int64>(width) * std::abs(static_cast<int64>(height))) <
+            static_cast<int64>(std::numeric_limits<int32_t>::max() / 8),
+        errors::InvalidArgument(
+            ""Total possible pixel bytes must be less than 2^30""));
+
+    const int32 abs_height = abs(height);
+
     // there may be padding bytes when the width is not a multiple of 4 bytes
     // 8 * channels == bits per pixel
     const int row_size = (8 * channels_ * width + 31) / 32 * 4;
 
-    const int last_pixel_offset =
-        header_size + (abs(height) - 1) * row_size + (width - 1) * channels_;
+    const int64 last_pixel_offset = static_cast<int64>(header_size) +
+                                    (abs_height - 1) * row_size +
+                                    (width - 1) * channels_;
 
     // [expected file size] = [last pixel offset] + [last pixel size=channels]
-    const int expected_file_size = last_pixel_offset + channels_;
+    const int64 expected_file_size = last_pixel_offset + channels_;
 
     OP_REQUIRES(
         context, (expected_file_size <= input.size()),
@@ -115,12 +132,12 @@ class DecodeBmpOp : public OpKernel {
     Tensor* output = nullptr;
     OP_REQUIRES_OK(
         context, context->allocate_output(
-                     0, TensorShape({abs(height), width, channels_}), &output));
+                     0, TensorShape({abs_height, width, channels_}), &output));
 
     const uint8* bmp_pixels = &img_bytes[header_size];
 
     Decode(bmp_pixels, row_size, output->flat<uint8>().data(), width,
-           abs(height), channels_, top_down);
+           abs_height, channels_, top_down);
   }
 
   uint8* Decode(const uint8* input, const int row_size, uint8* const output,
",1,test
49f73c55d56edffebde4bca4a407ad69c1cae433,tensorflow/tensorflow,"Fix integer overflow in BMP decoder by making the checks in DecodeBmp
more stringent.  Add fuzzer to improve the robustness of the decoder
in the future.

PiperOrigin-RevId: 185780111",decode_bmp_fuzz.cc,"@@ -0,0 +1,29 @@
+/* Copyright 2018 Google Inc. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/cc/ops/standard_ops.h""
+#include ""tensorflow/core/kernels/fuzzing/fuzz_session.h""
+
+namespace tensorflow {
+namespace fuzzing {
+
+class FuzzDecodeBmp : public FuzzStringInputOp {
+  SINGLE_INPUT_OP_BUILDER(DT_STRING, DecodeBmp);
+};
+
+STANDARD_TF_FUZZ_FUNCTION(FuzzDecodeBmp);
+
+}  // end namespace fuzzing
+}  // end namespace tensorflow
",1,test
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",tf_tfl_flatbuffer_helpers.cc,"@@ -25,6 +25,7 @@ limitations under the License.
 #include ""mlir/Support/FileUtilities.h""  // from @llvm-project
 #include ""mlir/Transforms/ViewOpGraph.h""  // from @llvm-project
 #include ""tensorflow/compiler/mlir/lite/common/tfl_pass_config.h""
+#include ""tensorflow/compiler/mlir/lite/metrics/error_collector.h""
 #include ""tensorflow/compiler/mlir/lite/tf_tfl_passes.h""
 #include ""tensorflow/compiler/mlir/lite/tf_to_tfl_flatbuffer.h""
 #include ""tensorflow/compiler/mlir/lite/transforms/passes.h""
@@ -316,6 +317,9 @@ Status ConvertMLIRToTFLiteFlatBuffer(
   mlir::PassManager pm(module->getContext(),
                        mlir::OpPassManager::Nesting::Implicit);
   ::tensorflow::SetCrashReproducer(pm);
+  pm.addInstrumentation(
+      std::make_unique<mlir::TFL::ErrorCollectorInstrumentation>(
+          module->getContext()));
 
   tensorflow::AddTFToTFLConversionPasses(model_flags, toco_flags, pass_config,
                                          &pm, session);
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",convert.py,"@@ -36,6 +36,7 @@ from tensorflow.lite.python.convert_phase import Component
 from tensorflow.lite.python.convert_phase import convert_phase
 from tensorflow.lite.python.convert_phase import ConverterError
 from tensorflow.lite.python.convert_phase import SubComponent
+from tensorflow.lite.python.metrics_wrapper import metrics_wrapper as _metrics_wrapper
 from tensorflow.lite.toco import model_flags_pb2 as _model_flags_pb2
 from tensorflow.lite.toco import toco_flags_pb2 as _toco_flags_pb2
 from tensorflow.lite.toco import types_pb2 as _types_pb2
@@ -295,7 +296,10 @@ def toco_convert_protos(model_flags_str,
                                                  enable_mlir_converter)
       return model_str
     except Exception as e:
-      raise ConverterError(str(e))
+      converter_error = ConverterError(str(e))
+      for error_data in _metrics_wrapper.get_collected_errors():
+        converter_error.append_error(error_data)
+      raise converter_error
 
   return _run_toco_binary(model_flags_str, toco_flags_str, input_data_str,
                           debug_info_str)
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_nonportable.py,"@@ -18,8 +18,8 @@ from typing import Optional, Text
 import uuid
 
 from tensorflow.lite.python import metrics_interface
-from tensorflow.lite.python.metrics_wrapper import _pywrap_tensorflow_lite_metrics_wrapper as _metrics_wrapper
 from tensorflow.lite.python.metrics_wrapper import converter_error_data_pb2
+from tensorflow.lite.python.metrics_wrapper import metrics_wrapper
 from tensorflow.python.eager import monitoring
 
 _counter_debugger_creation = monitoring.Counter(
@@ -116,7 +116,7 @@ class TFLiteConverterMetrics(TFLiteMetrics):
   def __init__(self) -> None:
     super(TFLiteConverterMetrics, self).__init__()
     session_id = uuid.uuid4().hex
-    self._metrics_exporter = _metrics_wrapper.MetricsWrapper(session_id)
+    self._metrics_exporter = metrics_wrapper.MetricsWrapper(session_id)
     self._exported = False
 
   def __del__(self):
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_nonportable_test.py,"@@ -25,13 +25,17 @@ import tensorflow as tf
 from tensorflow.lite.python import lite
 from tensorflow.lite.python import metrics_nonportable as metrics
 from tensorflow.lite.python.convert import ConverterError
+from tensorflow.lite.python.convert import register_custom_opdefs
 from tensorflow.python.client import session
+from tensorflow.python.eager import monitoring
 from tensorflow.python.framework import convert_to_constants
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import string_ops
+from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import saved_model
 from tensorflow.python.training.tracking import tracking
@@ -317,5 +321,93 @@ class ConverterMetricsTest(test_util.TensorFlowTestCase):
     mock_exporter.ExportMetrics.assert_called_once()
 
 
+def mock_ngrams(data, width, axis=-1, string_separator=' ', name=None):
+  """"""This mock Ngrams lack the width attr, causing conversion to fail.""""""
+
+  experimental_implements = [
+      'name: ""tftext:Ngrams""',
+      'attr { key: ""axis"" value { i: %d } }' % axis,
+      'attr { key: ""reduction_type"" value { s: ""STRING_JOIN"" } }',
+      'attr { key: ""string_separator"" value { s: ""%s"" } }' % string_separator,
+  ]
+  experimental_implements = ' '.join(experimental_implements)
+
+  @tf.function(experimental_implements=experimental_implements)
+  def func(data):
+    with ops.name_scope(name, 'NGrams', [data, width]):
+      data = ragged_tensor.convert_to_tensor_or_ragged_tensor(data, name='data')
+      slices = []
+      for start in range(width):
+        stop = None if start - width + 1 == 0 else start - width + 1
+        if axis >= 0:
+          idx = [slice(None)] * axis + [slice(start, stop)]
+        else:
+          idx = [Ellipsis, slice(start, stop)] + [slice(None)] * (-axis - 1)
+        slices.append(data[idx])
+
+      # Stack the slices.
+      stack_axis = axis + 1 if axis >= 0 else axis
+      windowed_data = array_ops.stack(slices, stack_axis)
+
+      return string_ops.reduce_join(
+          windowed_data, axis=axis, separator=string_separator)
+
+  return func(data)
+
+
+class ConverterErrorMetricTest(test_util.TensorFlowTestCase):
+  """"""Testing conversion error metric.""""""
+
+  def setUp(self):
+    super(ConverterErrorMetricTest, self).setUp()
+
+    # Mock metrics instance except errors so other test cases are not affected.
+    mock_attempt = mock.create_autospec(monitoring.Counter, instance=True)
+    self._counter_conversion_attempt = metrics._counter_conversion_attempt
+    metrics._counter_conversion_attempt = mock_attempt
+
+    mock_success = mock.create_autospec(monitoring.Counter, instance=True)
+    self._counter_conversion_success = metrics._counter_conversion_success
+    metrics._counter_conversion_success = mock_success
+
+    mock_params = mock.create_autospec(monitoring.StringGauge, instance=True)
+    self._gauge_conversion_params = metrics._gauge_conversion_params
+    metrics._gauge_conversion_params = mock_params
+
+  def tearDown(self):
+    super(ConverterErrorMetricTest, self).tearDown()
+    # Restore metrics instances.
+    metrics._counter_conversion_attempt = self._counter_conversion_attempt
+    metrics._counter_conversion_success = self._counter_conversion_success
+    metrics._gauge_conversion_params = self._gauge_conversion_params
+
+  def test_failure_at_PrepareCompositeFunctionsPass(self):
+
+    class NgramsLayer(tf.keras.layers.Layer):
+
+      def call(self, input_tensor, **kwargs):
+        return mock_ngrams(input_tensor, width=2, axis=-1, string_separator=' ')
+
+    # Registers a fake WhitespaceTokenizeWithOffsets so the TFText fusing logic
+    # is enabled on the MLIR side.
+    custom_opdefs_str = (
+        'name: \'WhitespaceTokenizeWithOffsets\' input_arg: {name: \'Input1\' '
+        'type: DT_FLOAT} input_arg: {name: \'Input2\' type: DT_FLOAT} '
+        'output_arg: {name: \'Output\' type: DT_FLOAT}')
+    register_custom_opdefs([custom_opdefs_str])
+
+    model = tf.keras.models.Sequential([NgramsLayer()])
+    model.predict(tf.constant(['test']))
+    converter = tf.lite.TFLiteConverter.from_keras_model(model)
+    converter.allow_custom_ops = True
+    with self.assertRaises(ConverterError):
+      converter.convert()
+    exported_error = metrics._gauge_conversion_errors.get_cell(
+        'CONVERT_TF_TO_TFLITE_MODEL', 'PrepareCompositeFunctionsPass', '',
+        'UNKNOWN').value()
+    self.assertEqual(exported_error,
+                     ""\'width\' attribute is not set or not an integer\n"")
+
+
 if __name__ == '__main__':
   test.main()
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper.h,"@@ -52,6 +52,9 @@ class MetricsWrapper {
   const std::unique_ptr<MetricsExporter> exporter_;
 };
 
+// Returns a vector of serialized ConverterErrorData from ErrorCollector.
+std::vector<std::string> GetCollectedErrors();
+
 }  // namespace metrics_wrapper
 }  // namespace tflite
 
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper.py,"@@ -0,0 +1,39 @@
+# Lint as: python2, python3
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Stub to make pywrap metrics wrapper accessible.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from tensorflow.lite.python.metrics_wrapper import converter_error_data_pb2
+from tensorflow.lite.python.metrics_wrapper._pywrap_tensorflow_lite_metrics_wrapper import GetCollectedErrors as _get_collected_errors
+from tensorflow.lite.python.metrics_wrapper._pywrap_tensorflow_lite_metrics_wrapper import MetricsWrapper  # pylint: disable=unused-import
+
+
+def get_collected_errors():
+  """"""Returns a list of collected errors in ErrorCollector.
+
+  The GetCollectedErrors function in C++ returns a list of serialized proto
+  messages. This function will convert them to ConverterErrorData instances.
+
+  Returns:
+    A list of ConverterErrorData.
+  """"""
+  serialized_message_list = _get_collected_errors()
+  return list(
+      map(converter_error_data_pb2.ConverterErrorData.FromString,
+          serialized_message_list))
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_nonportable.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 #include <vector>
 
 #include ""learning/brain/google/monitoring/metrics_exporter.h""
+#include ""tensorflow/compiler/mlir/lite/metrics/error_collector.h""
 #include ""tensorflow/lite/python/metrics_wrapper/metrics_wrapper.h""
 
 namespace tflite {
@@ -58,5 +59,14 @@ PyObject* MetricsWrapper::ExportMetrics() {
   Py_RETURN_NONE;
 }
 
+std::vector<std::string> GetCollectedErrors() {
+  mlir::TFL::ErrorCollector* collector = mlir::TFL::GetErrorCollector();
+  std::vector<std::string> result;
+  for (const auto& error_data : collector->CollectedErrors()) {
+    result.push_back(error_data.SerializeAsString());
+  }
+  return result;
+}
+
 }  // namespace metrics_wrapper
 }  // namespace tflite
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_portable.cc,"@@ -55,5 +55,9 @@ PyObject* MetricsWrapper::ExportMetrics() {
   Py_RETURN_NONE;
 }
 
+std::vector<std::string> GetCollectedErrors() {
+  return std::vector<std::string>();
+}
+
 }  // namespace metrics_wrapper
 }  // namespace tflite
",0,train
c654965d9336981338e39e5eb1bb9632293b571b,tensorflow/tensorflow,"Report errors collected from MLIR converter

PiperOrigin-RevId: 377454790
Change-Id: I3f335cfc8c0fd707b38245c1666bee9231276411",metrics_wrapper_pybind11.cc,"@@ -39,4 +39,12 @@ PYBIND11_MODULE(_pywrap_tensorflow_lite_metrics_wrapper, m) {
       .def(""ExportMetrics"", [](MetricsWrapper& self) {
         return tensorflow::PyoOrThrow(self.ExportMetrics());
       });
+  m.def(""GetCollectedErrors"", []() {
+    py::list serialized_message_list;
+    for (const auto& error_data :
+         tflite::metrics_wrapper::GetCollectedErrors()) {
+      serialized_message_list.append(py::bytes(error_data));
+    }
+    return serialized_message_list;
+  });
 }
",0,train
a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs

Unprotect should only be called on local handles. In order to test the
triggering of forwarding for remote inputs to a function we add an
optimization whereby EagerExecute releases the inputs of the eager
operation. This enforces that a TFE_Op cannot be reused since the inputs
would have been removed. This was technically already true since if the
inputs were ever forwarded we should not be re-using the TFE_Op.

PiperOrigin-RevId: 306564949
Change-Id: I94bd3a243658277891867802b792a4492ec0a039",c_api_remote_test.cc,"@@ -129,7 +129,45 @@ void TestRemoteExecute(bool async) {
 TEST(CAPI, RemoteExecute) { TestRemoteExecute(false); }
 TEST(CAPI, RemoteExecuteAsync) { TestRemoteExecute(true); }
 
-void TestRemoteExecuteSilentCopies(bool async, bool remote) {
+string MatMulFunction() {
+  tensorflow::FunctionDef def;
+  CHECK(tensorflow::protobuf::TextFormat::ParseFromString(
+      ""    signature {""
+      ""      name: 'MatMulFunction'""
+      ""      input_arg {""
+      ""        name: 'a'""
+      ""        type: DT_FLOAT""
+      ""      }""
+      ""      input_arg {""
+      ""        name: 'b'""
+      ""        type: DT_FLOAT""
+      ""      }""
+      ""      output_arg {""
+      ""        name: 'm'""
+      ""        type: DT_FLOAT""
+      ""      }""
+      ""    }""
+      ""    node_def {""
+      ""      name: 'matmul'""
+      ""      op: 'MatMul'""
+      ""      input: 'a'""
+      ""      input: 'b'""
+      ""      attr {""
+      ""        key: 'T'""
+      ""        value {""
+      ""          type: DT_FLOAT""
+      ""        }""
+      ""      }""
+      ""    }""
+      ""    ret {""
+      ""      key: 'm'""
+      ""      value: 'matmul:product'""
+      ""    }"",
+      &def));
+  return def.SerializeAsString();
+}
+
+void TestRemoteExecuteSilentCopies(bool async, bool remote, bool func) {
   tensorflow::ServerDef server_def = GetServerDef(3);
 
   // This server def has the task index set to 0.
@@ -169,10 +207,29 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
       TFE_TensorHandleCopyToDevice(h1_task0, ctx, task2_name, status);
   ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
 
-  // Handles are on task0 (local), and task2, but op is on task1.
-  TFE_Op* matmul = MatMulOp(ctx, h0_task0, h1_task2);
+  TFE_Op* matmul = nullptr;
+  if (func) {
+    string function_def = MatMulFunction();
+    TFE_ContextAddFunctionDef(ctx, function_def.data(), function_def.size(),
+                              status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+
+    matmul = TFE_NewOp(ctx, ""MatMulFunction"", status);
+    ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+    TFE_OpAddInput(matmul, h0_task0, status);
+    ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+    TFE_OpAddInput(matmul, h1_task2, status);
+    ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+  } else {
+    // Handles are on task0 (local), and task2, but op is on task1.
+    matmul = MatMulOp(ctx, h0_task0, h1_task2);
+  }
   if (remote) {
     TFE_OpSetDevice(matmul, task1_name, status);
+  } else if (!async) {
+    auto remote_arg = tensorflow::TensorHandleFromInterface(h1_task2->handle);
+    // The input handles should never change since they have been mirrored.
+    ASSERT_FALSE(remote_arg->HasLocalMirror(nullptr));
   }
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
 
@@ -182,12 +239,10 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
   EXPECT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
 
   // TODO(gjn): Add support for waiting on async local mirrors
-  if (!async) {
+  if (!remote && !async) {
     auto remote_arg = tensorflow::TensorHandleFromInterface(h1_task2->handle);
-    tensorflow::EagerOperation* op =
-        tensorflow::OperationFromInterface(matmul->operation);
     // The input handles should never change since they have been mirrored.
-    ASSERT_EQ(op->Inputs()[1], remote_arg);
+    ASSERT_TRUE(remote_arg->HasLocalMirror(nullptr));
   }
 
   auto* retval_task0 = TFE_TensorHandleCopyToDevice(
@@ -217,6 +272,9 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
   TFE_ExecutorWaitForAllPendingNodes(executor, status);
   ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TFE_DeleteExecutor(executor);
+  if (func) {
+    TFE_ContextRemoveFunction(ctx, ""MatMulFunction"", status);
+  }
   TFE_DeleteContext(ctx);
 
   TF_DeleteStatus(status);
@@ -227,16 +285,22 @@ void TestRemoteExecuteSilentCopies(bool async, bool remote) {
 }
 
 TEST(CAPI, RemoteExecuteSilentCopies) {
-  TestRemoteExecuteSilentCopies(false, true);
+  TestRemoteExecuteSilentCopies(false, true, false);
 }
 TEST(CAPI, RemoteExecuteSilentCopiesAsync) {
-  TestRemoteExecuteSilentCopies(true, true);
+  TestRemoteExecuteSilentCopies(true, true, false);
+}
+TEST(CAPI, RemoteExecuteSilentCopiesAsyncFunc) {
+  TestRemoteExecuteSilentCopies(true, true, true);
 }
 TEST(CAPI, RemoteExecuteSilentCopiesLocal) {
-  TestRemoteExecuteSilentCopies(false, false);
+  TestRemoteExecuteSilentCopies(false, false, false);
 }
 TEST(CAPI, RemoteExecuteSilentCopiesLocalAsync) {
-  TestRemoteExecuteSilentCopies(true, false);
+  TestRemoteExecuteSilentCopies(true, false, false);
+}
+TEST(CAPI, RemoteExecuteSilentCopiesLocalAsyncFunc) {
+  TestRemoteExecuteSilentCopies(true, false, true);
 }
 
 void TestRemoteExecuteDeleteContextWithOutstandingRPC(bool async) {
",0,train
a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs

Unprotect should only be called on local handles. In order to test the
triggering of forwarding for remote inputs to a function we add an
optimization whereby EagerExecute releases the inputs of the eager
operation. This enforces that a TFE_Op cannot be reused since the inputs
would have been removed. This was technically already true since if the
inputs were ever forwarded we should not be re-using the TFE_Op.

PiperOrigin-RevId: 306564949
Change-Id: I94bd3a243658277891867802b792a4492ec0a039",c_api_test.cc,"@@ -78,11 +78,18 @@ void BM_Execute(int iters, int async) {
   TFE_DeleteContextOptions(opts);
 
   TFE_TensorHandle* m = TestMatrixTensorHandle(ctx);
-  TFE_Op* matmul = MatMulOp(ctx, m, m);
+  TFE_Op* matmul = TFE_NewOp(ctx, ""MatMul"", status);
+  CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TFE_TensorHandle* retvals[1];
   int num_retvals = 1;
   tensorflow::testing::StartTiming();
   for (int i = 0; i < iters; ++i) {
+    TFE_OpReset(matmul, ""MatMul"", nullptr, status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+    TFE_OpAddInput(matmul, m, status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+    TFE_OpAddInput(matmul, m, status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
     TFE_Execute(matmul, &retvals[0], &num_retvals, status);
     CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   }
@@ -113,11 +120,15 @@ void BM_Execute_Identity(int iters, int async) {
   TFE_DeleteContextOptions(opts);
 
   TFE_TensorHandle* m = TestMatrixTensorHandle(ctx);
-  TFE_Op* identity = IdentityOp(ctx, m);
+  TFE_Op* identity = TFE_NewOp(ctx, ""Identity"", status);
   TFE_TensorHandle* retvals[1];
   int num_retvals = 1;
   tensorflow::testing::StartTiming();
   for (int i = 0; i < iters; ++i) {
+    TFE_OpReset(identity, ""Identity"", nullptr, status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
+    TFE_OpAddInput(identity, m, status);
+    CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
     TFE_Execute(identity, &retvals[0], &num_retvals, status);
     CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   }
@@ -405,6 +416,11 @@ void TensorHandleSilentCopy(bool async,
         hcpu, ctx, gpu_device_name.c_str(), status.get());
     ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get());
 
+    auto cpu_arg = tensorflow::TensorHandleFromInterface(hcpu->handle);
+    auto gpu_arg = tensorflow::TensorHandleFromInterface(hgpu->handle);
+    auto gpu_device = absl::get<tensorflow::Device*>(gpu_arg->device());
+    ASSERT_FALSE(cpu_arg->HasLocalMirror(gpu_device));
+
     TFE_Op* matmul = MatMulOp(ctx, hcpu, hgpu);
     if (cpu_op) {
       string cpu_device_name;
@@ -420,15 +436,8 @@ void TensorHandleSilentCopy(bool async,
     TFE_Execute(matmul, &retvals[0], &num_retvals, status.get());
     ASSERT_EQ(TF_GetCode(status.get()), TF_OK) << TF_Message(status.get());
 
-    // Validate if the input was replaced with a different TensorHandle
-    auto arg0 = tensorflow::TensorHandleFromInterface(hcpu->handle);
-    auto arg1 = tensorflow::TensorHandleFromInterface(hgpu->handle);
-    tensorflow::EagerOperation* op =
-        tensorflow::OperationFromInterface(matmul->operation);
-
-    // The input handles should never change since they have been mirrored.
-    EXPECT_EQ(op->Inputs()[0], arg0);
-    EXPECT_EQ(op->Inputs()[1], arg1);
+    // The CPU handle should have been copied and have a mirror on the GPU
+    ASSERT_TRUE(cpu_arg->HasLocalMirror(gpu_device));
 
     TFE_DeleteOp(matmul);
     TFE_DeleteTensorHandle(retvals[0]);
@@ -626,17 +635,6 @@ void ExecuteAdd(bool async, bool forward_input, bool tfrt) {
   }
 
   int num_retvals = 1;
-
-  if (async) {
-    // Enqueue dummy ops so we backlog async execution & actually test async.
-    for (int i = 0; i < 10000; ++i) {
-      TFE_TensorHandle* dummy = nullptr;
-      TFE_Execute(add_op, &dummy, &num_retvals, status);
-      ASSERT_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
-      TFE_DeleteTensorHandle(dummy);
-    }
-  }
-
   TFE_TensorHandle* retval = nullptr;
   TFE_Execute(add_op, &retval, &num_retvals, status);
   EXPECT_EQ(1, num_retvals);
",0,train
a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs

Unprotect should only be called on local handles. In order to test the
triggering of forwarding for remote inputs to a function we add an
optimization whereby EagerExecute releases the inputs of the eager
operation. This enforces that a TFE_Op cannot be reused since the inputs
would have been removed. This was technically already true since if the
inputs were ever forwarded we should not be re-using the TFE_Op.

PiperOrigin-RevId: 306564949
Change-Id: I94bd3a243658277891867802b792a4492ec0a039",execute.cc,"@@ -596,6 +596,10 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
         &ctx, op->Inputs(), op->remote_func_params(), std::move(kernel),
         graph_collector, op->GetCancellationManager(),
         absl::Span<TensorHandle*>(retvals, num_outputs));
+    // Release the inputs from the eager operation since the AsyncExecuteNode
+    // would have taken ownership. This allows the inputs to be forwarded if
+    // possible.
+    op->Clear();
     // For async mode, execution order will make sure that all
     // input handles are ready before executing them.
     // TODO(b/137118203): Consider executing ""cheap"" kernels inline for
@@ -609,6 +613,10 @@ Status EagerLocalExecute(EagerOperation* op, TensorHandle** retvals,
                      graph_collector, op->GetCancellationManager(),
                      {retvals, static_cast<size_t>(num_outputs)});
     s = executor.SyncExecute(&node);
+    // We release the inputs AFTER executing the operation in sync mode since
+    // ExecuteNode does not increment the reference count and thus does not have
+    // ownership of the inputs while executing.
+    op->Clear();
   }
   // Since the operation failed, we need to Unref any outputs if they were
   // allocated.
",0,train
a9f8a9b1c1995dc4a7c0b6d53df8af6082325360,tensorflow/tensorflow,"Do not call Unprotect on remote inputs

Unprotect should only be called on local handles. In order to test the
triggering of forwarding for remote inputs to a function we add an
optimization whereby EagerExecute releases the inputs of the eager
operation. This enforces that a TFE_Op cannot be reused since the inputs
would have been removed. This was technically already true since if the
inputs were ever forwarded we should not be re-using the TFE_Op.

PiperOrigin-RevId: 306564949
Change-Id: I94bd3a243658277891867802b792a4492ec0a039",tensor_handle.cc,"@@ -449,7 +449,7 @@ Status TensorHandle::NumElements(int64* num_elements) const {
 Status TensorHandle::Unprotect(const Device* d) {
   DVLOG(3) << ""Unprotect on TensorHandle: "" << this << "" device: "" << d;
 
-  if (d == absl::get<Device*>(device_)) {
+  if (!IsRemote() && (d == absl::get<Device*>(device_))) {
     auto& data = absl::get<LocalTensorHandleData>(data_);
     return data.Unprotect();
   }
",0,train
835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 296339357
Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",network.py,"@@ -1063,7 +1063,28 @@ class Network(base_layer.Layer):
         ValueError: For invalid/unknown format arguments.
     """"""
     self._assert_weights_created()
-    save_format = validate_save_format(filepath, save_format)
+    filepath_is_h5 = _is_hdf5_filepath(filepath)
+    if save_format is None:
+      if filepath_is_h5:
+        save_format = 'h5'
+      else:
+        save_format = 'tf'
+    else:
+      user_format = save_format.lower().strip()
+      if user_format in ('tensorflow', 'tf'):
+        save_format = 'tf'
+      elif user_format in ('hdf5', 'h5', 'keras'):
+        save_format = 'h5'
+      else:
+        raise ValueError(
+            'Unknown format ""%s"". Was expecting one of {""tf"", ""h5""}.' % (
+                save_format,))
+    if save_format == 'tf' and filepath_is_h5:
+      raise ValueError(
+          ('save_weights got save_format=""tf""/""tensorflow"", but the '
+           'filepath (""%s"") looks like an HDF5 file. Omit the "".h5""/"".keras"" '
+           'when saving in TensorFlow format.')
+          % filepath)
 
     if save_format == 'h5' and h5py is None:
       raise ImportError(
@@ -2086,67 +2107,3 @@ def get_network_config(network, serialize_layer_fn=None):
   model_outputs = tf_utils.convert_inner_node_data(model_outputs)
   config['output_layers'] = model_outputs
   return config
-
-
-def validate_save_format(filepath, save_format, default='tf'):
-  """"""Validates `save_format` argument passed to methods used for saving.
-
-  Returns either 'tf' or 'h5', indicating whether to save the model
-  to Tensorflow SavedModel or HDF5. Output will default to 'tf' in TF2.X and
-  'h5' in TF1.X.
-
-  Defaults to 'h5' if `filepath` is a path to a hdf5 file (having suffix '.h5'
-  or '.hdf5' or '.keras') or is an h5py.File object.
-
-  Args:
-    filepath: Value of the `filepath` argument passed to the method.
-      Can be: - String - h5py.File object
-    save_format: String, value of the 'save_format' argument as passed.
-    default: Default format if save_format isn't specified and the filepath
-      doesn't indicate that the format is 'h5'.
-
-  Returns:
-    save_format: String, 'h5' or 'tf'. The processed
-    value of the `save_format` argument.
-
-  Raises:
-    ValueError: If
-      - `filepath` is not a String or an h5py.File object.
-      - `save_format` is not valid. Valid values are ""tensorflow"", ""tf"" for
-        saving in SavedModel format, and ""hdf5"", ""keras"" or ""h5"" for saving in
-        h5 format.
-      - `save_format` is ""tf"" but `filepath` is a path to a h5 file.
-      - `save_format` is ""tf"" but `filepath` is an h5py.File object.
-  """"""
-  if not isinstance(filepath, (str, h5py.File)):
-    raise ValueError(
-        'Expected `filepath` to be a String or h5py.File object. Got '
-        'unsupported value %s of type %s' % (filepath, type(filepath)))
-
-  filepath_is_h5py_file = h5py is not None and isinstance(filepath, h5py.File)
-  filepath_is_h5 = isinstance(filepath, str) and _is_hdf5_filepath(filepath)
-  if save_format is None:
-    if filepath_is_h5 or filepath_is_h5py_file:
-      save_format = 'h5'
-    else:
-      save_format = default
-  else:
-    user_format = save_format.lower().strip()
-    if user_format in ('tensorflow', 'tf'):
-      save_format = 'tf'
-    elif user_format in ('hdf5', 'h5', 'keras'):
-      save_format = 'h5'
-    else:
-      raise ValueError(
-          'Unknown format ""%s"". Was expecting one of {""tf"", ""h5""}.' %
-          (save_format))
-  if save_format == 'tf' and filepath_is_h5:
-    raise ValueError(
-        ('Got save_format=""tf""/""tensorflow"", but the filepath (""%s"") looks '
-         'like an HDF5 file. Omit the "".h5""/"".keras"" when saving in '
-         'TensorFlow format.') % filepath)
-  if save_format == 'tf' and filepath_is_h5py_file:
-    raise ValueError(
-        'Got save_format=""tf""/""tensorflow"", but the given `filepath`'
-        'is an h5py.File object.')
-  return save_format
",0,train
835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 296339357
Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",network_test.py,"@@ -1880,43 +1880,5 @@ class CacheCorrectnessTest(keras_parameterized.TestCase):
       self.assertEqual(network.compute_output_shape((1, i, 32)), (1, i, 2))
 
 
-class SaveFormatValidationTest(keras_parameterized.TestCase):
-
-  def test_save_format_validation(self):
-    filepath = 'file/path'
-    h5_filepath = 'h5_filepath.h5'
-    h5_filepath_2 = 'h5_filepath.hdf5'
-    h5_filepath_3 = 'h5_filepath.keras'
-
-    self.assertEqual(
-        network_lib.validate_save_format(filepath, None, 'h5'), 'h5')
-    self.assertEqual(
-        network_lib.validate_save_format(filepath, None, 'tf'), 'tf')
-
-    self.assertEqual(network_lib.validate_save_format(filepath, 'h5'), 'h5')
-    self.assertEqual(network_lib.validate_save_format(h5_filepath, None), 'h5')
-    self.assertEqual(
-        network_lib.validate_save_format(h5_filepath_2, None), 'h5')
-    self.assertEqual(
-        network_lib.validate_save_format(h5_filepath_3, None), 'h5')
-    self.assertEqual(
-        network_lib.validate_save_format(h5_filepath, 'hdf5'), 'h5')
-    self.assertEqual(
-        network_lib.validate_save_format(h5_filepath, 'keras'), 'h5')
-
-    self.assertEqual(network_lib.validate_save_format(filepath, 'tf'), 'tf')
-    self.assertEqual(
-        network_lib.validate_save_format(filepath, 'tensorflow'), 'tf')
-
-    with self.assertRaises(ValueError):
-      network_lib.validate_save_format(42, 'h5')
-
-    with self.assertRaises(ValueError):
-      network_lib.validate_save_format(filepath, 'unknown_format')
-
-    with self.assertRaises(ValueError):
-      network_lib.validate_save_format(h5_filepath, 'tf')
-
-
 if __name__ == '__main__':
   test.main()
",0,train
835ac7291dd62277e27d1a66e241608b98790bb3,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 296339357
Change-Id: Ife4d6cc532586e15b94c049786977c4a7acf597d",save.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import os
 import sys
 
 import six
@@ -28,15 +29,8 @@ from tensorflow.python.keras.saving.saved_model import load as saved_model_load
 from tensorflow.python.keras.saving.saved_model import save as saved_model_save
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.saved_model import loader_impl
-from tensorflow.python.util.lazy_loader import LazyLoader
 from tensorflow.python.util.tf_export import keras_export
 
-# pylint: disable=g-inconsistent-quotes
-network = LazyLoader(
-    ""network"", globals(),
-    ""tensorflow.python.keras.engine.network"")
-# pylint: enable=g-inconsistent-quotes
-
 # pylint: disable=g-import-not-at-top
 if sys.version_info >= (3, 4):
   import pathlib
@@ -46,6 +40,9 @@ except ImportError:
   h5py = None
 # pylint: enable=g-import-not-at-top
 
+_HDF5_EXTENSIONS = ['.h5', '.hdf5', '.keras']
+
+
 # TODO(kathywu): Remove this when Keras SavedModel is not experimental.
 _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL = True
 
@@ -115,14 +112,15 @@ def save_model(model,
   """"""
   from tensorflow.python.keras.engine import sequential  # pylint: disable=g-import-not-at-top
 
+  default_format = 'tf' if tf2.enabled() else 'h5'
+  save_format = save_format or default_format
+
   if sys.version_info >= (3, 4) and isinstance(filepath, pathlib.Path):
     filepath = str(filepath)
 
-  default_format = 'tf' if tf2.enabled() else 'h5'
-  save_format = network.validate_save_format(filepath, save_format,
-                                             default_format)
-
-  if save_format == 'h5':
+  if (save_format == 'h5' or
+      (h5py is not None and isinstance(filepath, h5py.File)) or
+      os.path.splitext(filepath)[1] in _HDF5_EXTENSIONS):
     # TODO(b/130258301): add utility method for detecting model type.
     if (not model._is_graph_network and  # pylint:disable=protected-access
         not isinstance(model, sequential.Sequential)):
",0,train
cebfa65df5451af59f88115d509b4a7830d34d26,tensorflow/tensorflow,"Enable the mlir quantizer for all the zip tests

This also enables the tests for all the zip tests, including the floating-point
conversions, when _experimental_new_quantizer is on.

PiperOrigin-RevId: 348900650
Change-Id: Iea3cd528d4cf105fb218fb856b4c7fda27db2bdb",zip_test_utils.py,"@@ -368,11 +368,6 @@ def make_zip_of_tests(options,
           ""fully_quantize"", False) or param_dict.get(""quant_16x8"", False)):
         continue
 
-      # Skips the new quantizer tests when `fully_quantize` is set to false
-      # or it is not set.
-      if options.mlir_quantizer and not param_dict.get(""fully_quantize"", False):
-        continue
-
       def generate_inputs_outputs(tflite_model_binary,
                                   min_value=0,
                                   max_value=255):
",0,train
880d16bc8fcf8160037abc05c2585baed6a35cd5,tensorflow/tensorflow,"Change max clones to simple heuristic based on original number of functions

Less arbitrary than fixed number but degenerate behavior is now defined in
terms of the number of functions in the original module.

PiperOrigin-RevId: 381109197
Change-Id: I68f66b303044648c8410e0a16d3926dc0a5570ea",guarantee_all_funcs_one_use.cc,"@@ -67,15 +67,11 @@ class GuaranteeAllFuncsOneUse
     SymbolTable &symbol_table = symbol_table_collection.getSymbolTable(module);
     bool made_changes = false;
 
-    // The maximum number of clones value needs to be low enough to actually
-    // stop compilation in a reasonable time, but not too low that it blocks
-    // real programs. This number was chosen semi-randomly.
-    int number_of_functions = [&]() -> int {
-      auto fn_range = module.getOps<FuncOp>();
-      return std::distance(fn_range.begin(), fn_range.end());
-    }();
-    const int kMaxClones = 4 * number_of_functions;
-
+    // This value needs to be low enough to actually stop compilation in a
+    // reasonable time, but not too low that it blocks real programs.
+    // This number was chosen semi-randomly.
+    // TODO(jpienaar): Switch to a more context aware heuristic.
+    const int kMaxClones = 10000;
     int num_clones = 0;
     do {
       SymbolUserMap symbol_users(symbol_table_collection, module);
",0,train
2b703011163454ae15ba07ec89dd6bd2d8633a00,tensorflow/tensorflow,"add type info described in issue #5236 (#5278)

* add type info when assign

* add INFO

* add spaces

* wrap to 80 lines",simple_placer.cc,"@@ -815,9 +815,11 @@ void SimplePlacer::AssignAndLog(const string& assigned_device,
   node->set_assigned_device_name(assigned_device);
   // Log placement if log_device_placement is set.
   if (options_ && options_->config.log_device_placement()) {
-    printf(""%s: %s\n"", node->name().c_str(),
+    printf(""%s: (%s): %s\n"", node->name().c_str(),
+           node->type_string().c_str(),
            node->assigned_device_name().c_str());
-    LOG(INFO) << node->name() << "": "" << node->assigned_device_name();
+    LOG(INFO) << node->name() << "": "" << ""("" << node->type_string() << "")"" 
+              << node->assigned_device_name();
   }
 }
 
",0,train
e5379bb6c053c1d1af913cd1c8c14663191e58b3,tensorflow/tensorflow,pylint: whitespace changes,mirrored_strategy.py,"@@ -87,7 +87,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
     self._devices = [device_util.resolve(d) for d in devices]
     self._canonical_device_set = set(self._devices)
     self._device_index = values.PerDevice(
-            {d: i for i, d in enumerate(devices)})
+        {d: i for i, d in enumerate(devices)})
     self._cross_tower_ops = cross_tower_ops
     self._prefetch_on_device = prefetch_on_device
     # TODO(yuefengz): consider setting the default device.
",0,train
e5379bb6c053c1d1af913cd1c8c14663191e58b3,tensorflow/tensorflow,pylint: whitespace changes,errors_impl.py,"@@ -476,7 +476,7 @@ _CODE_TO_EXCEPTION_CLASS = {
 c_api.PyExceptionRegistry_Init(_CODE_TO_EXCEPTION_CLASS)
 
 _EXCEPTION_CLASS_TO_CODE = {
-  class_: code for code, class_ in _CODE_TO_EXCEPTION_CLASS.items()}
+    class_: code for code, class_ in _CODE_TO_EXCEPTION_CLASS.items()}
 
 
 @tf_export(""errors.exception_type_from_error_code"")
",0,train
49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config.

PiperOrigin-RevId: 307922589
Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",executable_run_options.h,"@@ -127,6 +127,13 @@ class ExecutableRunOptions {
   ExecutableRunOptions& set_rng_seed(int rng_seed);
   int rng_seed() const;
 
+  ExecutableRunOptions& set_launch_id(int32 launch_id) {
+    launch_id_ = launch_id;
+    return *this;
+  }
+
+  int32 launch_id() const { return launch_id_; }
+
   ExecutableRunOptions& set_run_id(RunId id);
   RunId run_id() const;
 
@@ -153,6 +160,7 @@ class ExecutableRunOptions {
   const Eigen::ThreadPoolDevice* intra_op_thread_pool_ = nullptr;
   ExecutionProfile* execution_profile_ = nullptr;
   int rng_seed_ = 0;
+  int32 launch_id_ = 0;
   stream_executor::Stream* host_to_device_stream_ = nullptr;
   ThenExecuteFunction* then_execute_function_ = nullptr;
   RunId run_id_;
",0,train
49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config.

PiperOrigin-RevId: 307922589
Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",hlo_module_config.h,"@@ -108,6 +108,12 @@ class HloModuleConfig {
   void set_seed(uint64 seed) { seed_ = seed; }
   uint64 seed() const { return seed_; }
 
+  // Set the launch id of the program. Launch id identifies a set of programs
+  // that should be launched together.
+  void set_launch_id(uint64 launch_id) { launch_id_ = launch_id; }
+
+  int32 launch_id() const { return launch_id_; }
+
   void set_replica_count(int64 replica_count) {
     replica_count_ = replica_count;
   }
@@ -197,6 +203,9 @@ class HloModuleConfig {
   // Module/graph-level seed handle.
   uint64 seed_ = 0;
 
+  // Program id that identifies a set of programs to be launched together.
+  int32 launch_id_ = 0;
+
   // The number of replicas (data parallelism) to compile this binary for.
   int64 replica_count_ = 1;
 
",0,train
49e59a8cad98ff3cfaa38247108bad2f8d23e70f,tensorflow/tensorflow,"Add a launch id field in run options and hlo module config.

PiperOrigin-RevId: 307922589
Change-Id: Ie1ea0b389e5228f827d570086799227983035f81",service.cc,"@@ -314,6 +314,7 @@ StatusOr<std::unique_ptr<HloModuleConfig>> Service::CreateModuleConfig(
       config->set_num_partitions(execution_options->num_partitions());
     }
     config->set_seed(execution_options->seed());
+    config->set_launch_id(execution_options->launch_id());
     config->set_debug_options(execution_options->debug_options());
   } else {
     config->set_replica_count(options_.number_of_replicas());
",0,train
9b1fe8f31ee1788208d8d6b7385382e436c5e1d7,tensorflow/tensorflow,"Use `tempfile.mkdtemp` instead of `tempfile.mktemp` to create directories.

The `tempfile.mktemp` function is [deprecated](https://docs.python.org/3/library/tempfile.html#tempfile.mktemp) due to [security issues](https://cwe.mitre.org/data/definitions/377.html).

The switch is easy to do: just a name change

PiperOrigin-RevId: 420370858
Change-Id: I44a0849d161132eacd4f3881fdb615e09c0f02a2",debug_data_test.py,"@@ -147,8 +147,7 @@ class DebugTensorDatumTest(test_util.TensorFlowTestCase):
 class DebugDumpDirTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
-    self._dump_root = tempfile.mktemp()
-    os.mkdir(self._dump_root)
+    self._dump_root = tempfile.mkdtemp()
 
   def tearDown(self):
     # Tear down temporary dump directory.
@@ -179,7 +178,7 @@ class DebugDumpDirTest(test_util.TensorFlowTestCase):
 
   def testDebugDumpDir_nonexistentDumpRoot(self):
     with self.assertRaisesRegex(IOError, ""does not exist""):
-      debug_data.DebugDumpDir(tempfile.mktemp() + ""_foo"")
+      debug_data.DebugDumpDir(tempfile.mkdtemp() + ""_foo"")
 
   def testDebugDumpDir_invalidFileNamingPattern(self):
     # File name with too few underscores should lead to an exception.
",0,test
4475cc8744b1c6b2f61052a5da0810ecc34ee642,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 397208029
Change-Id: I16d248be52ccdef0e88995842780c6dc93a20ba8",session_test.py,"@@ -76,7 +76,6 @@ except ImportError:
 defaultdict = collections.defaultdict  # pylint:disable=invalid-name
 
 
-@test_util.with_eager_op_as_function
 class SessionTest(test_util.TensorFlowTestCase):
 
   def setUp(self):
@@ -1963,9 +1962,7 @@ class SessionTest(test_util.TensorFlowTestCase):
     self.assertEqual(c, 3)
     self.assertEqual(d, 3)
     # Ensure that we did log device placement.
-    add_executions = [
-        l for l in str(log).splitlines() if 'Executing op AddV2' in l
-    ]
+    add_executions = [l for l in str(log).splitlines() if 'AddV2' in l]
     self.assertEqual(len(add_executions), 2)
 
     @def_function.function
",0,train
5c438dfc7a0d47b18f0064c6ad6172df6eee4325,tensorflow/tensorflow,"...Setting shapes of placeholders used in tf.compat.v2.test.compute_gradient...

PiperOrigin-RevId: 293212323
Change-Id: I07e7620965bff0872f83d4619941509d97bc499f",gradient_checker_v2.py,"@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """"""Gradient checker for functions.
 
 The gradient checker verifies numerically that an function properly
@@ -94,7 +93,7 @@ def _to_numpy(a):
   return a
 
 
-def _prepare(f, xs_dtypes):
+def _prepare(f, xs_dtypes, xs_shapes):
   """"""Return a function that executes 'f'.
 
     In TF 2.x, this is the same as `f`.
@@ -104,9 +103,9 @@ def _prepare(f, xs_dtypes):
   Args:
     f: the function.
     xs_dtypes: dtypes of f's arguments.
+    xs_shapes: shapes of f's arguments.
 
   Returns:
-    a function that will be evaluated in both graph and eager mode
   """"""
   if context.executing_eagerly():
 
@@ -114,12 +113,17 @@ def _prepare(f, xs_dtypes):
       return f(*map(ops.convert_to_tensor, xs_data))
 
     return decorated_eager
-  xs = [array_ops.placeholder(x_dtype) for x_dtype in xs_dtypes]
+  xs = [
+      array_ops.placeholder(x_dtype, shape=x_shape)
+      for x_dtype, x_shape in zip(xs_dtypes, xs_shapes)
+  ]
   y = f(*xs)
   sess = ops.get_default_session()
+
   def decorated_graph(*xs_data):
     xs_data = [_to_numpy(a) for a in xs_data]
     return sess.run(y, feed_dict=dict(zip(xs, xs_data)))
+
   return decorated_graph
 
 
@@ -159,12 +163,13 @@ def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
 
   # For each of the entry of dy, we set this to be 1 and
   # everything else to be 0 and compute the gradients -- this will give us one
-  # one row of the Jacobian matrix.
+  # row of the Jacobian matrix.
   dy_data = np.zeros(y_shape, dtype=y_dtype.as_numpy_dtype)
   dy_data_flat = dy_data.ravel().view(y_dtype.real_dtype.as_numpy_dtype)
   grad_fn_unprep = backprop.gradients_function(f, [param])
   grad_fn = _prepare(lambda dy, *xs: grad_fn_unprep(*xs, dy=dy),
-                     [y_dtype] + [z.dtype for z in xs])
+                     [y_dtype] + [z.dtype for z in xs],
+                     [None] + [z.shape for z in xs])
   for row in range(y_size):
     dy_data_flat[row] = 1
     grad = _to_numpy(grad_fn(dy_data, *xs)[0])
@@ -192,8 +197,7 @@ def _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param):
   return jacobian
 
 
-def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param,
-                              delta):
+def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta):
   """"""Computes the numeric Jacobian for f regarding xs[param].
 
   One can think of the relation among f, xs and y as y = f(xs).
@@ -227,6 +231,7 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param,
   y_dtype = y_dtype.real_dtype.as_numpy_dtype
 
   xs_dtypes = [x.dtype for x in xs]
+  xs_shapes = [x.shape for x in xs]
   # Converts xs to numpy arrays to do in-place perturbation.
   # Calls asarray() to avoid copying in ravel() later.
   xs = [np.asarray(_to_numpy(x)) for x in xs]
@@ -240,7 +245,7 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param,
   # For each of the entry of x, we slightly perturbs this by adding and
   # subtracting a delta and then compute difference between the outputs. This
   # will give us one column of the Jacobian matrix.
-  f = _prepare(f, xs_dtypes)
+  f = _prepare(f, xs_dtypes, xs_shapes)
   for col in range(x_size):
     original = x.ravel().view(x_dtype)[col]
     x.ravel().view(x_dtype)[col] += delta
@@ -256,17 +261,14 @@ def _compute_numeric_jacobian(f, y_size, y_dtype, xs, param,
   return jacobian
 
 
-def _compute_gradient(f,
-                      y_shape,
-                      y_dtype,
-                      xs,
-                      param,
-                      delta):
+def _compute_gradient(f, y_shape, y_dtype, xs, param, delta):
   """"""Computes the theoretical and numerical jacobian.""""""
   x = xs[param]
   t = x.dtype
-  allowed_types = [dtypes.float16, dtypes.bfloat16, dtypes.float32,
-                   dtypes.float64, dtypes.complex64, dtypes.complex128]
+  allowed_types = [
+      dtypes.float16, dtypes.bfloat16, dtypes.float32, dtypes.float64,
+      dtypes.complex64, dtypes.complex128
+  ]
   assert t.base_dtype in allowed_types, (""Cannot compute gradient for ""
                                          ""unsupported type %s of argument %s"" %
                                          (t.name, param))
@@ -274,10 +276,8 @@ def _compute_gradient(f,
   assert t2.base_dtype in allowed_types, (""Cannot compute gradient for ""
                                           ""unsupported type %s of y"" % t2.name)
   y_size = _product(y_shape)
-  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype,
-                                          xs, param)
-  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs,
-                                      param, delta)
+  jacob_t = _compute_theoretical_jacobian(f, y_shape, y_dtype, xs, param)
+  jacob_n = _compute_numeric_jacobian(f, y_size, y_dtype, xs, param, delta)
   return jacob_t, jacob_n
 
 
@@ -287,10 +287,13 @@ def _compute_gradient_list(f, xs, delta):
   xs = list(map(ops.convert_to_tensor, xs))
   # run the function to get info of the result
   xs_dtypes = [x.dtype for x in xs]
-  f_temp = _prepare(f, xs_dtypes)
+  xs_shapes = [x.shape for x in xs]
+  f_temp = _prepare(f, xs_dtypes, xs_shapes)
   y = f_temp(*xs)
-  return zip(*[_compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype),
-                                 xs, i, delta) for i in range(len(xs))])
+  return zip(*[
+      _compute_gradient(f, y.shape, dtypes.as_dtype(y.dtype), xs, i, delta)
+      for i in range(len(xs))
+  ])
 
 
 @tf_export(""test.compute_gradient"", v1=[])
",0,train
5c438dfc7a0d47b18f0064c6ad6172df6eee4325,tensorflow/tensorflow,"...Setting shapes of placeholders used in tf.compat.v2.test.compute_gradient...

PiperOrigin-RevId: 293212323
Change-Id: I07e7620965bff0872f83d4619941509d97bc499f",gradient_checker_v2_test.py,"@@ -12,8 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-""""""Tests for compute_gradient.
-""""""
+""""""Tests for compute_gradient.""""""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -47,6 +46,21 @@ def _random_complex(shape, dtype):
 @test_util.run_all_in_graph_and_eager_modes
 class GradientCheckerTest(test.TestCase):
 
+  def testWithStaticShape(self):
+    size = (2, 3)
+    constant = constant_op.constant(2.0, shape=size, name=""const"")
+
+    def add_constant_with_static_shape_check(x):
+      self.assertAllEqual(x.shape.as_list(), constant.shape.as_list())
+      return x + constant
+
+    x = constant_op.constant(3.0, shape=size, name=""x"")
+
+    error = gradient_checker.max_error(*gradient_checker.compute_gradient(
+        add_constant_with_static_shape_check, [x]))
+
+    self.assertLess(error, 1e-4)
+
   def testAddSimple(self):
     size = (2, 3)
     x1 = constant_op.constant(2.0, shape=size, name=""x1"")
@@ -58,31 +72,32 @@ class GradientCheckerTest(test.TestCase):
 
   def testAddCustomized(self):
     size = (2, 3)
-    x1 = constant_op.constant(
-        2.0, shape=size, dtype=dtypes.float64, name=""x1"")
+    x1 = constant_op.constant(2.0, shape=size, dtype=dtypes.float64, name=""x1"")
     x2 = np.asarray(np.arange(6, dtype=np.float64).reshape(2, 3))
     # checkint gradients for x2 using a special delta
     error = gradient_checker.max_error(*gradient_checker.compute_gradient(
-        lambda x2: math_ops.add(x1, x2),
-        [x2], delta=1e-2))
+        lambda x2: math_ops.add(x1, x2), [x2], delta=1e-2))
     tf_logging.info(""x2 error = %f"", error)
     self.assertLess(error, 1e-10)
 
   def testGather(self):
+
     def f(params):
       index_values = [1, 3]
       indices = constant_op.constant(index_values, name=""i"")
       return array_ops.gather(params, indices, name=""y"")
+
     p_shape = (4, 2)
     p_size = 8
     params = constant_op.constant(
         np.arange(p_size).astype(np.float), shape=p_shape, name=""p"")
-    error = gradient_checker.max_error(*gradient_checker.compute_gradient(
-        f, [params]))
+    error = gradient_checker.max_error(
+        *gradient_checker.compute_gradient(f, [params]))
     tf_logging.info(""gather error = %f"", error)
     self.assertLess(error, 1e-4)
 
   def testNestedGather(self):
+
     def f(params):
       index_values = [1, 3, 5, 6]
       indices = constant_op.constant(index_values, name=""i"")
@@ -90,57 +105,62 @@ class GradientCheckerTest(test.TestCase):
       index_values2 = [0, 2]
       indices2 = constant_op.constant(index_values2, name=""i2"")
       return array_ops.gather(y, indices2, name=""y2"")
+
     p_shape = (8, 2)
     p_size = 16
     params = constant_op.constant(
         np.arange(p_size).astype(np.float), shape=p_shape, name=""p"")
-    error = gradient_checker.max_error(*gradient_checker.compute_gradient(
-        f, [params]))
+    error = gradient_checker.max_error(
+        *gradient_checker.compute_gradient(f, [params]))
     tf_logging.info(""nested gather error = %f"", error)
     self.assertLess(error, 1e-4)
 
   def testComplexMul(self):
     c = constant_op.constant(5 + 7j, dtype=dtypes.complex64)
+
     def f(x):
       return c * x
+
     x_shape = c.shape
     x_dtype = c.dtype
     x = constant_op.constant(_random_complex(x_shape, x_dtype))
-    analytical, numerical = gradient_checker.compute_gradient(
-        f, [x])
+    analytical, numerical = gradient_checker.compute_gradient(f, [x])
     correct = np.array([[5, -7], [7, 5]])
     self.assertAllEqual(correct, analytical[0])
     self.assertAllClose(correct, numerical[0], rtol=1e-4)
     x = constant_op.constant(_random_complex(x_shape, x_dtype))
     self.assertLess(
-        gradient_checker.max_error(*gradient_checker.compute_gradient(
-            f, [x])), 3e-4)
+        gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])),
+        3e-4)
 
   def testComplexConj(self):
+
     def f(x):
       return math_ops.conj(x)
+
     x_shape = ()
     x_dtype = dtypes.complex64
     x = constant_op.constant(_random_complex(x_shape, x_dtype))
-    analytical, numerical = gradient_checker.compute_gradient(
-        f, [x])
+    analytical, numerical = gradient_checker.compute_gradient(f, [x])
     correct = np.array([[1, 0], [0, -1]])
     self.assertAllEqual(correct, analytical[0])
     self.assertAllClose(correct, numerical[0], rtol=2e-5)
     x = constant_op.constant(_random_complex(x_shape, x_dtype))
     self.assertLess(
-        gradient_checker.max_error(*gradient_checker.compute_gradient(
-            f, [x])), 2e-5)
+        gradient_checker.max_error(*gradient_checker.compute_gradient(f, [x])),
+        2e-5)
 
   def testEmptySucceeds(self):
+
     def f(x):
       return array_ops.identity(x)
-    x = constant_op.constant(np.random.random_sample((0, 3)),
-                             dtype=dtypes.float32)
+
+    x = constant_op.constant(
+        np.random.random_sample((0, 3)), dtype=dtypes.float32)
     for grad in gradient_checker.compute_gradient(f, [x]):
       self.assertEqual(grad[0].shape, (0, 0))
-    error = gradient_checker.max_error(*gradient_checker.compute_gradient(
-        f, [x]))
+    error = gradient_checker.max_error(
+        *gradient_checker.compute_gradient(f, [x]))
     self.assertEqual(error, 0)
 
   def testEmptyMatMul(self):
@@ -160,37 +180,47 @@ class GradientCheckerTest(test.TestCase):
     self.assertEqual(error, 0)
 
   def testEmptyFails(self):
+
     @custom_gradient.custom_gradient
     def id_bad_grad(x):
       y = array_ops.identity(x)
+
       def grad_fn(dy):
         # dx = constant_op.constant(np.zeros((1, 4)), dtype=dtypes.float32)
         dx = array_ops.transpose(dy)
         return dx
+
       return y, grad_fn
+
     def f(x):
       return id_bad_grad(x)
-    x = constant_op.constant(np.random.random_sample((0, 3)),
-                             dtype=dtypes.float32)
+
+    x = constant_op.constant(
+        np.random.random_sample((0, 3)), dtype=dtypes.float32)
     bad = r""Empty gradient has wrong shape: expected \(0, 3\), got \(3, 0\)""
     with self.assertRaisesRegexp(ValueError, bad):
       gradient_checker.compute_gradient(f, [x])
 
   def testNaNGradFails(self):
+
     @custom_gradient.custom_gradient
     def id_nan_grad(x):
       y = array_ops.identity(x)
+
       def grad_fn(dy):
         dx = np.nan * dy
         # dx = dy
         return dx
+
       return y, grad_fn
+
     def f(x):
       return id_nan_grad(x)
-    x = constant_op.constant(np.random.random_sample((1, 1)),
-                             dtype=dtypes.float32)
-    error = gradient_checker.max_error(*gradient_checker.compute_gradient(
-        f, [x]))
+
+    x = constant_op.constant(
+        np.random.random_sample((1, 1)), dtype=dtypes.float32)
+    error = gradient_checker.max_error(
+        *gradient_checker.compute_gradient(f, [x]))
     # Typical test would assert error < max_err, so assert this test would
     # raise AssertionError, since NaN is not < 1.0.
     with self.assertRaisesRegexp(AssertionError, ""nan not less than 1.0""):
@@ -264,9 +294,7 @@ class MiniMNISTTest(test.TestCase):
         name=""softmax_bias"")
 
     # List all the parameter so that we can test them one at a time
-    all_params = [
-        inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias
-    ]
+    all_params = [inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias]
 
     # Now, Building MNIST
     def f(inp, hidden_weight, hidden_bias, softmax_weight, softmax_bias):
@@ -287,8 +315,9 @@ class MiniMNISTTest(test.TestCase):
       xs = all_params
       i = param_index
       # use x for the i-th parameter
-      xs = xs[0:i]+[x]+xs[i+1:]
+      xs = xs[0:i] + [x] + xs[i + 1:]
       return f(*xs)
+
     # Test the gradients.
     err = gradient_checker.max_error(*gradient_checker.compute_gradient(
         f_restricted, [all_params[param_index]], delta=1e-5))
",0,train
72aa2aea58513bb00f3d9cf24bc79ebe880258a7,tensorflow/tensorflow,"[tpu_driver] Add missing namespace to fix OS build

PiperOrigin-RevId: 376286743
Change-Id: I7aaa700d2a86dc608c2775a62916f08d3e595715",tpu_client.cc,"@@ -95,7 +95,7 @@ PyTpuClient::PyTpuClient(std::string platform_name,
       devices_(std::move(devices)),
       process_index_(process_index) {
   for (const std::shared_ptr<PjRtDevice>& device : devices_) {
-    down_cast<TpuDevice*>(device.get())->set_tpu_client(this);
+    tensorflow::down_cast<TpuDevice*>(device.get())->set_tpu_client(this);
     CHECK(id_to_device_.insert({device->id(), device}).second)
         << ""Duplicate device id: "" << device->id();
 
",0,train
547daed2591ff84b4c9d27ae26336ab4b6d5bf06,tensorflow/tensorflow,"Fix HandleCopies so it no longer invokes UB when the `params` or `out` TensorMaps are empty with no backing data.

PiperOrigin-RevId: 313686113
Change-Id: I4b38d7e7e8cebb40d8b7f2390f841f0b541a01e5",gather_functor.h,"@@ -44,8 +44,8 @@ SliceIndex HandleCopies(OpKernelContext* ctx,
   const SliceIndex indices_size = static_cast<SliceIndex>(indices.dimension(0));
   const SliceIndex batch_size = static_cast<SliceIndex>(params.dimension(0));
   const Index limit = static_cast<Index>(params.dimension(1));
-  T* out_base = &out(0, 0, 0);
-  const T* params_base = &params(0, 0, 0);
+  T* out_base = out.data();
+  const T* params_base = params.data();
   if (static_slice_elems >= 0) {
     // Give compiler static knowledge of the number of elements/bytes
     slice_elems = static_slice_elems;
",0,train
716fea7be71d03ba486dde6c1adba245d18e805f,tensorflow/tensorflow,"Update all tf.to_float to tf.cast(..,dtype=tf.float32) in losses_impl

PiperOrigin-RevId: 226041616",losses_impl.py,"@@ -139,7 +139,7 @@ def _num_present(losses, weights, per_batch=False):
        and not math_ops.equal(weights, 0.0))):
     return _num_elements(losses)
   with ops.name_scope(None, ""num_present"", (losses, weights)) as scope:
-    weights = math_ops.to_float(weights)
+    weights = math_ops.cast(weights, dtype=dtypes.float32)
     present = array_ops.where(
         math_ops.equal(weights, 0.0),
         array_ops.zeros_like(weights),
@@ -207,8 +207,8 @@ def compute_weighted_loss(
         weights_broadcast_ops.assert_broadcastable(weights, losses),)):
       losses = ops.convert_to_tensor(losses)
       input_dtype = losses.dtype
-      losses = math_ops.to_float(losses)
-      weights = math_ops.to_float(weights)
+      losses = math_ops.cast(losses, dtype=dtypes.float32)
+      weights = math_ops.cast(weights, dtype=dtypes.float32)
       weighted_losses = math_ops.multiply(losses, weights)
       if reduction == Reduction.NONE:
         loss = weighted_losses
@@ -275,8 +275,8 @@ def absolute_difference(
     raise ValueError(""predictions must not be None."")
   with ops.name_scope(scope, ""absolute_difference"",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     losses = math_ops.abs(math_ops.subtract(predictions, labels))
     return compute_weighted_loss(
@@ -329,8 +329,8 @@ def cosine_distance(
     raise ValueError(""predictions must not be None."")
   with ops.name_scope(scope, ""cosine_distance_loss"",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
     radial_diffs = math_ops.multiply(predictions, labels)
@@ -377,8 +377,8 @@ def hinge_loss(labels, logits, weights=1.0, scope=None,
   if logits is None:
     raise ValueError(""logits must not be None."")
   with ops.name_scope(scope, ""hinge_loss"", (logits, labels, weights)) as scope:
-    logits = math_ops.to_float(logits)
-    labels = math_ops.to_float(labels)
+    logits = math_ops.cast(logits, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     logits.get_shape().assert_is_compatible_with(labels.get_shape())
     # We first need to convert binary labels to -1/1 labels (as floats).
     all_ones = array_ops.ones_like(labels)
@@ -446,8 +446,8 @@ def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
     raise ValueError(""predictions must not be None."")
   with ops.name_scope(scope, ""huber_loss"",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     error = math_ops.subtract(predictions, labels)
     abs_error = math_ops.abs(error)
@@ -512,8 +512,8 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
     raise ValueError(""predictions must not be None."")
   with ops.name_scope(scope, ""log_loss"",
                       (predictions, labels, weights)) as scope:
-    predictions = math_ops.to_float(predictions)
-    labels = math_ops.to_float(labels)
+    predictions = math_ops.cast(predictions, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
     losses = -math_ops.multiply(
         labels,
@@ -580,11 +580,11 @@ def mean_pairwise_squared_error(
     raise ValueError(""predictions must not be None."")
   with ops.name_scope(scope, ""mean_pairwise_squared_error"",
                       (predictions, labels, weights)) as scope:
-    weights = math_ops.to_float(weights)
-    labels = math_ops.to_float(labels)
+    weights = math_ops.cast(weights, dtype=dtypes.float32)
+    labels = math_ops.cast(labels, dtype=dtypes.float32)
     with ops.control_dependencies((
         weights_broadcast_ops.assert_broadcastable(weights, labels),)):
-      predictions = math_ops.to_float(predictions)
+      predictions = math_ops.cast(predictions, dtype=dtypes.float32)
       predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
       diffs = math_ops.subtract(predictions, labels)
",0,train
898c7319013fede56e08370f6aa9998aaad9df35,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-02-25

PiperOrigin-RevId: 359466367
Change-Id: Ia3f896f0a1fbd4d029f21a93f193b6e726cd1cfa",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 2, 24)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 2, 25)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
8f457cef03c03dbd646824739f152d103c4239e8,tensorflow/tensorflow,Add None check to restorer,saved_model_aot_compile.py,"@@ -321,7 +321,8 @@ def aot_compile_cpu_meta_graph_def(checkpoint_path,
   # Load the Variables so that we can freeze the graph.
   with session.Session(graph=ops_lib.Graph()) as sess:
     restorer = saver_lib.import_meta_graph(meta_graph_def, clear_devices=True)
-    restorer.restore(sess, checkpoint_path)
+    if restorer is not None:
+      restorer.restore(sess, checkpoint_path)
     graph_def.CopyFrom(
         graph_util.convert_variables_to_constants(
             sess,
",0,train
4ab86d026ff419a5d35ee41493e29611f29a555d,tensorflow/tensorflow,"[TF:MLIR] Add promote resources to arguments pass when converting MLIR to XLA
computation.

Enable IR printing in ConvertMLIRToXlaComputation when vlog level is 1.

PiperOrigin-RevId: 290674378
Change-Id: I90739f8bde085e1f92b54c2f3c7e2448b2eb9bc1",compile_mlir_util.cc,"@@ -28,6 +28,7 @@ limitations under the License.
 #include ""mlir/Transforms/Passes.h""  // TF:llvm-project
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h""
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/shape_inference.h""
+#include ""tensorflow/compiler/mlir/tensorflow/utils/bridge_logger.h""
 #include ""tensorflow/compiler/mlir/tensorflow/utils/convert_type.h""
 #include ""tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.h""
 #include ""tensorflow/compiler/mlir/tensorflow/utils/error_util.h""
@@ -211,6 +212,7 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op,
   mlir::PassManager tf2xla(module_op.getContext());
   tf2xla.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
   tf2xla.addPass(mlir::xla_hlo::createLegalizeTFControlFlowPass());
+  tf2xla.addPass(mlir::TF::CreatePromoteResourcesToArgsPass());
   // We need to run LegalizeTFPass 2 times because first
   // LegalizeTFPass(allow_partial_conversion=true) can expose more graph pruning
   // and canonicalization opportunities that are necessary for the second
@@ -221,17 +223,17 @@ Status ConvertMLIRToXlaComputation(mlir::ModuleOp module_op,
   tf2xla.addNestedPass<mlir::FuncOp>(
       mlir::xla_hlo::createLegalizeTFPass(false));
 
-  {
-    // Make sure we catch any error reported by MLIR and forward it to the TF
-    // error reporting system. Report a generic error if pass manager failed
-    // without emitting a diagnostic.
-    mlir::StatusScopedDiagnosticHandler error_handler(module_op.getContext());
-
-    mlir::LogicalResult result = tf2xla.run(module_op);
-    if (failed(result)) {
-      return error_handler.Combine(
-          errors::Internal(""MLIR TF to XLA legalization failed""));
-    }
+  if (VLOG_IS_ON(1))
+    tf2xla.enableIRPrinting(std::make_unique<tensorflow::BridgeLoggerConfig>());
+
+  // Make sure we catch any error reported by MLIR and forward it to the TF
+  // error reporting system. Report a generic error if pass manager failed
+  // without emitting a diagnostic.
+  mlir::StatusScopedDiagnosticHandler error_handler(module_op.getContext());
+
+  if (failed(tf2xla.run(module_op))) {
+    return error_handler.Combine(
+        errors::Internal(""MLIR TF to XLA legalization failed""));
   }
 
   if (VLOG_IS_ON(1))
",0,train
23a07f2c1444509986eece54e486cdcf0b8e32e4,tensorflow/tensorflow,"[tf.data] Adding serialization support for `StatsAggregatorDatasets` to make it possible to apply static optimizations to input pipelines whose prefix contains the `set_stats_aggregator` transformation.

PiperOrigin-RevId: 214619583",latency_all_edges_test.py,"@@ -34,8 +34,8 @@ class OptimizeStatsDatasetTest(stats_dataset_test_base.StatsDatasetTestBase):
         optimization.assert_next(
             [""LatencyStats"", ""Map"", ""LatencyStats"", ""Prefetch"",
              ""LatencyStats""])).map(lambda x: x * x).prefetch(1).apply(
-                 optimization.optimize([""latency_all_edges""])).apply(
-                     stats_ops.set_stats_aggregator(stats_aggregator))
+                 stats_ops.set_stats_aggregator(stats_aggregator)).apply(
+                     optimization.optimize([""latency_all_edges""]))
     iterator = dataset.make_initializable_iterator()
     get_next = iterator.get_next()
     summary_t = stats_aggregator.get_summary()
",0,train
23a07f2c1444509986eece54e486cdcf0b8e32e4,tensorflow/tensorflow,"[tf.data] Adding serialization support for `StatsAggregatorDatasets` to make it possible to apply static optimizations to input pipelines whose prefix contains the `set_stats_aggregator` transformation.

PiperOrigin-RevId: 214619583",stats_aggregator_dataset_op.cc,"@@ -34,16 +34,18 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
                                        &stats_aggregator_resource));
     core::ScopedUnref unref_stats_aggregator(stats_aggregator_resource);
 
-    *output = new Dataset(ctx, input, stats_aggregator_resource);
+    *output = new Dataset(ctx, input, ctx->input(1), stats_aggregator_resource);
   }
 
  private:
   class Dataset : public DatasetBase {
    public:
     explicit Dataset(OpKernelContext* ctx, const DatasetBase* input,
+                     const Tensor& resource_handle,
                      StatsAggregatorResource* stats_aggregator_resource)
         : DatasetBase(DatasetContext(ctx)),
           input_(input),
+          resource_handle_(resource_handle),
           stats_aggregator_resource_(stats_aggregator_resource) {
       input_->Ref();
       stats_aggregator_resource_->Ref();
@@ -75,8 +77,13 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     Status AsGraphDefInternal(SerializationContext* ctx,
                               DatasetGraphDefBuilder* b,
                               Node** output) const override {
-      return errors::Unimplemented(""%s does not support serialization"",
-                                   DebugString());
+      Node* input_graph_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddInputDataset(ctx, input_, &input_graph_node));
+      Node* resource_handle_node = nullptr;
+      TF_RETURN_IF_ERROR(b->AddTensor(resource_handle_, &resource_handle_node));
+      TF_RETURN_IF_ERROR(b->AddDataset(
+          this, {input_graph_node, resource_handle_node}, output));
+      return Status::OK();
     }
 
    private:
@@ -129,6 +136,7 @@ class SetStatsAggregatorDatasetOp : public UnaryDatasetOpKernel {
     };
 
     const DatasetBase* const input_;
+    const Tensor resource_handle_;
     StatsAggregatorResource* stats_aggregator_resource_;
   };
 };
",0,train
d329c289b9381137ca849466e79b11e91048d9e0,tensorflow/tensorflow,"Update GraphDef version to 744.

PiperOrigin-RevId: 369826495
Change-Id: Iab9ed9151647604517fd60834be883905548e29a",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 743  // Updated: 2021/4/21
+#define TF_GRAPH_DEF_VERSION 744  // Updated: 2021/4/22
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
26cd260fac5fa98ade11ff2a5ec38ede65631cc0,tensorflow/tensorflow,"Add additional data validation while saving and restoring iterators.

PiperOrigin-RevId: 319078544
Change-Id: I4a439934e1ba35d5eab38513cae735372d62c8d6",iterator_ops.cc,"@@ -331,6 +331,12 @@ class IteratorVariantSerializer {
     data.reserve(num_tensors);
     for (int i = 0; i < num_tensors; ++i) {
       auto* w = serialized_vec(i).get<IteratorStateVariant>();
+      if (!w) {
+        return errors::Internal(
+            ""Cannot initialize an iterator from tensor "",
+            serialized_vec(i).DebugString(),
+            "". Expected a variant tensor of type IteratorStateVariant"");
+      }
       data.push_back(w->GetData());
     }
     reader_ = absl::make_unique<VariantTensorDataReader>(data);
@@ -349,6 +355,10 @@ class IteratorVariantSerializer {
     }
     int64 size = variants_.size();
     for (int64 i = 0; i < size; ++i) {
+      if (variants_[i].GetData() == nullptr) {
+        return errors::Internal(
+            ""Cannot serialize an empty IteratorStateVariant"");
+      }
       serialized->vec<Variant>()(i) = variants_[i];
     }
     return Status::OK();
",0,train
1ee4fe2db09c29fba0631e580dee941c7c2b2beb,tensorflow/tensorflow,"Untrack eager tensors from GC during dealloc.

We were seeing an issue where the item would be deleted, make a call to
ClearWeakRefs which seemingly dealloc'd the object (bypassing the call to
ClearWeakRefs?), and then would fail since we would try to re-delete things.

The base type dealloc mentions that clearing weakrefs with GC enabled for the object might be problematic. See https://github.com/python/cpython/blob/f78a5e9ce8f32a195f5f788aade79578437f30a6/Objects/typeobject.c#L1206-L1209

PiperOrigin-RevId: 240381495",pywrap_tensor.cc,"@@ -499,6 +499,10 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) {
 
 // tp_dealloc for EagerTensor.
 void EagerTensor_dealloc(EagerTensor* self) {
+  // Unhook the object from python's GC so that the weakref deleter doesn't
+  // try to re-delete this.
+  PyObject_GC_UnTrack((PyObject*)self);
+
   // Clear weak references to self.
   // Needs to happen before any actual destruction.
   PyObject_ClearWeakRefs((PyObject*)self);
",0,test
d1fd406e41dc8220d1b26b6164776e529396007e,tensorflow/tensorflow,"[MLIR:TF/XLA] Enable variable runtime reformatting pass.

PiperOrigin-RevId: 293846291
Change-Id: I30774c15524830612d84feaa59cc78fab6a8bfb0",bridge.cc,"@@ -54,8 +54,7 @@ void CreateTPUBridge(OpPassManager &pm) {
   pm.addNestedPass<FuncOp>(TFDevice::CreateReplicateInvariantOpHoistingPass());
   pm.addNestedPass<FuncOp>(CreateTPUDynamicLayoutPass());
   pm.addNestedPass<FuncOp>(CreateTPUMergeVariablesWithExecutePass());
-  // TODO(b/147020076): Enable this pass.
-  // pm.addPass(CreateTPUVariableReformattingPass());
+  pm.addPass(CreateTPUVariableReformattingPass());
   pm.addNestedPass<FuncOp>(CreateFunctionalToExecutorDialectConversionPass());
   pm.addNestedPass<FuncOp>(CreateBreakUpIslandsPass());
   pm.addNestedPass<FuncOp>(TFDevice::CreateReplicateToIslandPass());
",0,train
5dc0ab7565afc0f707adc628c2865233a9702cfa,tensorflow/tensorflow,"Inline read/write ops of Stack and TensorArray.
Change: 113373813",stack_ops.cc,"@@ -136,6 +136,8 @@ class StackPushOp : public OpKernel {
     stack->Push(PersistentTensor(ctx->input(1)));
     ctx->set_output(0, ctx->input(1));
   }
+
+  bool IsExpensive() override { return false; }
 };
 
 REGISTER_KERNEL_BUILDER(Name(""StackPush"").Device(DEVICE_CPU), StackPushOp);
@@ -165,6 +167,8 @@ class StackPopOp : public OpKernel {
                                     ""Calling Pop() when the stack is empty.""));
     ctx->set_output(0, *value.AccessTensor(ctx));
   }
+
+  bool IsExpensive() override { return false; }
 };
 
 REGISTER_KERNEL_BUILDER(Name(""StackPop"").Device(DEVICE_CPU), StackPopOp);
",0,test
5dc0ab7565afc0f707adc628c2865233a9702cfa,tensorflow/tensorflow,"Inline read/write ops of Stack and TensorArray.
Change: 113373813",tensor_array_ops.cc,"@@ -273,6 +273,8 @@ class TensorArrayWriteOp : public OpKernel {
     PersistentTensor persistent_tensor(*tensor_value);
     OP_REQUIRES_OK(ctx, tensor_array->Write(ctx, index, &persistent_tensor));
   }
+
+  bool IsExpensive() override { return false; }
 };
 
 #define REGISTER_WRITE(type)                                                 \
@@ -332,6 +334,8 @@ class TensorArrayReadOp : public OpKernel {
     ctx->set_output(0, *value.AccessTensor(ctx));
   }
 
+  bool IsExpensive() override { return false; }
+
  private:
   DataType dtype_;
 };
",0,test
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_instruction.cc,"@@ -2060,6 +2060,14 @@ std::vector<string> HloInstruction::ExtraAttributesToString() const {
     extra.push_back(
         StrCat(""outfeed_config=\"""", CEscape(outfeed_config_), ""\""""));
   }
+  if (opcode() == HloOpcode::kRng) {
+    extra.push_back(
+        StrCat(""distribution="", RandomDistributionToString(distribution_)));
+  }
+  if (opcode() == HloOpcode::kReducePrecision) {
+    extra.push_back(StrCat(""exponent_bits="", exponent_bits_));
+    extra.push_back(StrCat(""mantissa_bits="", mantissa_bits_));
+  }
   return extra;
 }
 
@@ -3029,6 +3037,28 @@ string OpMetadataToString(const OpMetadata& metadata) {
   return Join(result, "" "");
 }
 
+string RandomDistributionToString(const RandomDistribution& distribution) {
+  return tensorflow::str_util::Lowercase(RandomDistribution_Name(distribution));
+}
+
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name) {
+  static std::unordered_map<string, RandomDistribution>* map = [] {
+    static auto* map = new std::unordered_map<string, RandomDistribution>;
+    for (int i = 0; i < RandomDistribution_ARRAYSIZE; i++) {
+      if (RandomDistribution_IsValid(i)) {
+        auto value = static_cast<RandomDistribution>(i);
+        (*map)[RandomDistributionToString(value)] = value;
+      }
+    }
+    return map;
+  }();
+  auto found = map->find(tensorflow::str_util::Lowercase(name));
+  if (found == map->end()) {
+    return InvalidArgument(""Unknown distribution"");
+  }
+  return found->second;
+}
+
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind) {
   return os << ToString(kind);
 }
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_instruction.h,"@@ -1285,9 +1285,12 @@ string ToString(HloInstruction::FusionKind kind);
 StatusOr<HloInstruction::FusionKind> StringToFusionKind(
     const string& kind_name);
 
-// Custom stringification functions for protos that live inside HloInstruction.
+// Custom (de)stringification functions for protos that live inside
+// HloInstruction.
 string PaddingConfigToString(const PaddingConfig& padding);
 string OpMetadataToString(const OpMetadata& metadata);
+string RandomDistributionToString(const RandomDistribution& distribution);
+StatusOr<RandomDistribution> StringToRandomDistribution(const string& name);
 
 std::ostream& operator<<(std::ostream& os, HloInstruction::FusionKind kind);
 
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_lexer.cc,"@@ -17,7 +17,6 @@ limitations under the License.
 
 #include <unordered_map>
 
-#include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/shape_util.h""
 #include ""tensorflow/compiler/xla/statusor.h""
 #include ""tensorflow/compiler/xla/util.h""
@@ -153,15 +152,15 @@ TokKind HloLexer::LexToken() {
   }
 }
 
-// Lex a shape, name, keyword, opcode, attribute name, or the dim labels
-// pattern.
+// Lex a shape, name, keyword, attribute name, the dim labels pattern, and
+// other identifiers.
 //
 // shape    ::= ([a-zA-Z0-9_]*[0-9]*)\[([0-9,]*)\](?:\s*{([0-9,]*)})?
 // name     ::= [a-zA-Z_][a-zA-Z0-9_.-]*:
 // keyword  ::= HloModule, ENTRY, ...
-// opcode   ::= add, greater-than, ...
 // attribute_name ::= condition, body, dimensions, ...
 // dim_labels_pattern ::= [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
+// identifiers ::= other cases that match [a-zA-Z_][a-zA-Z0-9_.-]*
 TokKind HloLexer::LexIdentifier() {
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
@@ -220,20 +219,6 @@ TokKind HloLexer::LexIdentifier() {
 
 #undef KEYWORD
 
-  // See if this is an opcode.
-  auto opcode = StringToHloOpcode(identifier.ToString());
-  if (opcode.ok()) {
-    opcode_val_ = opcode.ValueOrDie();
-    return TokKind::kOpcode;
-  }
-
-  // See if this is an fusion kind.
-  auto kind = xla::StringToFusionKind(identifier.ToString());
-  if (kind.ok()) {
-    fusion_kind_val_ = kind.ValueOrDie();
-    return TokKind::kFusionKind;
-  }
-
   {
     auto consumable = RegexpStringPieceFromPointers(token_start_, buf_.end());
     static LazyRE2 dim_labels_pattern = {
@@ -244,8 +229,9 @@ TokKind HloLexer::LexIdentifier() {
       return TokKind::kDimLabels;
     }
   }
-  current_ptr_ = token_start_ + 1;
-  return TokKind::kError;
+
+  str_val_ = identifier.ToString();
+  return TokKind::kIdent;
 }
 
 // Lex names after a % character.
@@ -428,14 +414,12 @@ string TokKindToString(TokKind kind) {
       return ""kDxD"";
     case TokKind::kPad:
       return ""kPad"";
+    case TokKind::kIdent:
+      return ""kIdent"";
     case TokKind::kString:
       return ""kString"";
     case TokKind::kShape:
       return ""kShape"";
-    case TokKind::kOpcode:
-      return ""kOpcode"";
-    case TokKind::kFusionKind:
-      return ""kFusionKind"";
     case TokKind::kInt:
       return ""kInt"";
     case TokKind::kDecimal:
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_lexer.h,"@@ -18,9 +18,8 @@ limitations under the License.
 
 #include <string>
 
-#include ""tensorflow/compiler/xla/service/hlo_instruction.h""
-#include ""tensorflow/compiler/xla/service/hlo_opcode.h""
 #include ""tensorflow/compiler/xla/tools/parser/hlo_token.h""
+#include ""tensorflow/compiler/xla/types.h""
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
 #include ""tensorflow/core/lib/core/stringpiece.h""
 #include ""tensorflow/core/platform/logging.h""
@@ -48,6 +47,7 @@ class HloLexer {
       case TokKind::kDxD:
       case TokKind::kPad:
       case TokKind::kString:
+      case TokKind::kIdent:
         return str_val_;
       default:
         LOG(FATAL) << ""This token does not have string value"";
@@ -57,14 +57,6 @@ class HloLexer {
     CHECK(GetKind() == TokKind::kShape);
     return shape_val_;
   }
-  HloOpcode GetOpcodeVal() const {
-    CHECK(GetKind() == TokKind::kOpcode);
-    return opcode_val_;
-  }
-  HloInstruction::FusionKind GetFusionKindVal() const {
-    CHECK(GetKind() == TokKind::kFusionKind);
-    return fusion_kind_val_;
-  }
   int64 GetInt64Val() const {
     CHECK(GetKind() == TokKind::kInt);
     return int64_val_;
@@ -114,8 +106,6 @@ class HloLexer {
   TokKind current_kind_;
   string str_val_;
   Shape shape_val_;
-  HloOpcode opcode_val_;
-  HloInstruction::FusionKind fusion_kind_val_;
   int64 int64_val_;
   double decimal_val_;
 };
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_parser.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/tools/parser/hlo_parser.h""
 
 #include ""tensorflow/compiler/xla/literal_util.h""
+#include ""tensorflow/compiler/xla/service/hlo_opcode.h""
 #include ""tensorflow/compiler/xla/shape_util.h""
 #include ""tensorflow/compiler/xla/util.h""
 #include ""tensorflow/core/lib/gtl/map_util.h""
@@ -104,6 +105,7 @@ class HloParser {
     kPaddingConfig,
     kMetadata,
     kFusionKind,
+    kDistribution,
   };
 
   struct AttrConfig {
@@ -174,6 +176,7 @@ class HloParser {
   bool ParseShape(Shape* result);
   bool ParseOpcode(HloOpcode* result);
   bool ParseFusionKind(HloInstruction::FusionKind* result);
+  bool ParseRandomDistribution(RandomDistribution* result);
   bool ParseInt64(int64* result);
   bool ParseDouble(double* result);
   bool ParseBool(bool* result);
@@ -816,10 +819,36 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           shape, operands[0], config ? *config : """"));
       break;
     }
+    case HloOpcode::kRng: {
+      optional<RandomDistribution> distribution;
+      attrs[""distribution""] = {/*required=*/true, AttrTy::kDistribution,
+                               &distribution};
+      if (!ParseOperands(&operands) || !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction = builder->AddInstruction(
+          HloInstruction::CreateRng(shape, *distribution, operands));
+      break;
+    }
+    case HloOpcode::kReducePrecision: {
+      optional<int64> exponent_bits;
+      optional<int64> mantissa_bits;
+      attrs[""exponent_bits""] = {/*required=*/true, AttrTy::kInt64,
+                                &exponent_bits};
+      attrs[""mantissa_bits""] = {/*required=*/true, AttrTy::kInt64,
+                                &mantissa_bits};
+      if (!ParseOperands(&operands, /*expected_size=*/1) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+      instruction =
+          builder->AddInstruction(HloInstruction::CreateReducePrecision(
+              shape, operands[0], static_cast<int>(*exponent_bits),
+              static_cast<int>(*mantissa_bits)));
+      break;
+    }
     case HloOpcode::kConditional:
     case HloOpcode::kCustomCall:
-    case HloOpcode::kReducePrecision:
-    case HloOpcode::kRng:
     case HloOpcode::kTrace:
       return TokenError(StrCat(""parsing not yet implemented for op: "",
                                HloOpcodeString(opcode)));
@@ -1548,6 +1577,15 @@ bool HloParser::ParseAttributeHelper(
         static_cast<optional<OpMetadata>*>(attr_out_ptr)->emplace(result);
         return true;
       }
+      case AttrTy::kDistribution: {
+        RandomDistribution result;
+        if (!ParseRandomDistribution(&result)) {
+          return false;
+        }
+        static_cast<optional<RandomDistribution>*>(attr_out_ptr)
+            ->emplace(result);
+        return true;
+      }
     }
   }();
   if (!success) {
@@ -2024,20 +2062,51 @@ bool HloParser::ParseMetadata(OpMetadata* metadata) {
 
 bool HloParser::ParseOpcode(HloOpcode* result) {
   VLOG(1) << ""ParseOpcode"";
-  if (lexer_.GetKind() != TokKind::kOpcode) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError(""expects opcode"");
   }
-  *result = lexer_.GetOpcodeVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToHloOpcode(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf(""expects opcode but sees: %s, error: %s"", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
   lexer_.Lex();
   return true;
 }
 
 bool HloParser::ParseFusionKind(HloInstruction::FusionKind* result) {
   VLOG(1) << ""ParseFusionKind"";
-  if (lexer_.GetKind() != TokKind::kFusionKind) {
+  if (lexer_.GetKind() != TokKind::kIdent) {
     return TokenError(""expects fusion kind"");
   }
-  *result = lexer_.GetFusionKindVal();
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToFusionKind(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf(""expects fusion kind but sees: %s, error: %s"", val.c_str(),
+               status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
+  lexer_.Lex();
+  return true;
+}
+
+bool HloParser::ParseRandomDistribution(RandomDistribution* result) {
+  VLOG(1) << ""ParseRandomDistribution"";
+  if (lexer_.GetKind() != TokKind::kIdent) {
+    return TokenError(""expects random distribution"");
+  }
+  string val = lexer_.GetStrVal();
+  auto status_or_result = StringToRandomDistribution(val);
+  if (!status_or_result.ok()) {
+    return TokenError(
+        Printf(""expects random distribution but sees: %s, error: %s"",
+               val.c_str(), status_or_result.status().error_message().c_str()));
+  }
+  *result = status_or_result.ValueOrDie();
   lexer_.Lex();
   return true;
 }
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_parser_test.cc,"@@ -654,6 +654,31 @@ ENTRY %InfeedToOutfeed () -> (u32[3], pred[]) {
   %outfeed.1 = () outfeed((u32[3]{0}, pred[]) %infeed.1)
 }
 
+)""
+},
+// Rng
+{
+""Rng"",
+R""(HloModule rng_module:
+
+ENTRY %Rng () -> f32[8] {
+  %constant = f32[] constant(0)
+  %constant.1 = f32[] constant(1)
+  ROOT %rng = f32[8]{0} rng(f32[] %constant, f32[] %constant.1), distribution=rng_uniform
+}
+
+)""
+},
+// Reduce precision
+{
+""ReducePrevison"",
+R""(HloModule reduce_precision:
+
+ENTRY %ReducePrecision () -> f32[1] {
+  %constant = f32[1]{0} constant({3.14159})
+  ROOT %reduce-precision = f32[1]{0} reduce-precision(f32[1]{0} %constant), exponent_bits=8, mantissa_bits=10
+}
+
 )""
 }
   });
",0,train
6eec9c2ea33f3b86012cb0ea2aeb9e49e65bc716,tensorflow/tensorflow,"[XLA] Hlo parser: support rng and reduce-precision. Also simplify the lexer by regarding several things as identifier.

PiperOrigin-RevId: 177548483",hlo_token.h,"@@ -18,6 +18,9 @@ limitations under the License.
 
 #include <string>
 
+#include ""tensorflow/compiler/xla/types.h""
+#include ""tensorflow/core/platform/types.h""
+
 namespace xla {
 namespace tools {
 
@@ -60,10 +63,9 @@ enum class TokKind {
   kDimLabels,      // [0-9bf]{2,}_[0-9io]{2,}->[0-9bf]{2,}
   kDxD,            // [0-9]+(x[0-9]+)+
   kPad,            // [0-9]+_[0-9]+(_[0-9]+)?(x[0-9]+_[0-9]+(_[0-9]+)?)*
+  kIdent,          // other identifiers
   kString,         // ""abcd\""\n""
   kShape,          // f32[2,3]{1,0}
-  kOpcode,         // add
-  kFusionKind,     // kLoop, kOutput, ...
   kInt,            // 42
   kDecimal,        // 4.2
 };
",0,train
3e7aab52a7f7f0dc797753f2aae9e4c2c2ba0356,tensorflow/tensorflow,"Unbreak the build.
Change: 128425319",estimator_utils_test.py,"@@ -107,7 +107,7 @@ class EstimatorUtilsTest(tf.test.TestCase):
     }
     self.assertEqual(expected_base_features, base_features)
 
-    expected_targets = {""g"": mocks.MockTensor(""Out iue"", tf.int32)}
+    expected_targets = mocks.MockTensor(""Out iue"", tf.int32)
     self.assertEqual(expected_targets, targets)
 
     self.assertEqual(3, len(feature_columns))
",0,test
631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss().

PiperOrigin-RevId: 174894493",head.py,"@@ -172,7 +172,8 @@ def multi_label_head(n_classes,
     weight_column: A string or a `_NumericColumn` created by
       `tf.feature_column.numeric_column` defining feature column representing
       weights. It is used to down weight or boost examples during training. It
-      will be multiplied by the loss of the example.
+      will be multiplied by the loss of the example.  Per-class weighting is
+      not supported.
     thresholds: Iterable of floats in the range `(0, 1)`. Accuracy, precision
       and recall metrics are evaluated for each threshold value. The threshold
       is applied to the predicted probabilities, i.e. above the threshold is
",0,test
631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss().

PiperOrigin-RevId: 174894493",head_test.py,"@@ -226,7 +226,7 @@ class MultiLabelHead(test.TestCase):
 
   def test_weight_should_not_impact_prediction(self):
     n_classes = 4
-    head = head_lib.multi_label_head(n_classes, weight_column='label_weights')
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
     self.assertEqual(n_classes, head.logits_dimension)
 
     logits = np.array(
@@ -237,7 +237,7 @@ class MultiLabelHead(test.TestCase):
     spec = head.create_estimator_spec(
         features={
             'x': np.array(((42,),), dtype=np.int32),
-            'label_weights': weights_2x1,
+            'example_weights': weights_2x1,
         },
         mode=model_fn.ModeKeys.PREDICT,
         logits=logits)
@@ -549,7 +549,7 @@ class MultiLabelHead(test.TestCase):
 
   def test_eval_with_weights(self):
     n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='label_weights')
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
 
     logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
     labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
@@ -563,7 +563,7 @@ class MultiLabelHead(test.TestCase):
     spec = head.create_estimator_spec(
         features={
             'x': np.array([[41], [42]], dtype=np.int32),
-            'label_weights': np.array([[1.], [2.]], dtype=np.float32),
+            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
         },
         mode=model_fn.ModeKeys.EVAL,
         logits=logits,
@@ -605,7 +605,7 @@ class MultiLabelHead(test.TestCase):
   def test_train_create_loss_large_logits(self):
     """"""Tests head.create_loss for train mode and large logits.""""""
     n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='label_weights')
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
 
     logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
     labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
@@ -623,7 +623,7 @@ class MultiLabelHead(test.TestCase):
     actual_weighted_sum_loss, actual_example_weight_sum, _ = head.create_loss(
         features={
             'x': np.array(((42,),), dtype=np.int32),
-            'label_weights': weights
+            'example_weights': weights
         },
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
@@ -742,7 +742,7 @@ class MultiLabelHead(test.TestCase):
 
   def test_train_with_weights(self):
     n_classes = 2
-    head = head_lib.multi_label_head(n_classes, weight_column='label_weights')
+    head = head_lib.multi_label_head(n_classes, weight_column='example_weights')
 
     logits = np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)
     labels = np.array([[1, 0], [1, 1]], dtype=np.int64)
@@ -761,7 +761,7 @@ class MultiLabelHead(test.TestCase):
     spec = head.create_estimator_spec(
         features={
             'x': np.array([[41], [42]], dtype=np.int32),
-            'label_weights': np.array([[1.], [2.]], dtype=np.float32),
+            'example_weights': np.array([[1.], [2.]], dtype=np.float32),
         },
         mode=model_fn.ModeKeys.TRAIN,
         logits=logits,
",0,test
631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss().

PiperOrigin-RevId: 174894493",multi_head.py,"@@ -161,12 +161,52 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
 
   def create_loss(self, features, mode, logits, labels):
     """"""See `Head`.""""""
-    # TODO(roumposg): Implement it.
-    raise NotImplementedError('create_loss not yet implemented for MultiHead.')
+    # TODO(roumposg): Add support for logits as single Tensor (with
+    # _split_logits utility).
+    if not isinstance(logits, dict):
+      raise ValueError('logits must be a dict.  Single Tensor support coming '
+                       'soon.')
+    weighted_sum_losses = []
+    example_weight_sums = []
+    labels_by_head = {}
+    for head in self._heads:
+      (weighted_sum_loss,
+       example_weight_sum, processed_labels) = head.create_loss(
+           features, mode, logits[head.name], labels[head.name])
+      weighted_sum_losses.append(weighted_sum_loss)
+      example_weight_sums.append(example_weight_sum)
+      labels_by_head[head.name] = processed_labels
+
+    weighted_sum_losses = tuple(weighted_sum_losses)
+    with ops.name_scope('merge_losses',
+                        values=weighted_sum_losses + (self._head_weights or
+                                                      tuple())):
+      if self._head_weights:
+        head_weighted_losses = []
+        head_weighted_example_weight_sums = []
+        for loss, example_weight_sum, weight in zip(weighted_sum_losses,
+                                                    example_weight_sums,
+                                                    self._head_weights):
+          head_weighted_losses.append(math_ops.multiply(loss, weight))
+          head_weighted_example_weight_sums.append(math_ops.multiply(
+              example_weight_sum, weight))
+        merged_weighted_sum_loss = math_ops.add_n(head_weighted_losses)
+        merged_example_weight_sum = math_ops.add_n(
+            head_weighted_example_weight_sums)
+      else:
+        merged_weighted_sum_loss = math_ops.add_n(weighted_sum_losses)
+        merged_example_weight_sum = math_ops.add_n(example_weight_sums)
+
+    return head_lib.LossSpec(
+        weighted_sum_loss=merged_weighted_sum_loss,
+        example_weight_sum=merged_example_weight_sum,
+        processed_labels=labels_by_head)
 
   def create_estimator_spec(
       self, features, mode, logits, labels=None, train_op_fn=None):
     """"""See `_Head`.""""""
+    # TODO(roumposg): Add support for logits as single Tensor (with
+    # _split_logits utility).
     if not isinstance(logits, dict):
       raise ValueError('logits must be a dict. Given: {}'.format(logits))
     if labels and not isinstance(labels, dict):
@@ -183,6 +223,8 @@ class _MultiHead(head_lib._Head):  # pylint:disable=protected-access
               labels=labels[head_name] if labels else None,
               train_op_fn=_no_op_train_fn))
 
+    # TODO(roumposg): Add LOSS and LOSS_MEAN summaries for the total head-
+    # combined loss.
     if mode == model_fn.ModeKeys.TRAIN:
       if train_op_fn is None:
         raise ValueError('train_op_fn can not be None in TRAIN mode.')
",0,test
631fd6fbdb6a8286c1030cf317a6d1eaca334100,tensorflow/tensorflow,"Implementing MultiHead's create_loss().

PiperOrigin-RevId: 174894493",multi_head_test.py,"@@ -178,7 +178,7 @@ class MultiHeadTest(test.TestCase):
     #        (1 - labels) * (logits > 0) * logits =>
     # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
     # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
-    # Average over classes, weighted sum ober batch and heads.
+    # Average over classes, weighted sum over batch and heads.
     expected_loss_head1 = 17.5
     expected_loss_head2 = 30.0
     expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
@@ -231,18 +231,25 @@ class MultiHeadTest(test.TestCase):
 
     logits = {'head1': np.array([[-10., 10.], [-15., 10.]], dtype=np.float32)}
     labels = {'head1': np.array([[1, 0], [1, 1]], dtype=np.int64)}
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        r'create_loss not yet implemented for MultiHead\.'):
-      multi_head.create_loss(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=logits,
-          labels=labels)
+    loss = multi_head.create_loss(
+        features={'x': np.array(((42,),), dtype=np.int32)},
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)[0]
+    tol = 1e-3
+    with self.test_session():
+      # Unreduced loss of the head is [[(10 + 10) / 2], (15 + 0) / 2]
+      # (averaged over classes, sum-reduced over examples).
+      self.assertAllClose(17.5, loss.eval(), rtol=tol, atol=tol)
 
   def test_train_create_loss_two_heads_with_weights(self):
-    head1 = head_lib.multi_label_head(n_classes=2, name='head1')
-    head2 = head_lib.multi_label_head(n_classes=3, name='head2')
+    # Use different example weighting for each head weighting.
+    weights1 = np.array([[1.], [2.]], dtype=np.float32)
+    weights2 = np.array([[2.], [3.]])
+    head1 = head_lib.multi_label_head(n_classes=2, name='head1',
+                                      weight_column='weights1')
+    head2 = head_lib.multi_label_head(n_classes=3, name='head2',
+                                      weight_column='weights2')
     multi_head = multi_head_lib.multi_head(
         [head1, head2], head_weights=[1., 2.])
 
@@ -255,14 +262,27 @@ class MultiHeadTest(test.TestCase):
         'head1': np.array([[1, 0], [1, 1]], dtype=np.int64),
         'head2': np.array([[0, 1, 0], [1, 1, 0]], dtype=np.int64),
     }
-    with self.assertRaisesRegexp(
-        NotImplementedError,
-        r'create_loss not yet implemented for MultiHead\.'):
-      multi_head.create_loss(
-          features={'x': np.array(((42,),), dtype=np.int32)},
-          mode=model_fn.ModeKeys.TRAIN,
-          logits=logits,
-          labels=labels)
+    weighted_sum_loss, example_weight_sum, _ = multi_head.create_loss(
+        features={
+            'x': np.array(((42,),), dtype=np.int32),
+            'weights1': weights1,
+            'weights2': weights2
+        },
+        mode=model_fn.ModeKeys.TRAIN,
+        logits=logits,
+        labels=labels)
+    tol = 1e-3
+    with self.test_session():
+      # loss of the first head is [[(10 + 10) / 2], [(15 + 0) / 2]]
+      # = [10, 7.5]
+      # weighted_sum_loss = 1 * 10 + 2 * 7.5 = 25
+      # loss of the second head is [[(20 + 20 + 20) / 3], [(30 + 0 + 0) / 3]]
+      # = [20, 10]
+      # weighted_sum_loss = 2 * 20 + 3 * 10 = 70
+      # head-weighted merge = 1 * 25 + 2 * 70 = 165
+      self.assertAllClose(165, weighted_sum_loss.eval(), rtol=tol, atol=tol)
+      # example_weight_sum = 1 * (1 + 2) + 2 * (2 + 3) = 13
+      self.assertAllClose(13., example_weight_sum.eval(), rtol=tol, atol=tol)
 
   def test_train_one_head(self):
     head1 = head_lib.multi_label_head(n_classes=2, name='head1')
@@ -332,7 +352,7 @@ class MultiHeadTest(test.TestCase):
     #        (1 - labels) * (logits > 0) * logits =>
     # head1: expected_unweighted_loss = [[10., 10.], [15., 0.]]
     # head2: expected_unweighted_loss = [[20., 20., 20.], [30., 0., 0]]
-    # Average over classes, weighted sum ober batch and heads.
+    # Average over classes, weighted sum over batch and heads.
     expected_loss_head1 = 17.5
     expected_loss_head2 = 30.0
     expected_loss = 1. * expected_loss_head1 + 2. * expected_loss_head2
",0,test
e0e7cbbc55af32041b1721bb4600a38f352d8242,tensorflow/tensorflow,"Disable TF whitelisting in Keras Layers.

PiperOrigin-RevId: 230763811",base_layer.py,"@@ -524,13 +524,6 @@ class Layer(checkpointable.Checkpointable):
     # models using the functional API).
     build_graph = tf_utils.are_all_symbolic_tensors(input_list)
 
-    if build_graph:
-      # Only create Keras history if at least one tensor originates from a
-      # `keras.Input`. Otherwise this Layer may be being used outside the Keras
-      # framework.
-      if base_layer_utils.uses_keras_input_layers(inputs):
-        base_layer_utils.create_keras_history(inputs)
-
     # Handle Keras mask propagation from previous layer to current layer.
     previous_mask = None
     if build_graph and (not hasattr(self, '_compute_previous_mask') or
",0,train
e0e7cbbc55af32041b1721bb4600a38f352d8242,tensorflow/tensorflow,"Disable TF whitelisting in Keras Layers.

PiperOrigin-RevId: 230763811",tensorflow_op_layer_test.py,"@@ -1,131 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""Test for allowing TF ops to work with Keras Functional API.""""""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from absl.testing import parameterized
-import numpy as np
-
-from tensorflow.python import keras
-from tensorflow.python.framework import ops
-from tensorflow.python.keras import keras_parameterized
-from tensorflow.python.keras import testing_utils
-from tensorflow.python.keras.optimizer_v2 import adam
-from tensorflow.python.ops import gen_nn_ops
-from tensorflow.python.ops import math_ops
-from tensorflow.python.platform import test
-from tensorflow.python.util import nest
-
-
-def _single_op_at_end():
-  inputs = keras.Input(shape=(10,))
-  x = keras.layers.Dense(10)(inputs)
-  outputs = gen_nn_ops.relu(x, name='hey')
-  return inputs, outputs
-
-
-def _multiple_ops_at_end():
-  inputs = keras.Input(shape=(10,))
-  inputs = keras.Input(shape=(10,))
-  x = keras.layers.Dense(10)(inputs)
-  x = gen_nn_ops.relu(x, name='hey')
-  outputs = gen_nn_ops.relu(x, name='hey2')
-  return inputs, outputs
-
-
-def _single_op_in_middle():
-  inputs = keras.Input(shape=(10,))
-  x = keras.layers.Dense(10)(inputs)
-  x = gen_nn_ops.relu(x, name='hey')
-  outputs = keras.layers.Dense(10)(x)
-  return inputs, outputs
-
-
-def _multiple_ops_in_middle():
-  inputs = keras.Input(shape=(10,))
-  x = keras.layers.Dense(10)(inputs)
-  x = gen_nn_ops.relu(x, name='hey')
-  x = gen_nn_ops.relu(x, name='hey2')
-  outputs = keras.layers.Dense(10)(x)
-  return inputs, outputs
-
-
-def _single_standalone_branch():
-  inputs = keras.Input(shape=(10,))
-  x = keras.layers.Dense(10)(inputs)
-  outputs = x * 2
-  return inputs, outputs
-
-
-def _single_op_with_attrs():
-  inputs = keras.Input(shape=(10,))
-  x = math_ops.reduce_mean(inputs, axis=1, keepdims=True)
-  outputs = keras.layers.Dense(10)(x)
-  return inputs, outputs
-
-
-@keras_parameterized.run_all_keras_modes
-class AutoLambdaTest(keras_parameterized.TestCase):
-
-  @parameterized.named_parameters(
-      ('single_op_at_end', _single_op_at_end),
-      ('multiple_ops_at_end', _multiple_ops_at_end),
-      ('single_op_in_middle', _single_op_in_middle),
-      ('multiple_ops_in_middle', _multiple_ops_in_middle),
-      ('single_standalone_branch', _single_standalone_branch),
-      ('single_op_with_attrs', _single_op_with_attrs))
-  def test_autolambda(self, model_fn):
-    inputs, outputs = model_fn()
-    model = keras.Model(inputs, outputs)
-    model.compile(
-        adam.Adam(0.001), 'mse', run_eagerly=testing_utils.should_run_eagerly())
-
-    np_inputs = nest.map_structure(lambda x: np.ones((10, 10), 'float32'),
-                                   inputs)
-    np_outputs = nest.map_structure(lambda x: np.ones((10, 10), 'float32'),
-                                    outputs)
-    model.fit(np_inputs, np_outputs, batch_size=2)
-
-  def test_numerical_correctness_simple(self):
-    x = ops.convert_to_tensor([[-1., 0., -2., 1.]])
-    inputs = keras.Input(shape=(4,))
-    outputs = gen_nn_ops.relu(inputs)
-    model = keras.Model(inputs, outputs)
-    y = self.evaluate(model(x))
-    self.assertAllClose(y, [[0., 0., 0., 1.]])
-
-  def test_numerical_correctness_with_attrs(self):
-    x = ops.convert_to_tensor([[1.5, 1.5], [2.5, 3.5]])
-    inputs = keras.Input(shape=(10,))
-    outputs = math_ops.reduce_mean(inputs, axis=1)
-    model = keras.Model(inputs, outputs)
-    y = self.evaluate(model(x))
-    self.assertAllClose(y, [1.5, 3.])
-
-  def test_serialization(self):
-    x = ops.convert_to_tensor([-1., 0., -2., 1.])
-    inputs = keras.Input(shape=(4,))
-    outputs = gen_nn_ops.relu(inputs)
-    model1 = keras.Model(inputs, outputs)
-    y1 = self.evaluate(model1(x))
-    model2 = model1.from_config(model1.get_config())
-    y2 = self.evaluate(model2(x))
-    self.assertAllClose(y1, y2)
-
-
-if __name__ == '__main__':
-  test.main()
",0,train
7b180f700755b0a3fc0eb9de349f7caacc422d2d,tensorflow/tensorflow,"Whitelist ExtractImagePatches op

Enable use of ExtractImagePatches with TFLite when
using select TF ops.

See also #21526.

PiperOrigin-RevId: 304676488
Change-Id: I3b3aafdc16bf04d3204b2a903274f8a990800e82",whitelisted_flex_ops.cc,"@@ -117,6 +117,7 @@ bool IsWhitelistedFlexOp(const std::string& tensorflow_op_name) {
           ""Exit"",
           ""Exp"",
           ""ExpandDims"",
+          ""ExtractImagePatches"",
           ""FakeQuantWithMinMaxArgs"",
           ""FakeQuantWithMinMaxArgsGradient"",
           ""FakeQuantWithMinMaxVars"",
",0,test
a8712e5c8e47a7e03efa45dd7e900b866309c3b2,tensorflow/tensorflow,"Fix parallel_for converter code to record gradient when explicitly creating an
op in the graph.

PiperOrigin-RevId: 241435115",backprop_test.py,"@@ -1339,7 +1339,6 @@ class BackpropTest(test.TestCase):
       self.assertAllEqual(da[0], tf_da[0].eval())
 
 
-@test_util.run_all_in_graph_and_eager_modes
 class JacobianTest(test.TestCase):
 
   def _jacobian(self, experimental_use_pfor):
@@ -1430,6 +1429,22 @@ class JacobianTest(test.TestCase):
     self.assertAllClose(g.jacobian(y, x, parallel_iterations=2),
                         g.jacobian(y, x, parallel_iterations=3))
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_nested_jacobian(self):
+    if context.executing_eagerly():
+      # TODO(agarwal): b/128842926
+      self.skipTest('Conversion of function calls not implemented yet.')
+    x = array_ops.ones((10, 2))
+    with backprop.GradientTape(persistent=False) as g:
+      g.watch(x)
+      with backprop.GradientTape(persistent=False) as gg:
+        gg.watch(x)
+        y = math_ops.reduce_sum(math_ops.square(x))
+      dy_x = gg.jacobian(y, x)
+    dy_xx = g.batch_jacobian(dy_x, x)
+    dy_xx_answer = [[[2., 0], [0, 2.]]] * 10
+    self.assertAllClose(dy_xx_answer, self.evaluate(dy_xx))
+
 
 @test_util.run_all_in_graph_and_eager_modes
 class BatchJacobianTest(test.TestCase):
",0,train
a8712e5c8e47a7e03efa45dd7e900b866309c3b2,tensorflow/tensorflow,"Fix parallel_for converter code to record gradient when explicitly creating an
op in the graph.

PiperOrigin-RevId: 241435115",pfor.py,"@@ -22,6 +22,7 @@ from __future__ import print_function
 import collections
 
 from tensorflow.python.eager import context
+from tensorflow.python.eager import execute
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -838,10 +839,15 @@ class RegisterPForWithArgs(RegisterPFor):
     return converter
 
 
+# TODO(agarwal): call raw_ops instead of calling these low level routines.
 def _create_op(op_type, inputs, op_dtypes, attrs=None):
   """"""Utility to create an op.""""""
-  return ops.get_default_graph().create_op(
+  op = ops.get_default_graph().create_op(
       op_type, inputs, op_dtypes, attrs=attrs, compute_device=True)
+  flat_attrs = nest.flatten([(a, op.get_attr(a)) for a in attrs])
+  execute.record_gradient(
+      op_type, op.inputs, tuple(flat_attrs), op.outputs[:], """")
+  return op
 
 
 WrappedTensor = collections.namedtuple(""WrappedTensor"",
",0,train
83903a2e993c5456a2038de88b9cf3b9f0e1436f,tensorflow/tensorflow,"Clarify the softmax op documentation.
Change: 125961539",nn_ops.cc,"@@ -1013,7 +1013,7 @@ Computes softmax activations.
 
 For each batch `i` and class `j` we have
 
-    softmax[i, j] = exp(logits[i, j]) / sum(exp(logits[i]))
+    softmax[i, j] = exp(logits[i, j]) / sum_j(exp(logits[i, j]))
 
 logits: 2-D with shape `[batch_size, num_classes]`.
 softmax: Same shape as `logits`.
",0,train
5de1fdaf8aef6d7b219f9e7a2e54027869a08912,tensorflow/tensorflow,"Remove TODO: Adding a clamp operation makes no performance difference.

PiperOrigin-RevId: 333536789
Change-Id: Ia19f7aad7bc13ddeac0f226c04ecb14c5c67445e",lstm_parser.cc,"@@ -288,7 +288,6 @@ absl::Status BuildCellStateUpdate(GraphFloat32* graph, ObjectReader* reader,
     return absl::OkStatus();
   }
 
-  // TODO(b/157166356): Maybe add OperationType::CLAMP ?
   Value* max_clipped_state = CreateNewSimilarValue(graph, new_cell_state);
   {
     // #4 elementwise minimum: min(#3, clip)
@@ -398,7 +397,6 @@ absl::Status BuildOutputStateUpdate(GraphFloat32* graph, ObjectReader* reader,
     return absl::OkStatus();
   }
 
-  // TODO(b/157166356): Maybe add OperationType::CLAMP ?
   Value* max_clipped_state =
       CreateNewSimilarValue(graph, projected_output_state);
   {
",0,train
264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage.

PiperOrigin-RevId: 244869850",benchmark_tflite_model.cc,"@@ -23,52 +23,23 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
-#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h""
 #include ""tensorflow/lite/kernels/register.h""
 #include ""tensorflow/lite/model.h""
 #include ""tensorflow/lite/op_resolver.h""
 #include ""tensorflow/lite/string_util.h""
 #include ""tensorflow/lite/tools/benchmark/logging.h""
+#include ""tensorflow/lite/tools/evaluation/utils.h""
 
 #ifdef GEMMLOWP_PROFILING
 #include ""profiling/profiler.h""
 #endif
 
-#if defined(__ANDROID__)
-#include ""tensorflow/lite/delegates/gpu/gl_delegate.h""
-#endif
-
 #ifdef TFLITE_CUSTOM_OPS_HEADER
 void RegisterSelectedOps(::tflite::MutableOpResolver* resolver);
 #endif
 
 namespace tflite {
 namespace benchmark {
-namespace {
-
-#if defined(__ANDROID__)
-Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
-    tflite::FlatBufferModel* model) {
-  TfLiteGpuDelegateOptions options;
-  options.metadata = TfLiteGpuDelegateGetModelMetadata(model->GetModel());
-  options.compile_options.precision_loss_allowed = 1;
-  options.compile_options.preferred_gl_object_type =
-      TFLITE_GL_OBJECT_TYPE_FASTEST;
-  options.compile_options.dynamic_batch_enabled = 0;
-  return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options),
-                                        &TfLiteGpuDelegateDelete);
-}
-
-Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate() {
-  return Interpreter::TfLiteDelegatePtr(
-      NnApiDelegate(),
-      // NnApiDelegate() returns a singleton, so provide a no-op deleter.
-      [](TfLiteDelegate*) {});
-}
-
-#endif  // defined(__ANDROID__)
-
-}  // namespace
 
 void ProfilingListener::SetInterpreter(tflite::Interpreter* interpreter) {
   TFLITE_BENCHMARK_CHECK(interpreter);
@@ -469,18 +440,21 @@ BenchmarkTfLiteModel::TfLiteDelegatePtrMap BenchmarkTfLiteModel::GetDelegates()
     const {
   TfLiteDelegatePtrMap delegates;
   if (params_.Get<bool>(""use_gpu"")) {
-#if defined(__ANDROID__)
-    delegates.emplace(""GPU"", CreateGPUDelegate(model.get()));
-#else
-    TFLITE_LOG(WARN) << ""GPU acceleration is unsupported on this platform."";
-#endif
+    Interpreter::TfLiteDelegatePtr delegate =
+        evaluation::CreateGPUDelegate(model.get());
+    if (!delegate) {
+      TFLITE_LOG(WARN) << ""GPU acceleration is unsupported on this platform."";
+    } else {
+      delegates.emplace(""GPU"", std::move(delegate));
+    }
   }
   if (params_.Get<bool>(""use_nnapi"")) {
-#if defined(__ANDROID__)
-    delegates.emplace(""NNAPI"", CreateNNAPIDelegate());
-#else
-    TFLITE_LOG(WARN) << ""NNAPI acceleration is unsupported on this platform."";
-#endif
+    Interpreter::TfLiteDelegatePtr delegate = evaluation::CreateNNAPIDelegate();
+    if (!delegate) {
+      TFLITE_LOG(WARN) << ""NNAPI acceleration is unsupported on this platform."";
+    } else {
+      delegates.emplace(""NNAPI"", std::move(delegate));
+    }
   }
   return delegates;
 }
",0,test
264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage.

PiperOrigin-RevId: 244869850",tflite_inference_stage.cc,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/lite/profiling/time.h""
 #include ""tensorflow/lite/tools/evaluation/proto/evaluation_stages.pb.h""
+#include ""tensorflow/lite/tools/evaluation/utils.h""
 
 namespace tflite {
 namespace evaluation {
@@ -57,14 +58,36 @@ TfLiteStatus TfliteInferenceStage::Init() {
   model_ = FlatBufferModel::BuildFromFile(params.model_file_path().c_str());
   resolver_.reset(new ops::builtin::BuiltinOpResolver);
   InterpreterBuilder(*model_, *resolver_)(&interpreter_);
-  if (params.delegate() == TfliteInferenceParams::NNAPI) {
-    interpreter_->UseNNAPI(true);
-  }
   if (!interpreter_) {
     LOG(ERROR) << ""Could not build interpreter"";
     return kTfLiteError;
   }
   interpreter_->SetNumThreads(params.num_threads());
+
+  // TODO(b/122482115): Add support for multiple delegates in
+  // TfLiteInferenceParams.
+  if (params.delegate() == TfliteInferenceParams::NNAPI) {
+    Interpreter::TfLiteDelegatePtr delegate = CreateNNAPIDelegate();
+    if (delegate) {
+      delegates_.push_back(std::move(delegate));
+    } else {
+      LOG(WARNING) << ""NNAPI not supported"";
+    }
+  } else if (params.delegate() == TfliteInferenceParams::GPU) {
+    Interpreter::TfLiteDelegatePtr delegate = CreateGPUDelegate(model_.get());
+    if (!delegate) {
+      delegates_.push_back(std::move(delegate));
+    } else {
+      LOG(WARNING) << ""GPU not supported"";
+    }
+  }
+  for (int i = 0; i < delegates_.size(); ++i) {
+    if (interpreter_->ModifyGraphWithDelegate(delegates_[i].get()) !=
+        kTfLiteOk) {
+      LOG(FATAL) << ""Failed to apply delegate %d"" << i;
+    }
+  }
+
   interpreter_->AllocateTensors();
   model_info_ = GetTfliteModelInfo(*interpreter_);
 
",0,test
264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage.

PiperOrigin-RevId: 244869850",tflite_inference_stage.h,"@@ -67,6 +67,7 @@ class TfliteInferenceStage : public EvaluationStage {
   std::unique_ptr<FlatBufferModel> model_;
   std::unique_ptr<ops::builtin::BuiltinOpResolver> resolver_;
   std::unique_ptr<Interpreter> interpreter_;
+  std::vector<Interpreter::TfLiteDelegatePtr> delegates_;
 
   TfLiteModelInfo model_info_;
   std::vector<void*>* inputs_ = nullptr;
",0,test
264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage.

PiperOrigin-RevId: 244869850",utils.cc,"@@ -22,6 +22,11 @@ limitations under the License.
 #include <string>
 
 #include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h""
+
+#if defined(__ANDROID__)
+#include ""tensorflow/lite/delegates/gpu/gl_delegate.h""
+#endif
 
 namespace tflite {
 namespace evaluation {
@@ -44,5 +49,32 @@ bool ReadFileLines(const std::string& file_path,
   return true;
 }
 
+Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate() {
+#if defined(__ANDROID__)
+  return Interpreter::TfLiteDelegatePtr(
+      NnApiDelegate(),
+      // NnApiDelegate() returns a singleton, so provide a no-op deleter.
+      [](TfLiteDelegate*) {});
+#else
+  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+#endif  // defined(__ANDROID__)
+}
+
+Interpreter::TfLiteDelegatePtr CreateGPUDelegate(
+    tflite::FlatBufferModel* model) {
+#if defined(__ANDROID__)
+  TfLiteGpuDelegateOptions options;
+  options.metadata = TfLiteGpuDelegateGetModelMetadata(model->GetModel());
+  options.compile_options.precision_loss_allowed = 1;
+  options.compile_options.preferred_gl_object_type =
+      TFLITE_GL_OBJECT_TYPE_FASTEST;
+  options.compile_options.dynamic_batch_enabled = 0;
+  return Interpreter::TfLiteDelegatePtr(TfLiteGpuDelegateCreate(&options),
+                                        &TfLiteGpuDelegateDelete);
+#else
+  return Interpreter::TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+#endif  // defined(__ANDROID__)
+}
+
 }  // namespace evaluation
 }  // namespace tflite
",0,test
264e07271385838da8f1ec9e58dddb80ee13800a,tensorflow/tensorflow,"Moves delegate initialization to a common lib for both benchmark tool & eval framework, and utilizes the helpers in TfliteInferenceStage.

PiperOrigin-RevId: 244869850",utils.h,"@@ -19,10 +19,17 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include ""tensorflow/lite/model.h""
+
 namespace tflite {
 namespace evaluation {
 bool ReadFileLines(const std::string& file_path,
                    std::vector<std::string>* lines_output);
+
+Interpreter::TfLiteDelegatePtr CreateNNAPIDelegate();
+
+Interpreter::TfLiteDelegatePtr CreateGPUDelegate(FlatBufferModel* model);
+
 }  // namespace evaluation
 }  // namespace tflite
 
",0,test
7dd20b844ced19610f8fa67be61d93948563ac43,tensorflow/tensorflow,"Convert Python Variable objects to tensors in custom_gradient, which allows nested custom_gradient functions. This allows a custom_gradient wrapped function to call through to another custom_gradient wrapped function.

PiperOrigin-RevId: 237295007",custom_gradient.py,"@@ -238,6 +238,9 @@ def _graph_mode_decorator(f, *args, **kwargs):
   original_tensors = all_tensors
   with ops.get_default_graph().gradient_override_map({""IdentityN"": name}):
     all_tensors = array_ops.identity_n(all_tensors)
+
+  original_tensors = [ops.convert_to_tensor(x) for x in original_tensors]
+
   # Propagate handle data for happier shape inference for resource variables.
   for i, t in enumerate(original_tensors):
     if t.dtype == dtypes.resource and hasattr(t, ""_handle_data""):
",0,train
7dd20b844ced19610f8fa67be61d93948563ac43,tensorflow/tensorflow,"Convert Python Variable objects to tensors in custom_gradient, which allows nested custom_gradient functions. This allows a custom_gradient wrapped function to call through to another custom_gradient wrapped function.

PiperOrigin-RevId: 237295007",gradients_test.py,"@@ -1033,6 +1033,42 @@ class CustomGradientTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(g.eval(), [2.0])
       self.assertAllEqual(g.eval(feed_dict={conditional: False}), [3.0])
 
+  def testRecursiveCustomGradient(self):
+    @custom_gradient.custom_gradient
+    def F(x):
+      out = core_layers.dense(x, 3, use_bias=False)
+
+      def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
+        self.assertEqual(1, len(variables))
+        grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad)
+        return grads[0], [array_ops.ones((4, 3))]
+
+      return out, Grad
+
+    @custom_gradient.custom_gradient
+    def DoubleF(x):
+      out = F(x)
+
+      def Grad(out_grad, variables=None):  # pylint: disable=redefined-outer-name
+        self.assertEqual(1, len(variables))
+        grads = gradients.gradients(out, [x, variables[0]], grad_ys=out_grad)
+        return grads[0], [array_ops.ones((4, 3))]
+
+      return out, Grad
+    with ops.Graph().as_default():
+      x = array_ops.ones((2, 4))
+      with variable_scope.variable_scope(""f"", use_resource=True) as vs:
+        y = DoubleF(x)
+        all_vars = vs.global_variables()
+        assert len(all_vars) == 1
+      grads = gradients.gradients(y, [x, all_vars[0]])
+      for g in grads:
+        self.assertIsNotNone(g)
+      with session.Session() as sess:
+        self.evaluate(variables.global_variables_initializer())
+        dw = sess.run(math_ops.reduce_sum(grads[1]))
+        self.assertEqual(12., dw)
+
 
 class AggregateIndexedSlicesGradientsTest(test_util.TensorFlowTestCase):
 
",0,train
bc84ebd3b0d49debca3a886d62b76a95698f1a1f,tensorflow/tensorflow,"Make sure the buffers in the NNAPI shared memory pool are 16-byte aligned.

PiperOrigin-RevId: 245247027",nnapi_delegate.cc,"@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h""
+
 #include <cstdarg>
 #include <cstring>
 #include <iostream>
@@ -23,7 +25,6 @@ limitations under the License.
 #include ""tensorflow/lite/builtin_ops.h""
 #include ""tensorflow/lite/c/c_api_internal.h""
 #include ""tensorflow/lite/context_util.h""
-#include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h""
 #include ""tensorflow/lite/kernels/kernel_util.h""
 #include ""tensorflow/lite/nnapi/nnapi_implementation.h""
 
@@ -116,7 +117,16 @@ bool IsRestrictedScalesCompliant(const TfLiteContext* context,
 constexpr int32_t kMinSdkVersionForNNAPI = 27;
 constexpr int32_t kMinSdkVersionForNNAPI11 = 28;
 constexpr int32_t kMinSdkVersionForNNAPI12 = 29;
+constexpr size_t kDefaultByteAlignmentForNNAPI = 16;
 
+static size_t getNumPaddingBytes(size_t byte_size) {
+  size_t num_padding_bytes = 0;
+  if (byte_size % kDefaultByteAlignmentForNNAPI) {
+    num_padding_bytes = kDefaultByteAlignmentForNNAPI -
+                        (byte_size % kDefaultByteAlignmentForNNAPI);
+  }
+  return num_padding_bytes;
+}
 }  // namespace
 
 // RAII NN API Model Destructor for use with std::unique_ptr
@@ -1156,6 +1166,7 @@ class NNAPIDelegateKernel {
                 execution, relative_input_index, nullptr,
                 nn_input_memory_->get_handle(), input_offset, tensor->bytes));
         input_offset += tensor->bytes;
+        input_offset += getNumPaddingBytes(tensor->bytes);
         relative_input_index++;
       }
     }
@@ -1171,6 +1182,7 @@ class NNAPIDelegateKernel {
               execution, relative_output_index, nullptr,
               nn_output_memory_->get_handle(), output_offset, tensor->bytes));
       output_offset += tensor->bytes;
+      output_offset += getNumPaddingBytes(tensor->bytes);
       relative_output_index++;
     }
 
@@ -1210,6 +1222,7 @@ class NNAPIDelegateKernel {
       memcpy(tensor->data.raw,
              nn_output_memory_->get_data_ptr() + output_offset, tensor->bytes);
       output_offset += tensor->bytes;
+      output_offset += getNumPaddingBytes(tensor->bytes);
     }
 
     return kTfLiteOk;
@@ -1376,6 +1389,7 @@ class NNAPIDelegateKernel {
           context->tensors[i].allocation_type != kTfLiteMmapRo) {
         inputs.push_back(operand_mapping_.lite_index_to_ann(i));
         total_input_byte_size += context->tensors[i].bytes;
+        total_input_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
       }
     }
 
@@ -1383,6 +1397,7 @@ class NNAPIDelegateKernel {
     for (int i : TfLiteIntArrayView(output_tensors)) {
       outputs.push_back(operand_mapping_.lite_index_to_ann(i));
       total_output_byte_size += context->tensors[i].bytes;
+      total_output_byte_size += getNumPaddingBytes(context->tensors[i].bytes);
     }
 
     // Add state output tensors as model outputs.
",0,train
9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian
when using `event_ndims=0` and `scale_identity_multiplier` flag.

PiperOrigin-RevId: 170887218",affine_test.py,"@@ -829,6 +829,15 @@ class AffineBijectorTest(test.TestCase):
         x=np.array(
             [1., 2], dtype=np.float32))
 
+  def testScalarEventIdentityScale(self):
+    with self.test_session() as sess:
+      doubler = Affine(
+          scale_identity_multiplier=2.,
+          event_ndims=0)
+      doubler2 = doubler.inverse_log_det_jacobian(2.)
+      doubler2_ildj_ = sess.run([doubler2])
+      self.assertAllClose([-np.log(2.)], doubler2_ildj_)
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian
when using `event_ndims=0` and `scale_identity_multiplier` flag.

PiperOrigin-RevId: 170887218",transformed_distribution_test.py,"@@ -172,6 +172,19 @@ class TransformedDistributionTest(test.TestCase):
       self.assertAllClose(actual_mvn_entropy,
                           fake_mvn.entropy().eval())
 
+  def testScalarBatchScalarEventIdentityScale(self):
+    with self.test_session() as sess:
+      exp2 = self._cls()(
+          ds.Exponential(rate=0.25),
+          bijector=ds.bijectors.Affine(
+              scale_identity_multiplier=2.,
+              event_ndims=0))
+      log_prob = exp2.log_prob(1.)
+      log_prob_ = sess.run(log_prob)
+      base_log_prob = -0.5 * 0.25 + np.log(0.25)
+      ildj = np.log(2.)
+      self.assertAllClose(base_log_prob - ildj, log_prob_, rtol=1e-6, atol=0.)
+
 
 class ScalarToMultiTest(test.TestCase):
 
",0,train
9d93a11431f62a82eda1f314c6c8b2acee1bc1c1,tensorflow/tensorflow,"Bugfix: tf.contrib.distributions.Affine incorrectly computed log-det-jacobian
when using `event_ndims=0` and `scale_identity_multiplier` flag.

PiperOrigin-RevId: 170887218",affine_impl.py,"@@ -388,10 +388,11 @@ class Affine(bijector.Bijector):
     if self._is_only_identity_multiplier:
       # We don't pad in this case and instead let the fldj be applied
       # via broadcast.
-      d = math_ops.cast(array_ops.shape(x)[-1], dtype=self._scale.dtype)
-      one = ops.convert_to_tensor(1., self._scale.dtype)
-      return math_ops.log(math_ops.abs(self._scale)) * array_ops.where(
-          math_ops.equal(self._shaper.event_ndims, 0), one, d)
+      event_size = distribution_util.pick_vector(
+          math_ops.equal(self._shaper.event_ndims, 0),
+          [1], array_ops.shape(x))[-1]
+      event_size = math_ops.cast(event_size, dtype=self._scale.dtype)
+      return math_ops.log(math_ops.abs(self._scale)) * event_size
     return self.scale.log_abs_determinant()
 
   def _maybe_check_scale(self):
",0,train
26c01423d9a4c4b17993f636764e9c1ee5ea1c4f,tensorflow/tensorflow,"Fix windows test.

The problem was that LocalTempFilename returns a full path, not just a basename, so we were joining two full paths, which doesn't work on Windows.

PiperOrigin-RevId: 324094697
Change-Id: I563f692ba6525097c95fda2a45ee9d565ac9c8a1",journal_test.cc,"@@ -29,11 +29,11 @@ namespace {
 using ::testing::HasSubstr;
 
 bool NewJournalDir(std::string* journal_dir) {
-  std::string filename;
-  if (!Env::Default()->LocalTempFilename(&filename)) {
+  std::string filename = testing::TmpDir();
+  if (!Env::Default()->CreateUniqueFileName(&filename, ""journal_dir"")) {
     return false;
   }
-  *journal_dir = io::JoinPath(testing::TmpDir(), filename);
+  *journal_dir = filename;
   return true;
 }
 
",0,train
f3afacffc3c6b431677a69754bb69a7791261318,tensorflow/tensorflow,Fix bug in hadoop_file_system.cc when reading large variables from HDFS,hadoop_file_system.cc,"@@ -209,8 +209,9 @@ class HDFSRandomAccessFile : public RandomAccessFile {
       // We lock inside the loop rather than outside so we don't block other
       // concurrent readers.
       mutex_lock lock(mu_);
+      size_t read_n = std::min(n, static_cast<size_t>(std::numeric_limits<int>::max() - 2));
       tSize r = hdfs_->hdfsPread(fs_, file_, static_cast<tOffset>(offset), dst,
-                                 static_cast<tSize>(n));
+                                 static_cast<tSize>(read_n));
       if (r > 0) {
         dst += r;
         n -= r;
",0,test
7c1123eac5fb2409d76001d0113a704aa3e824da,tensorflow/tensorflow,Address review comments,xla_gpu_device.cc,"@@ -16,14 +16,16 @@ limitations under the License.
 // Registers the XLA_GPU device, which is an XlaDevice instantiation that runs
 // operators using XLA via the XLA ""CUDA"" (GPU) backend.
 
+#include <set>
 #include ""absl/memory/memory.h""
+#include ""absl/strings/numbers.h""
+#include ""absl/strings/str_split.h""
 #include ""tensorflow/compiler/jit/kernels/xla_ops.h""
 #include ""tensorflow/compiler/jit/xla_device.h""
 #include ""tensorflow/compiler/jit/xla_device_ops.h""
 #include ""tensorflow/compiler/tf2xla/xla_op_registry.h""
 #include ""tensorflow/core/common_runtime/device_factory.h""
 #include ""tensorflow/core/lib/core/status.h""
-#include ""tensorflow/core/lib/strings/str_util.h""
 
 namespace tensorflow {
 
@@ -53,20 +55,21 @@ Status XlaGpuDeviceFactory::CreateDevices(
     VLOG(1) << ""Failed to create XLA_GPU device: "" << platform.status();
     return Status::OK();
   }
-  const auto& allowed_gpus =
+  string allowed_gpus =
       session_options.config.gpu_options().visible_device_list();
-  std::unordered_set<int> gpu_ids;
+  std::set<int> gpu_ids;
   int num_visible_devices = platform.ValueOrDie()->VisibleDeviceCount();
   if (allowed_gpus.empty()) {
     for (int i = 0; i < num_visible_devices; ++i) gpu_ids.insert(i);
   } else {
+    // The loop below is copied from gpu/gpu_device.cc. It validates the
+    // configuration string; it should be redundant, since the code would have
+    // failed there before reaching this point.
     const std::vector<string> visible_devices =
-        str_util::Split(allowed_gpus, ',');
-    // copied from gpu/gpu_device.cc Should be redundant since code would fail
-    // there before it gets to here.
+        absl::StrSplit(allowed_gpus, ',');
     for (const string& platform_gpu_id_str : visible_devices) {
       int32 platform_gpu_id;
-      if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) {
+      if (!absl::SimpleAtoi(platform_gpu_id_str, &platform_gpu_id)) {
         return errors::InvalidArgument(
             ""Could not parse entry in 'visible_device_list': '"",
             platform_gpu_id_str, ""'. visible_device_list = "", allowed_gpus);
@@ -79,8 +82,8 @@ Status XlaGpuDeviceFactory::CreateDevices(
       gpu_ids.insert(platform_gpu_id);
     }
   }
-  for (int i = 0; i < num_visible_devices; ++i) {
-    if (gpu_ids.count(i) == 0) continue;
+  for (const auto i : gpu_ids) {
+    // Iterating gpu_ids directly means only allowed devices are visited.
     XlaDevice::Options options;
     options.platform = platform.ValueOrDie();
     options.device_name_prefix = name_prefix;
",0,train
4ab315314078b043240908209086fb64f5260cc5,tensorflow/tensorflow,"Deprecate random_binomial in favor of random_bernoulli.

PiperOrigin-RevId: 299205387
Change-Id: Id347f893f8e8fd6bf573c62827e96ec4d1de3343",backend.py,"@@ -79,6 +79,7 @@ from tensorflow.python.util import nest
 from tensorflow.python.util import object_identity
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_inspect
+from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import keras_export
 
 py_all = all
@@ -5703,10 +5704,13 @@ def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
       shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed)
 
 
+@deprecated(None, 'Use `tf.keras.backend.random_bernoulli` instead.')
 @keras_export('keras.backend.random_binomial')
 def random_binomial(shape, p=0.0, dtype=None, seed=None):
   """"""Returns a tensor with random binomial distribution of values.
 
+  DEPRECATED, use `tf.keras.backend.random_bernoulli` instead.
+
   The binomial distribution with parameters `n` and `p` is the probability
   distribution of the number of successful Bernoulli process. Only supports
   `n` = 1 for now.
@@ -5729,6 +5733,22 @@ def random_binomial(shape, p=0.0, dtype=None, seed=None):
       array_ops.ones(shape, dtype=dtype), array_ops.zeros(shape, dtype=dtype))
 
 
+@keras_export('keras.backend.random_bernoulli')
+def random_bernoulli(shape, p=0.0, dtype=None, seed=None):
+  """"""Returns a tensor with random bernoulli distribution of values.
+
+  Arguments:
+      shape: A tuple of integers, the shape of tensor to create.
+      p: A float, `0. <= p <= 1`, probability of bernoulli distribution.
+      dtype: String, dtype of returned tensor.
+      seed: Integer, random seed.
+
+  Returns:
+      A tensor.
+  """"""
+  return random_binomial(shape, p, dtype, seed)
+
+
 @keras_export('keras.backend.truncated_normal')
 def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
   """"""Returns a tensor with truncated random normal distribution of values.
",0,train
f5d9e2c9d7a23ffb92e94032b1b39b82795c129a,tensorflow/tensorflow,Fix space,ragged_constant_value_op_test.py,"@@ -73,7 +73,7 @@ class RaggedConstantValueOpTest(test_util.TensorFlowTestCase,
                   np.array([]), [[5, 6], [7, 8], [9, 0]]],
           ragged_rank=1,
           expected_shape=(3, None, 2)),
-    dict(
+      dict(
           pylist=[np.array([3, np.array(4)]), [[1, 2]],
                   np.array([]), [[5, 6], [7, 8], [9, 0]]],
           ragged_rank=1,
",0,test
821116f72752cb9545565cb57e276e14e45e4973,tensorflow/tensorflow,Tensor boolean strictness fix for skflow.,losses_ops.py,"@@ -46,7 +46,7 @@ def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, na
     """"""
     with tf.op_scope([tensor_in, labels], name, ""softmax_classifier""):
         logits = tf.nn.xw_plus_b(tensor_in, weights, biases)
-        if class_weight:
+        if class_weight is not None:
             logits = tf.mul(logits, class_weight)
         xent = tf.nn.softmax_cross_entropy_with_logits(logits,
                                                        labels,
@@ -54,4 +54,3 @@ def softmax_classifier(tensor_in, labels, weights, biases, class_weight=None, na
         loss = tf.reduce_mean(xent, name=""xent"")
         predictions = tf.nn.softmax(logits, name=name)
         return predictions, loss
-
",0,train
821116f72752cb9545565cb57e276e14e45e4973,tensorflow/tensorflow,Tensor boolean strictness fix for skflow.,trainer.py,"@@ -121,7 +121,7 @@ class TensorFlowTrainer(object):
         """"""
         for step in xrange(steps):
             feed_dict = feed_dict_fn()
-            if summaries:
+            if summaries is not None:
                 global_step, loss, summ, _ = sess.run(
                     [self.global_step, self.loss, summaries, self.trainer],
                     feed_dict=feed_dict)
@@ -131,7 +131,7 @@ class TensorFlowTrainer(object):
                     feed_dict=feed_dict)
             monitor.update(step, global_step, loss, sess,
                            feed_params_fn, loss_expression_tensor=self.loss)
-            if summaries and summary_writer and summ is not None:
+            if summaries is not None and summary_writer and summ is not None:
                 summary_writer.add_summary(summ, global_step)
             if monitor.monitor_inducing_stop():
                 break
",0,train
2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added.

PiperOrigin-RevId: 269413768",model.h,"@@ -579,6 +579,17 @@ Status ConnectTwoNodes(Graph<TensorT>* graph, const Node* from_node,
 
 using GraphFloat32 = Model<TensorRef<BHWC>>;
 
+// @return true if all tensors have same batch value.
+inline bool IsBatchMatchesForAllValues(const GraphFloat32& model) {
+  const int32_t b = model.values()[0]->tensor.shape.b;
+  for (auto value : model.values()) {
+    if (value->tensor.shape.b != b) {
+      return false;
+    }
+  }
+  return true;
+}
+
 }  // namespace gpu
 }  // namespace tflite
 
",0,train
2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added.

PiperOrigin-RevId: 269413768",api.cc,"@@ -369,18 +369,6 @@ class CompiledModelImpl
   std::unordered_map<ValueId, size_t> object_sizes_;
   CompilerStats stats_;
 };
-
-// @return true if all tensors have same batch value.
-bool IsBatchMatchesForAllValues(const GraphFloat32& model) {
-  const int32_t b = model.values()[0]->tensor.shape.b;
-  for (auto value : model.values()) {
-    if (value->tensor.shape.b != b) {
-      return false;
-    }
-  }
-  return true;
-}
-
 }  // namespace
 
 Status Compile(const CompilationOptions& options, const GraphFloat32& model,
",0,train
2050596162cddc19a21ba1e880bf7a5959c80841,tensorflow/tensorflow,"iOS Metal GPU delegate: reshape tests added.

PiperOrigin-RevId: 269413768",api.cc,"@@ -122,6 +122,9 @@ Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
                           const std::vector<ValueId>& outputs,
                           const RuntimeOptions& options,
                           std::vector<ComputeTaskDescriptorPtr>* tasks) {
+  if (!IsBatchMatchesForAllValues(graph)) {
+    return InvalidArgumentError(""Only identical batch dimension is supported"");
+  }
   int node_id = static_cast<int>(node->id);
   auto op_type = OperationTypeFromString(node->operation.type);
   switch (op_type) {
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_assignment.cc,"@@ -261,7 +261,7 @@ void BufferAllocation::AddAssignment(const HloValue& buffer, int64 offset,
     Shape* shape = ShapeUtil::GetMutableSubshape(
         position.instruction->mutable_shape(), position.index);
     if (shape->has_layout()) {
-      shape->mutable_layout()->set_memory_space(buffer.color().value());
+      shape->mutable_layout()->set_memory_space(buffer.color());
     }
   }
 }
@@ -272,7 +272,7 @@ BufferAllocationProto BufferAllocation::ToProto() const {
   proto.set_size(size_);
   proto.set_is_thread_local(is_thread_local_);
   proto.set_is_tuple(is_tuple_);
-  proto.set_color(color_.value());
+  proto.set_color(color_);
   if (is_entry_computation_parameter_) {
     proto.set_is_entry_computation_parameter(true);
     for (int64 idx : param_shape_index()) {
@@ -336,8 +336,8 @@ static const HloInstruction* GetOutputInstruction(
 string BufferAllocation::ToString() const {
   string output;
   StrAppendFormat(&output, ""allocation %d: %p, size %d"", index_, this, size());
-  if (color().value() != 0) {
-    StrAppend(&output, "", color "", color().value());
+  if (color() != 0) {
+    StrAppend(&output, "", color "", color());
   }
   if (is_entry_computation_parameter()) {
     const HloInstruction* param = GetEntryParameterInstruction(*this);
@@ -607,9 +607,7 @@ void BufferAssignment::AddAssignment(BufferAllocation* allocation,
 // BufferAllocation.
 void BufferAssignment::CombineTempAllocations() {
   VLOG(1) << ""CombineTempAllocations()"";
-  flat_hash_map<BufferValue::Color, BufferAllocation,
-                BufferValue::Color::Hasher>
-      combined_allocation_map;
+  flat_hash_map<BufferValue::Color, BufferAllocation> combined_allocation_map;
 
   // Move all temp allocations into a single run at the end of the allocations
   // vector.
@@ -1059,8 +1057,8 @@ Status BufferAssigner::MergeInplaceOpBuffers(BufferAssignment* assignment) {
 
       // The instruction or operand color is excluded because it was assigned by
       // memory_space_assignment.
-      if (excluded_colors.contains(instruction_buffer.color().value()) ||
-          excluded_colors.contains(operand_buffer.color().value())) {
+      if (excluded_colors.contains(instruction_buffer.color()) ||
+          excluded_colors.contains(operand_buffer.color())) {
         continue;
       }
 
@@ -1353,13 +1351,10 @@ Status BufferAssigner::AssignBuffersForComputations(
   return Status::OK();
 }
 
-flat_hash_map<LogicalBuffer::Color, flat_hash_set<const HloValue*>,
-              LogicalBuffer::Color::Hasher>
+flat_hash_map<LogicalBuffer::Color, flat_hash_set<const HloValue*>>
 BufferAssigner::SplitBuffersByColor(
     const flat_hash_set<const HloValue*>& buffers) {
-  flat_hash_map<LogicalBuffer::Color, flat_hash_set<const HloValue*>,
-                LogicalBuffer::Color::Hasher>
-      color_map;
+  flat_hash_map<LogicalBuffer::Color, flat_hash_set<const HloValue*>> color_map;
   for (auto buffer : buffers) {
     color_map[buffer->color()].insert(buffer);
   }
@@ -1374,8 +1369,7 @@ Status BufferAssigner::AssignPresetBuffers(
   }
 
   // Create an allocation for each preset color.
-  absl::flat_hash_map<LogicalBuffer::Color, BufferAllocation*,
-                      LogicalBuffer::Color::Hasher>
+  absl::flat_hash_map<LogicalBuffer::Color, BufferAllocation*>
       preset_allocations;
   for (auto& color_and_info : preset_assignments_->assignment_informations()) {
     LogicalBuffer::Color color(color_and_info.first);
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_assignment.h,"@@ -673,8 +673,7 @@ class BufferAssigner {
   // Split a set of buffers into several sets, each of which contains buffers
   // colored with the same color.
   absl::flat_hash_map<LogicalBuffer::Color,
-                      absl::flat_hash_set<const HloValue*>,
-                      LogicalBuffer::Color::Hasher>
+                      absl::flat_hash_set<const HloValue*>>
   SplitBuffersByColor(const absl::flat_hash_set<const HloValue*>& buffers);
 
   // If true, allocate buffers for constant instructions.
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_value.cc,"@@ -59,7 +59,7 @@ LogicalBufferProto BufferValue::ToProto(const SizeFunction& size_fn) const {
       ToLocationProto(*instruction(), index());
   proto.mutable_defined_at()->Swap(&proto_location);
   if (has_color()) {
-    proto.set_color(color().value());
+    proto.set_color(color());
   }
   return proto;
 }
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",buffer_value.h,"@@ -25,7 +25,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/shape_util.h""
 #include ""tensorflow/compiler/xla/types.h""
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
-#include ""tensorflow/core/lib/gtl/int_type.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/types.h""
@@ -86,7 +85,7 @@ namespace xla {
 
 class BufferValue {
  public:
-  TF_LIB_GTL_DEFINE_INT_TYPE(Color, int64);
+  using Color = int64;
 
   // Id is a unique identifier for the BufferValue to facilitate efficient
   // collections of BufferValues with stable iteration order.
@@ -154,7 +153,7 @@ class BufferValue {
   static LogicalBufferProto::Location ToLocationProto(
       const HloInstruction& instruction, const ShapeIndex& index);
 
-  const Color kInvalidColor = Color(-1);
+  const Color kInvalidColor = -1;
 
  protected:
   BufferValue(HloInstruction* instruction, const ShapeIndex& index, Id id);
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",hlo_value.cc,"@@ -91,8 +91,7 @@ string HloValue::ToShortString() const {
   return absl::StrFormat(
       ""<%d %s%s%s%s>"", id(), instruction()->name(),
       instruction()->shape().IsTuple() ? index().ToString() : """",
-      is_phi() ? "" (phi)"" : """",
-      has_color() ? StrCat("" @"", color().value()) : """");
+      is_phi() ? "" (phi)"" : """", has_color() ? StrCat("" @"", color()) : """");
 }
 
 string HloValue::ToString(int indent) const {
",0,train
96ba1c3609b8a0210bdc72c2ba339cf81831f998,tensorflow/tensorflow,"[XLA] BufferValue::Color now type aliases int64.

PiperOrigin-RevId: 313404227
Change-Id: I2d393d426865c61ff210f10e3d9b8402a1813cf1",logical_buffer.cc,"@@ -34,7 +34,7 @@ LogicalBuffer::~LogicalBuffer() {}
 string LogicalBuffer::ToString() const {
   string color_string;
   if (has_color()) {
-    color_string = absl::StrCat("" @"", color().value());
+    color_string = absl::StrCat("" @"", color());
   }
   return absl::StrCat(instruction_->name(), ""["", absl::StrJoin(index_, "",""),
                       ""](#"", id(), color_string, "")"");
",0,train
4727d0180fb693fb7cfd70b372b606752f8efa45,tensorflow/tensorflow,"Split tensorflow.python.tpu module doctests into different target.

These TPU tests do not yet run in OSS.

PiperOrigin-RevId: 310959419
Change-Id: I2a1662e52f25da9c4c58c018c83729dc6da9008d",tpu_embedding.py,"@@ -828,7 +828,7 @@ class TPUEmbedding(object):
   ...     end_learning_rate=0.0)
   >>> wordpiece_table_config = TableConfig(
   ...   vocabulary_size=119547,
-  ...   dimension=768,
+  ...   dimension=256,
   ...   learning_rate_fn=learning_rate_fn)
   >>> wordpiece_feature_config = FeatureConfig(
   ...   table_id='bert/embeddings/word_embeddings',
@@ -846,11 +846,11 @@ class TPUEmbedding(object):
   ...  batch_size=128,
   ...  mode=TRAINING,
   ...  optimization_parameters=optimization_parameters,
-  ...  device_config=DeviceConfig(
-  ...    num_cores=64, num_hosts=4, job_name='tpu_worker'))
+  ...  master='')
   >>> with tf.Graph().as_default():
   ...   init_tpu_op = tf.compat.v1.tpu.initialize_system(
-  ...     embedding_config=tpu_embedding.config_proto, job='tpu_worker')
+  ...     embedding_config=tpu_embedding.config_proto)
+  ...   tf.compat.v1.Session().run(init_tpu_op)
   """"""
 
   # TODO(shizhiw): Consider adding a field to FeatureConfig that indicates that
",0,train
4727d0180fb693fb7cfd70b372b606752f8efa45,tensorflow/tensorflow,"Split tensorflow.python.tpu module doctests into different target.

These TPU tests do not yet run in OSS.

PiperOrigin-RevId: 310959419
Change-Id: I2a1662e52f25da9c4c58c018c83729dc6da9008d",tf_doctest.py,"@@ -43,6 +43,8 @@ tf.keras.preprocessing = preprocessing
 FLAGS = flags.FLAGS
 
 flags.DEFINE_string('module', None, 'A specific module to run doctest on.')
+flags.DEFINE_list('module_prefix_skip', [],
+                  'A list of modules to ignore when resolving modules.')
 flags.DEFINE_boolean('list', None,
                      'List all the modules in the core package imported.')
 flags.DEFINE_string('file', None, 'A specific file to run doctest on.')
@@ -50,6 +52,7 @@ flags.DEFINE_string('file', None, 'A specific file to run doctest on.')
 flags.mark_flags_as_mutual_exclusive(['module', 'file'])
 flags.mark_flags_as_mutual_exclusive(['list', 'file'])
 
+# Both --module and --module_prefix_skip are relative to PACKAGE.
 PACKAGE = 'tensorflow.python.'
 
 
@@ -140,6 +143,9 @@ def load_tests(unused_loader, tests, unused_ignore):
     tf_modules = get_module_and_inject_docstring(FLAGS.file)
 
   for module in tf_modules:
+    if any(module.__name__.startswith(PACKAGE + prefix)
+           for prefix in FLAGS.module_prefix_skip):
+      continue
     testcase = TfTestCase()
     tests.addTests(
         doctest.DocTestSuite(
",0,train
71f86a96994e66280ff6a862594ebfd9ee1dc6d7,tensorflow/tensorflow,"Remove an old TODO in benchmark_tflite_model.cc. For string tensors, don't try to populate their contents.

PiperOrigin-RevId: 369907367
Change-Id: Id926d787ce5ccf0b511d017826d1345b71e511d3",benchmark_tflite_model.cc,"@@ -562,7 +562,8 @@ BenchmarkTfLiteModel::CreateRandomTensorData(const TfLiteTensor& t,
           num_elements, std::uniform_int_distribution<int32_t>(low, high));
     }
     case kTfLiteString: {
-      // TODO(haoliang): No need to cache string tensors right now.
+      // Don't populate input for string. Instead, return a default-initialized
+      // `InputTensorData` object directly.
       break;
     }
     case kTfLiteBool: {
",0,train
50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers

This makes the following fixes for BINARY and COUNT output
 - Fixes compute_output_shape and compute_output_signature
 - Properly propagates batch shape for dense inputs
 - Adds test coverage

PiperOrigin-RevId: 355071871
Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",category_encoding.py,"@@ -534,5 +534,6 @@ def dense_bincount(inputs, out_depth, binary_output, count_weights=None):
       dtype=K.floatx(),
       axis=-1,
       binary_output=binary_output)
-  result.set_shape(tensor_shape.TensorShape((None, out_depth)))
+  batch_size = inputs.shape.as_list()[0]
+  result.set_shape(tensor_shape.TensorShape((batch_size, out_depth)))
   return result
",0,test
50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers

This makes the following fixes for BINARY and COUNT output
 - Fixes compute_output_shape and compute_output_signature
 - Properly propagates batch shape for dense inputs
 - Adds test coverage

PiperOrigin-RevId: 355071871
Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",index_lookup.py,"@@ -27,6 +27,7 @@ import numpy as np
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.keras import backend as K
 from tensorflow.python.keras.engine import base_preprocessing_layer
 from tensorflow.python.keras.layers.preprocessing import category_encoding
 from tensorflow.python.keras.layers.preprocessing import table_utils
@@ -160,22 +161,20 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
     super(IndexLookup, self).__init__(
         combiner=_IndexLookupCombiner(vocab_size, self.mask_token), **kwargs)
 
-    self._output_dtype = dtypes.int64
-
     # We need to save the key dtype so that we know if we're expecting int64
     # keys. If we are, we will cast int32 inputs to int64 as well.
     if invert:
-      self._key_dtype = self._output_dtype
-      value_dtype = self.dtype
+      self._key_dtype = dtypes.int64
+      self._value_dtype = self.dtype
       oov_value = self.oov_token
     else:
       self._key_dtype = self.dtype
-      value_dtype = self._output_dtype
+      self._value_dtype = dtypes.int64
       oov_value = self._oov_value
 
     self._table = lookup_ops.MutableHashTable(
         key_dtype=self._key_dtype,
-        value_dtype=value_dtype,
+        value_dtype=self._value_dtype,
         default_value=oov_value,
         name=(self._name + ""_index_table""))
     tracked_table = self._add_trackable(self._table, trainable=False)
@@ -201,11 +200,14 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
       self.set_vocabulary(vocabulary)
 
   def compute_output_shape(self, input_shape):
+    if self.output_mode != INT:
+      return tensor_shape.TensorShape([input_shape[0], self.max_tokens])
+
     return input_shape
 
   def compute_output_signature(self, input_spec):
     output_shape = self.compute_output_shape(input_spec.shape.as_list())
-    output_dtype = self.dtype if self.invert else self._output_dtype
+    output_dtype = self._value_dtype if self.output_mode == INT else K.floatx()
     return tensor_spec.TensorSpec(shape=output_shape, dtype=output_dtype)
 
   def adapt(self, data, reset_state=True):
",0,test
50316308851f9e6049167dc6b475e0f9a9a4274d,tensorflow/tensorflow,"Fixup output shape for IntegerLookup/StringLookup layers

This makes the following fixes for BINARY and COUNT output
 - Fixes compute_output_shape and compute_output_signature
 - Properly propagates batch shape for dense inputs
 - Adds test coverage

PiperOrigin-RevId: 355071871
Change-Id: I7820763100b643b8cd12908caf416aae1c4a1f14",index_lookup_test.py,"@@ -618,8 +618,8 @@ class IndexLookupOutputTest(keras_parameterized.TestCase,
     output_dataset = model.predict(input_array)
     self.assertAllEqual(expected_output, output_dataset)
 
-  def test_output_shape(self):
-    input_data = keras.Input(shape=(4,), dtype=dtypes.string)
+  def test_int_output_shape(self):
+    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
     layer = get_layer_class()(
         max_tokens=2,
         num_oov_indices=1,
@@ -627,7 +627,7 @@ class IndexLookupOutputTest(keras_parameterized.TestCase,
         oov_token=""[OOV]"",
         dtype=dtypes.string)
     int_data = layer(input_data)
-    self.assertAllEqual(int_data.shape[1:], input_data.shape[1:])
+    self.assertAllEqual(int_data.shape.as_list(), [16, 4])
 
   def test_int_output_no_reserved_zero(self):
     vocab_data = [""earth"", ""wind"", ""and"", ""fire""]
@@ -667,6 +667,70 @@ class IndexLookupOutputTest(keras_parameterized.TestCase,
     output_dataset = model.predict(input_array)
     self.assertAllEqual(expected_output, output_dataset)
 
+  def test_binary_output(self):
+    vocab_data = [""earth"", ""wind"", ""and"", ""fire""]
+    input_array = np.array([[""earth"", ""wind"", ""and"", ""fire""],
+                            [""fire"", ""and"", ""earth"", ""michigan""]])
+    expected_output = [[0, 0, 1, 1, 1, 1], [0, 1, 1, 0, 1, 1]]
+
+    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
+    layer = get_layer_class()(
+        max_tokens=None,
+        num_oov_indices=1,
+        mask_token="""",
+        oov_token=""[OOV]"",
+        output_mode=index_lookup.BINARY,
+        dtype=dtypes.string)
+    layer.set_vocabulary(vocab_data)
+    binary_data = layer(input_data)
+    model = keras.Model(inputs=input_data, outputs=binary_data)
+    output_dataset = model.predict(input_array)
+    self.assertAllEqual(expected_output, output_dataset)
+
+  def test_binary_output_shape(self):
+    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
+    layer = get_layer_class()(
+        max_tokens=2,
+        num_oov_indices=1,
+        mask_token="""",
+        oov_token=""[OOV]"",
+        output_mode=index_lookup.BINARY,
+        dtype=dtypes.string)
+    binary_data = layer(input_data)
+    self.assertAllEqual(binary_data.shape.as_list(), [16, 2])
+
+  def test_count_output(self):
+    vocab_data = [""earth"", ""wind"", ""and"", ""fire""]
+    input_array = np.array([[""earth"", ""wind"", ""and"", ""wind""],
+                            [""fire"", ""fire"", ""fire"", ""michigan""]])
+    expected_output = [[0, 0, 1, 2, 1, 0], [0, 1, 0, 0, 0, 3]]
+
+    input_data = keras.Input(shape=(None,), dtype=dtypes.string)
+    layer = get_layer_class()(
+        max_tokens=None,
+        num_oov_indices=1,
+        mask_token="""",
+        oov_token=""[OOV]"",
+        output_mode=index_lookup.COUNT,
+        dtype=dtypes.string)
+    layer.set_vocabulary(vocab_data)
+    count_data = layer(input_data)
+    model = keras.Model(inputs=input_data, outputs=count_data)
+    output_dataset = model.predict(input_array)
+    self.assertAllEqual(expected_output, output_dataset)
+
+  def test_count_output_shape(self):
+    input_data = keras.Input(batch_size=16, shape=(4,), dtype=dtypes.string)
+    layer = get_layer_class()(
+        max_tokens=2,
+        num_oov_indices=1,
+        mask_token="""",
+        oov_token=""[OOV]"",
+        output_mode=index_lookup.COUNT,
+        dtype=dtypes.string)
+    count_data = layer(input_data)
+    self.assertAllEqual(count_data.shape.as_list(), [16, 2])
+
 
 @keras_parameterized.run_all_keras_modes
 class IndexLookupVocabularyTest(keras_parameterized.TestCase,
",0,test
db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for the most computationally intensive kernels.

PiperOrigin-RevId: 414808724
Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",flops_util.cc,"@@ -0,0 +1,54 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/delegates/gpu/common/flops_util.h""
+
+namespace tflite {
+namespace gpu {
+
+uint64_t GetConvolutionFlops(const BHWC& dst_shape, const OHWI& weights_shape) {
+  uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c;
+  // 2 flops per operation (s = a * b + s).
+  return dst_elements * weights_shape.i * weights_shape.w * weights_shape.h * 2;
+}
+
+uint64_t GetConvolutionWinograd4x4To6x6Flops(const BHWC& dst_shape,
+                                             const OHWI& weights_shape) {
+  return GetConvolutionFlops(dst_shape, weights_shape) / 4u;
+}
+
+uint64_t GetConvolutionTransposedFlops(const BHWC& src_shape,
+                                       const OHWI& weights_shape) {
+  uint64_t elements = src_shape.b * src_shape.h * src_shape.w * weights_shape.o;
+  // 2 flops per operation (s = a * b + s).
+  return elements * weights_shape.i * weights_shape.w * weights_shape.h * 2;
+}
+
+uint64_t GetDepthwiseConvolutionFlops(const BHWC& dst_shape,
+                                      const OHWI& weights_shape) {
+  uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c;
+  // 2 flops per operation (s = a * b + s).
+  return dst_elements * weights_shape.w * weights_shape.h * 2;
+}
+
+uint64_t GetFullyConnectedFlops(const BHWC& dst_shape,
+                                const OHWI& weights_shape) {
+  uint64_t dst_elements = dst_shape.b * dst_shape.h * dst_shape.w * dst_shape.c;
+  // 2 flops per operation (s = a * b + s).
+  return dst_elements * weights_shape.i * 2;
+}
+
+}  // namespace gpu
+}  // namespace tflite
",0,train
db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for the most computationally intensive kernels.

PiperOrigin-RevId: 414808724
Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",flops_util.h,"@@ -0,0 +1,42 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_
+
+#include <cstdint>
+
+#include ""tensorflow/lite/delegates/gpu/common/shape.h""
+
+namespace tflite {
+namespace gpu {
+
+uint64_t GetConvolutionFlops(const BHWC& dst_shape, const OHWI& weights_shape);
+uint64_t GetConvolutionWinograd4x4To6x6Flops(const BHWC& dst_shape,
+                                             const OHWI& weights_shape);
+
+uint64_t GetConvolutionTransposedFlops(const BHWC& src_shape,
+                                       const OHWI& weights_shape);
+
+uint64_t GetDepthwiseConvolutionFlops(const BHWC& dst_shape,
+                                      const OHWI& weights_shape);
+
+uint64_t GetFullyConnectedFlops(const BHWC& dst_shape,
+                                const OHWI& weights_shape);
+
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_COMMON_FLOPS_UTIL_H_
",0,train
db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for the most computationally intensive kernels.

PiperOrigin-RevId: 414808724
Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",operation_selector.cc,"@@ -15,9 +15,13 @@ limitations under the License.
 
 #include ""tensorflow/lite/delegates/gpu/common/selectors/operation_selector.h""
 
+#include <string>
+#include <utility>
+
 #include ""absl/strings/str_cat.h""
 #include ""absl/types/any.h""
 #include ""tensorflow/lite/delegates/gpu/common/data_type.h""
+#include ""tensorflow/lite/delegates/gpu/common/flops_util.h""
 #include ""tensorflow/lite/delegates/gpu/common/gpu_info.h""
 #include ""tensorflow/lite/delegates/gpu/common/operations.h""
 #include ""tensorflow/lite/delegates/gpu/common/selectors/convolution_selector.h""
@@ -126,6 +130,8 @@ absl::Status WinogradFromNode(const GpuInfo& gpu_info,
   conv.operation = SelectConvolutionForWinograd(attr, input_shape, gpu_info,
                                                 conv_def, hints);
   conv.name = ""convolution_winograd_4x4_6x6"";
+  conv.operation->flops_ =
+      GetConvolutionWinograd4x4To6x6Flops(output_shape, attr.weights.shape);
 
   OperationDef winograd_down_def;
   winograd_down_def.precision = op_def.precision;
@@ -226,6 +232,9 @@ absl::Status GPUOperationFromNodePart0(
           attr, weights_shape, dst_shape, gpu_info, conv_def, hints,
           &conv_weights_desc);
       conv_op.name = ""mat_mul_as_convolution"";
+      conv_op.operation->flops_ = GetConvolutionFlops(
+          outputs[0]->tensor.shape, OHWI(weights_shape.b, weights_shape.h,
+                                         weights_shape.w, weights_shape.c));
 
       int aligned_output =
           AlignByN(weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
@@ -330,11 +339,8 @@ absl::Status GPUOperationFromNodePart0(
           gpu_op = InitSingleOpSubgraph(inputs, outputs, gpu_subgraph);
           *gpu_op =
               SelectConvolution(attr, output_shape, gpu_info, op_def, hints);
-          uint64_t dst_elements =
-              output_shape.b * output_shape.h * output_shape.w * output_shape.c;
-          // 2 flops per element, we have for every element multiply and add
-          (*gpu_op)->flops_ = dst_elements * attr.weights.shape.i *
-                              attr.weights.shape.w * attr.weights.shape.h * 2;
+          (*gpu_op)->flops_ =
+              GetConvolutionFlops(output_shape, attr.weights.shape);
           return absl::OkStatus();
         }
       } else {
@@ -359,6 +365,9 @@ absl::Status GPUOperationFromNodePart0(
             attr, weights_shape, output_shape, gpu_info, conv_def, hints,
             &conv_weights_desc);
         conv_op.name = ""convolution_dynamic"";
+        conv_op.operation->flops_ = GetConvolutionFlops(
+            outputs[0]->tensor.shape, OHWI(weights_shape.b, weights_shape.h,
+                                           weights_shape.w, weights_shape.c));
 
         int aligned_output = AlignByN(
             weights_shape.b, conv_weights_desc.GetOutputGroupSize() * 4);
@@ -386,6 +395,8 @@ absl::Status GPUOperationFromNodePart0(
           node.operation.attributes);
       if (inputs.size() == 1) {
         *gpu_op = SelectConvolutionTransposed(attr, gpu_info, op_def);
+        (*gpu_op)->flops_ = GetConvolutionTransposedFlops(
+            inputs[0]->tensor.shape, attr.weights.shape);
         return absl::OkStatus();
       } else {
         // CONVOLUTION_TRANSPOSED with runtime weights
@@ -405,6 +416,8 @@ absl::Status GPUOperationFromNodePart0(
             attr, gpu_info, op_def, &weights_desc);
         conv_op.output_ids = {static_cast<int>(outputs[0]->id)};
         conv_op.name = ""conv_transposed_dynamic"";
+        conv_op.operation->flops_ = GetConvolutionTransposedFlops(
+            inputs[0]->tensor.shape, weights_shape);
 
         const int dst_depth = AlignByN(DivideRoundUp(weights_shape.o, 4),
                                        weights_desc.GetOutputGroupSize());
@@ -456,6 +469,8 @@ absl::Status GPUOperationFromNodePart0(
           node.operation.attributes);
       if (inputs.size() == 1) {
         *gpu_op = SelectDWConvolution(attr, gpu_info, op_def);
+        (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops(
+            outputs[0]->tensor.shape, attr.weights.shape);
       } else {
         if (inputs[1]->tensor.shape.b != 1) {
           return absl::UnimplementedError(
@@ -463,6 +478,10 @@ absl::Status GPUOperationFromNodePart0(
               ""!= 1"");
         }
         *gpu_op = SelectDWConvolutionDynamicWeights(attr, gpu_info, op_def);
+        (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops(
+            outputs[0]->tensor.shape,
+            OHWI(inputs[1]->tensor.shape.b, inputs[1]->tensor.shape.h,
+                 inputs[1]->tensor.shape.w, inputs[1]->tensor.shape.c));
       }
       return absl::OkStatus();
     }
@@ -477,6 +496,8 @@ absl::Status GPUOperationFromNodePart0(
           absl::any_cast<FullyConnectedAttributes>(node.operation.attributes);
       *gpu_op = SelectFullyConnected(attr, gpu_info, op_def,
                                      inputs[0]->tensor.shape.b);
+      (*gpu_op)->flops_ =
+          GetFullyConnectedFlops(outputs[0]->tensor.shape, attr.weights.shape);
       return absl::OkStatus();
     }
     case OperationType::FULLY_CONNECTED_INT8: {
",0,train
db0ec387400032d14e16932dea77a59cbdd5755d,tensorflow/tensorflow,"Added flops calculation for the most computationally intensive kernels.

PiperOrigin-RevId: 414808724
Change-Id: I95185d1f5c92368c91bac641dd6c4e89d9d16528",special_selector.cc,"@@ -15,8 +15,12 @@ limitations under the License.
 
 #include ""tensorflow/lite/delegates/gpu/common/selectors/special_selector.h""
 
+#include <string>
+#include <utility>
+
 #include ""absl/types/any.h""
 #include ""tensorflow/lite/delegates/gpu/common/data_type.h""
+#include ""tensorflow/lite/delegates/gpu/common/flops_util.h""
 #include ""tensorflow/lite/delegates/gpu/common/operations.h""
 #include ""tensorflow/lite/delegates/gpu/common/shape.h""
 #include ""tensorflow/lite/delegates/gpu/common/status.h""
@@ -108,6 +112,10 @@ absl::Status TryDepthwiseConvPlus1x1Conv(
   auto operation = CreateDepthwiseConvPlus1x1Conv(op_def, gpu_info, dw_attr,
                                                   conv_attr, relu_attr_ptr);
   *gpu_op = absl::make_unique<GPUOperation>(std::move(operation));
+  (*gpu_op)->flops_ = GetDepthwiseConvolutionFlops(dw_outputs[0]->tensor.shape,
+                                                   dw_attr.weights.shape) +
+                      GetConvolutionFlops(conv_outputs[0]->tensor.shape,
+                                          conv_attr.weights.shape);
   std::string fused_nodes = std::to_string(dw_node->id);
   if (relu_node) {
     fused_nodes += "" "" + std::to_string(relu_node->id);
",0,train
5fa4e1ac928b0512b28e955c588c5a7eab2ea046,tensorflow/tensorflow,"Parallel_for: fix converters for some ops that don't support broadcasting.

PiperOrigin-RevId: 215133508",pfor.py,"@@ -1987,14 +1987,12 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs(""Pow"", math_ops.pow)
 @RegisterPForWithArgs(""RealDiv"", math_ops.divide)
 @RegisterPForWithArgs(""Real"", math_ops.real)
-@RegisterPForWithArgs(""ReciprocalGrad"", math_ops.reciprocal_grad)
 @RegisterPForWithArgs(""Reciprocal"", math_ops.reciprocal)
 @RegisterPForWithArgs(""Relu6"", nn_ops.relu6)
 @RegisterPForWithArgs(""Relu"", nn_ops.relu)
 @RegisterPForWithArgs(""RightShift"", bitwise_ops.right_shift)
 @RegisterPForWithArgs(""Rint"", math_ops.rint)
 @RegisterPForWithArgs(""Round"", math_ops.round)
-@RegisterPForWithArgs(""RsqrtGrad"", math_ops.rsqrt_grad)
 @RegisterPForWithArgs(""Rsqrt"", math_ops.rsqrt)
 @RegisterPForWithArgs(""Selu"", nn_ops.selu)
 @RegisterPForWithArgs(""Sigmoid"", math_ops.sigmoid)
@@ -2003,7 +2001,6 @@ def _convert_cast(pfor_input):
 @RegisterPForWithArgs(""Sin"", math_ops.sin)
 @RegisterPForWithArgs(""Softplus"", nn_ops.softplus)
 @RegisterPForWithArgs(""Softsign"", nn_ops.softsign)
-@RegisterPForWithArgs(""SqrtGrad"", math_ops.sqrt_grad)
 @RegisterPForWithArgs(""Sqrt"", math_ops.sqrt)
 @RegisterPForWithArgs(""SquaredDifference"", math_ops.squared_difference)
 @RegisterPForWithArgs(""Square"", math_ops.square)
@@ -2095,6 +2092,9 @@ def _convert_biasaddgrad(pfor_input):
 @RegisterPForWithArgs(""SoftplusGrad"")
 @RegisterPForWithArgs(""SoftsignGrad"")
 @RegisterPForWithArgs(""TanhGrad"")
+@RegisterPForWithArgs(""SqrtGrad"")
+@RegisterPForWithArgs(""RsqrtGrad"")
+@RegisterPForWithArgs(""ReciprocalGrad"")
 def _convert_grads(pfor_input, op_type, *args, **kw_args):
   del args
   del kw_args
",0,train
9eb453a230590d49478c716b6bb5ace09d33087c,tensorflow/tensorflow,"Additional test for function conversions.

PiperOrigin-RevId: 239717899",call_trees_test.py,"@@ -36,7 +36,7 @@ class CallTreesTest(converter_testing.TestCase):
           converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3)
       self.assertListEqual(self.dynamic_calls, [((), {})])
 
-  def test_function_with_call_in_argument(self):
+  def test_function_with_expression_in_argument(self):
 
     def test_fn(f, g):
       return f(g() + 7) + 3
@@ -50,6 +50,20 @@ class CallTreesTest(converter_testing.TestCase):
           ((converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 7,), {}),
       ])
 
+  def test_function_with_call_in_argument(self):
+
+    def test_fn(f, g):
+      return f(g()) + 3
+
+    with self.converted(test_fn, call_trees, {}) as result:
+      self.assertEqual(
+          result.test_fn(None, None),
+          converter_testing.RESULT_OF_MOCK_CONVERTED_CALL + 3)
+      self.assertListEqual(self.dynamic_calls, [
+          ((), {}),
+          ((converter_testing.RESULT_OF_MOCK_CONVERTED_CALL,), {}),
+      ])
+
   def test_function_with_kwarg(self):
 
     def test_fn(f, a, b):
",0,train
da4f92ed3635ace5c1e9275b2f899755dbfdb64f,tensorflow/tensorflow,"[XLA:CPU] Remove injected functions from llvm.compiler.used

LLVM's InjectTLIMappings adds these so they don't get discarded early, but we
actually manually inline them so there is no reason to keep them around.

This is a bit of a hack, but avoids dangling references to functions that are
otherwise unreferenced.

PiperOrigin-RevId: 291900762
Change-Id: I9da177b5b5912dcb856c1700125332f368fab28f",llvm_ir_runtime.cc,"@@ -40,6 +40,38 @@ const char* const kLogV16F32SymbolName = ""__xla_cpu_runtime_LogV16F32AVX"";
 
 namespace {
 
+// Removes 'fn' from the list of symbols to keep in 'module'.
+void RemoveFunctionFromUsedList(llvm::Module* module, llvm::Function* fn) {
+  llvm::GlobalVariable* used = module->getGlobalVariable(""llvm.compiler.used"");
+  if (!used) {
+    return;
+  }
+
+  llvm::Type* int8_ptr_type = llvm::Type::getInt8PtrTy(module->getContext());
+  llvm::Constant* casted_fn = llvm::ConstantExpr::getBitCast(fn, int8_ptr_type);
+  auto* initializer = llvm::cast<llvm::ConstantArray>(used->getInitializer());
+  llvm::SmallVector<llvm::Constant*, 4> new_initializer;
+  for (auto& op : initializer->operands()) {
+    if (op != casted_fn) {
+      new_initializer.push_back(llvm::cast<llvm::Constant>(op));
+    }
+  }
+
+  if (new_initializer.size() == initializer->getNumOperands()) {
+    return;
+  }
+
+  used->eraseFromParent();
+  llvm::ArrayType* array_type =
+      llvm::ArrayType::get(int8_ptr_type, new_initializer.size());
+  used = new llvm::GlobalVariable(
+      *module, array_type, /*isConstant=*/false,
+      llvm::GlobalValue::AppendingLinkage,
+      llvm::ConstantArray::get(array_type, new_initializer),
+      ""llvm.compiler.used"");
+  used->setSection(""llvm.metadata"");
+}
+
 // Replaces calls to the function `fn_name` with the code generated by
 // fn_body_generator.
 //
@@ -71,10 +103,6 @@ void RewriteCalls(
     fn = new_fn;
   }
 
-  // Other libraries using tfcompile could also have generated a function with
-  // the same name and body.  Tell the linker to discard all but one instance.
-  fn->setLinkage(llvm::GlobalVariable::LinkOnceODRLinkage);
-
   llvm::LLVMContext* context = &module->getContext();
 
   llvm::BasicBlock* fn_body = llvm::BasicBlock::Create(*context, ""body"", fn);
@@ -115,10 +143,11 @@ void RewriteCalls(
     CHECK(
         llvm::InlineFunction(call_to_inline, inline_function_info).isSuccess());
   }
-  // Delete the function if all uses have been inlined.
-  if (fn->use_empty()) {
-    fn->eraseFromParent();
-  }
+  // LLVM's InjectTLIMappings adds functions that might be used for
+  // vectorization to 'llvm.compiler.used'. Remove it before deleting the
+  // function.
+  RemoveFunctionFromUsedList(module, fn);
+  fn->eraseFromParent();
 }
 
 llvm::Value* GenerateVF32Tanh(llvm::IRBuilder<>* b, llvm::Value* input,
",0,train
2b95bfb6d812d40c3ef9001c61068571b7c059c2,tensorflow/tensorflow,"Add MakeUnaryHlo() and MakeReverseHlo() to hlo_creation_utils.h/.cc

PiperOrigin-RevId: 296080049
Change-Id: I81d020a76da6820086a1a50379c77efc6c43918c",hlo_creation_utils.cc,"@@ -33,6 +33,15 @@ limitations under the License.
 namespace xla {
 using absl::StrCat;
 
+StatusOr<HloInstruction*> MakeUnaryHlo(HloOpcode opcode,
+                                       HloInstruction* operand) {
+  HloComputation* computation = operand->parent();
+  TF_ASSIGN_OR_RETURN(Shape unary_op_shape,
+                      ShapeInference::InferUnaryOpShape(opcode, operand));
+  return computation->AddInstruction(
+      HloInstruction::CreateUnary(unary_op_shape, opcode, operand));
+}
+
 StatusOr<HloInstruction*> MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs,
                                         HloInstruction* rhs) {
   HloComputation* computation = lhs->parent();
@@ -344,6 +353,15 @@ StatusOr<HloInstruction*> MakeReduceHlo(HloInstruction* operand,
       scalar_shape, operand, init_value, all_dims, reduce_computation));
 }
 
+StatusOr<HloInstruction*> MakeReverseHlo(HloInstruction* operand,
+                                         absl::Span<const int64> dimensions) {
+  HloComputation* computation = operand->parent();
+  TF_ASSIGN_OR_RETURN(Shape reverse_shape, ShapeInference::InferReverseShape(
+                                               operand->shape(), dimensions));
+  return computation->AddInstruction(
+      HloInstruction::CreateReverse(reverse_shape, operand, dimensions));
+}
+
 StatusOr<HloInstruction*> MakeSelectHlo(HloInstruction* pred,
                                         HloInstruction* on_true,
                                         HloInstruction* on_false,
",0,train
2b95bfb6d812d40c3ef9001c61068571b7c059c2,tensorflow/tensorflow,"Add MakeUnaryHlo() and MakeReverseHlo() to hlo_creation_utils.h/.cc

PiperOrigin-RevId: 296080049
Change-Id: I81d020a76da6820086a1a50379c77efc6c43918c",hlo_creation_utils.h,"@@ -27,6 +27,11 @@ namespace xla {
 // ergonomic.  We don't have a complete set of helpers yet -- I expect we'll
 // expand this interface as needed on an ad-hoc basis.
 
+// Creates a unary HLO instruction and adds it to the computation containing
+// `operand`.
+StatusOr<HloInstruction*> MakeUnaryHlo(HloOpcode opcode,
+                                       HloInstruction* operand);
+
 // Creates a binary HLO instruction and adds it to the computation containing
 // `lhs` and `rhs` (`lhs` and `rhs` must be in the same computation).
 StatusOr<HloInstruction*> MakeBinaryHlo(HloOpcode opcode, HloInstruction* lhs,
@@ -145,6 +150,11 @@ StatusOr<HloInstruction*> MakeReduceHlo(HloInstruction* operand,
                                         HloOpcode binary_opcode,
                                         HloModule* module);
 
+// Creates a Reverse HLO instruction and adds it to the computation containing
+// `operand`.
+StatusOr<HloInstruction*> MakeReverseHlo(HloInstruction* operand,
+                                         absl::Span<const int64> dimensions);
+
 // Creates a Select HLO instruction and adds it to the computation containing
 // the predicate. The on_true and on_false instructions must also be contained
 // in the same computation. If on_true and on_false are tuples, create a tuple
",0,train
2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading.

PiperOrigin-RevId: 242022733",initializers.py,"@@ -193,8 +193,14 @@ def get(identifier):
   if isinstance(identifier, dict):
     return deserialize(identifier)
   elif isinstance(identifier, six.string_types):
-    config = {'class_name': str(identifier), 'config': {}}
-    return deserialize(config)
+    identifier = str(identifier)
+    # We have to special-case functions that return classes.
+    # TODO(omalleyt): Turn these into classes or class aliases.
+    special_cases = ['he_normal', 'he_uniform', 'lecun_normal', 'lecun_uniform']
+    if identifier in special_cases:
+      # Treat like a class.
+      return deserialize({'class_name': identifier, 'config': {}})
+    return deserialize(identifier)
   elif callable(identifier):
     return identifier
   else:
",0,train
2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading.

PiperOrigin-RevId: 242022733",initializers_test.py,"@@ -23,6 +23,7 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python import tf2
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.platform import test
 
@@ -213,6 +214,18 @@ class KerasInitializersTest(test.TestCase):
     finally:
       tf2._force_enable = tf2_force_enabled  # pylint: disable=protected-access
 
+  def test_custom_initializer_saving(self):
+
+    def my_initializer(shape, dtype=None):
+      return array_ops.ones(shape, dtype=dtype)
+
+    inputs = keras.Input((10,))
+    outputs = keras.layers.Dense(1, kernel_initializer=my_initializer)(inputs)
+    model = keras.Model(inputs, outputs)
+    model2 = model.from_config(
+        model.get_config(), custom_objects={'my_initializer': my_initializer})
+    self.assertEqual(model2.layers[1].kernel_initializer, my_initializer)
+
 
 if __name__ == '__main__':
   test.main()
",0,train
2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading.

PiperOrigin-RevId: 242022733",regularizers.py,"@@ -106,8 +106,14 @@ def get(identifier):
   if isinstance(identifier, dict):
     return deserialize(identifier)
   elif isinstance(identifier, six.string_types):
-    config = {'class_name': str(identifier), 'config': {}}
-    return deserialize(config)
+    identifier = str(identifier)
+    # We have to special-case functions that return classes.
+    # TODO(omalleyt): Turn these into classes or class aliases.
+    special_cases = ['l1', 'l2', 'l1_l2']
+    if identifier in special_cases:
+      # Treat like a class.
+      return deserialize({'class_name': identifier, 'config': {}})
+    return deserialize(str(identifier))
   elif callable(identifier):
     return identifier
   else:
",0,train
2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading.

PiperOrigin-RevId: 242022733",regularizers_test.py,"@@ -25,6 +25,7 @@ from tensorflow.python import keras
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
+from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
@@ -92,6 +93,18 @@ class KerasRegularizersTest(keras_parameterized.TestCase):
     model.compile('sgd', 'mse', run_eagerly=testing_utils.should_run_eagerly())
     model.fit(x, y, batch_size=5, epochs=1)
 
+  def test_custom_regularizer_saving(self):
+
+    def my_regularizer(weights):
+      return math_ops.reduce_sum(math_ops.abs(weights))
+
+    inputs = keras.Input((10,))
+    outputs = keras.layers.Dense(1, kernel_regularizer=my_regularizer)(inputs)
+    model = keras.Model(inputs, outputs)
+    model2 = model.from_config(
+        model.get_config(), custom_objects={'my_regularizer': my_regularizer})
+    self.assertEqual(model2.layers[1].kernel_regularizer, my_regularizer)
+
 
 if __name__ == '__main__':
   test.main()
",0,train
2f6edb4f0cf63f9f7dfaf8e50e593a96d9bf2779,tensorflow/tensorflow,"Fix custom initializer and regularizer loading.

PiperOrigin-RevId: 242022733",generic_utils.py,"@@ -200,17 +200,20 @@ def deserialize_keras_object(identifier,
       with CustomObjectScope(custom_objects):
         return cls(**cls_config)
   elif isinstance(identifier, six.string_types):
-    function_name = identifier
-    if custom_objects and function_name in custom_objects:
-      fn = custom_objects.get(function_name)
-    elif function_name in _GLOBAL_CUSTOM_OBJECTS:
-      fn = _GLOBAL_CUSTOM_OBJECTS[function_name]
+    object_name = identifier
+    if custom_objects and object_name in custom_objects:
+      obj = custom_objects.get(object_name)
+    elif object_name in _GLOBAL_CUSTOM_OBJECTS:
+      obj = _GLOBAL_CUSTOM_OBJECTS[object_name]
     else:
-      fn = module_objects.get(function_name)
-      if fn is None:
-        raise ValueError('Unknown ' + printable_module_name + ':' +
-                         function_name)
-    return fn
+      obj = module_objects.get(object_name)
+      if obj is None:
+        raise ValueError('Unknown ' + printable_module_name + ':' + object_name)
+    # Classes passed by name are instantiated with no args, functions are
+    # returned as-is.
+    if tf_inspect.isclass(obj):
+      return obj()
+    return obj
   else:
     raise ValueError('Could not interpret serialized ' + printable_module_name +
                      ': ' + identifier)
",0,train
835c5d2948527ffb4af2b5ab8d453ba834cc364a,tensorflow/tensorflow,"Replace calls to tf_inspect.getargspec with tf_inspect.getfullargspec.

PiperOrigin-RevId: 209511641",sequential.py,"@@ -239,9 +239,9 @@ class Sequential(Model):
     x = inputs
     for layer in self.layers:
       kwargs = {}
-      if 'mask' in tf_inspect.getargspec(layer.call).args:
+      if 'mask' in tf_inspect.getfullargspec(layer.call).args:
         kwargs['mask'] = mask
-      if 'training' in tf_inspect.getargspec(layer.call).args:
+      if 'training' in tf_inspect.getfullargspec(layer.call).args:
         kwargs['training'] = training
 
       if isinstance(layer, Network) and layer._compute_output_and_mask_jointly:
",0,test
2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default.

Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program).

[XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension.

Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences.
PiperOrigin-RevId: 420755696
Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",py_client.cc,"@@ -258,7 +258,7 @@ StatusOr<std::shared_ptr<PyExecutable>> PyClient::Compile(
 }
 
 StatusOr<std::shared_ptr<PyExecutable>> PyClient::CompileMlir(
-    absl::string_view mlir_module, CompileOptions options) {
+    std::string mlir_module, CompileOptions options) {
   std::unique_ptr<PjRtExecutable> executable;
   absl::optional<std::string> fingerprint;
   {
",0,train
2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default.

Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program).

[XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension.

Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences.
PiperOrigin-RevId: 420755696
Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",py_client.h,"@@ -151,8 +151,8 @@ class PyClient : public std::enable_shared_from_this<PyClient> {
 
   StatusOr<std::shared_ptr<PyExecutable>> Compile(
       const XlaComputation& computation, CompileOptions options);
-  StatusOr<std::shared_ptr<PyExecutable>> CompileMlir(
-      absl::string_view mlir_module, CompileOptions options);
+  StatusOr<std::shared_ptr<PyExecutable>> CompileMlir(std::string mlir_module,
+                                                      CompileOptions options);
 
   StatusOr<pybind11::bytes> SerializeExecutable(
       const PyExecutable& executable) const;
",0,train
2171a012c117664d4f9928b7c1f75cbcc15be416,tensorflow/tensorflow,"Enable JAX->MLIR lowering by default.

Before this change, JAX produces HLO using the XLA:Python builder APIs. After this change JAX produces MHLO using MLIR:Python APIs, and converts the MHLO to HLO for compilation with XLA. This is a lateral shift that should have little immediate impact, but unlocks a number of interesting opportunities in the future (e.g., mixing MLIR dialects within a JAX program).

[XLA:Python] Pass MLIR input as a std::string to work around https://github.com/pybind/pybind11/issues/2765. A better fix would be to update pybind11 but that is hitting Windows-related hurdles; for now, just avoid relying on reference lifetime extension.

Brax: update test seeds to avoid test failures. Additional constant folding (canonicalization) in the MHLO lowering path seems to cause small numerical differences.
PiperOrigin-RevId: 420755696
Change-Id: I5e2626ea1e82c046a847300bf6bbe94208007802",xla_client.py,"@@ -46,6 +46,9 @@ profiler = _xla.profiler
 # changes.
 _version = 51
 
+# Version number for MLIR:Python components.
+mlir_api_version = 1
+
 xla_platform_names = {
     'cpu': 'Host',
     'gpu': 'CUDA',
",0,train
2fd76cbef3b30bd10807d6d660fedfa7f5451bf5,tensorflow/tensorflow,"batch_matmul_op_test.py: Updated to pass in TF2, by using gradient_checker_v2 (second try).

PiperOrigin-RevId: 224190234",batch_matmul_op_test.py,"@@ -20,9 +20,9 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.framework import constant_op
+from tensorflow.python import tf2
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gradient_checker
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
@@ -105,32 +105,32 @@ class BatchMatmulOpTest(test.TestCase):
 
   def _testNonEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape):
 
-    def compareNonEmpty(self, a_shape, b_shape):
+    def CompareNonEmpty(self, a_shape, b_shape):
       self._compare(
           self._rand(a_shape, dtype),
           self._rand(b_shape, dtype), adjoint_a, adjoint_b, use_static_shape)
 
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 5])
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 1])
-    compareNonEmpty(self, [1, 1, 3], [1, 3, 5])
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 5])
-    compareNonEmpty(self, [7, 1, 3], [7, 3, 5])
-    compareNonEmpty(self, [7, 2, 3], [7, 3, 1])
-    compareNonEmpty(self, [7, 2, 3], [7, 3, 5])
-    compareNonEmpty(self, [10, 64, 75], [10, 75, 30])
-    compareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 1])
+    CompareNonEmpty(self, [1, 1, 3], [1, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 5])
+    CompareNonEmpty(self, [7, 1, 3], [7, 3, 5])
+    CompareNonEmpty(self, [7, 2, 3], [7, 3, 1])
+    CompareNonEmpty(self, [7, 2, 3], [7, 3, 5])
+    CompareNonEmpty(self, [10, 64, 75], [10, 75, 30])
+    CompareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5])
 
   def _testEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape):
 
-    def compareEmpty(self, a_shape, b_shape):
+    def CompareEmpty(self, a_shape, b_shape):
       self._compare(
           np.zeros(a_shape).astype(dtype),
           np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b,
           use_static_shape)
 
-    compareEmpty(self, [0, 3, 2], [0, 2, 4])
-    compareEmpty(self, [3, 0, 2], [3, 2, 5])
-    compareEmpty(self, [3, 3, 2], [3, 2, 0])
+    CompareEmpty(self, [0, 3, 2], [0, 2, 4])
+    CompareEmpty(self, [3, 0, 2], [3, 2, 5])
+    CompareEmpty(self, [3, 3, 2], [3, 2, 0])
 
 
 def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b, use_static_shape):
@@ -154,17 +154,13 @@ class BatchMatmulGradientTest(test.TestCase):
     y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
     epsilon = np.finfo(x.dtype).eps
     delta = epsilon**(1.0 / 3.0)
+    def Loss(x, y):
+      z = math_ops.matmul(x, y, adjoint_a, adjoint_b)
+      return math_ops.reduce_sum(z)
     with self.cached_session(use_gpu=True):
-      inx = constant_op.constant(x)
-      iny = constant_op.constant(y)
-      z = math_ops.matmul(inx, iny, adjoint_a, adjoint_b)
-      loss = math_ops.reduce_sum(z)
-      ((x_jacob_t, x_jacob_n),
-       (y_jacob_t, y_jacob_n)) = gradient_checker.compute_gradient(
-           [inx, iny], [x.shape, y.shape],
-           loss, [1],
-           x_init_value=[x, y],
-           delta=delta)
+      ((x_jacob_t, y_jacob_t),
+       (x_jacob_n, y_jacob_n)) = gradient_checker_v2.compute_gradient(
+           Loss, [x, y], delta=delta)
       tol = 20 * delta
       self.assertAllClose(x_jacob_t, x_jacob_n, rtol=tol, atol=tol)
       self.assertAllClose(y_jacob_t, y_jacob_n, rtol=tol, atol=tol)
@@ -202,11 +198,11 @@ if __name__ == ""__main__"":
     for adjoint_a_ in False, True:
       for adjoint_b_ in False, True:
         name = ""%s_%s_%s"" % (dtype_.__name__, adjoint_a_, adjoint_b_)
-        for use_static_shape in True, False:
+        for use_static_shape_ in set([True, tf2.enabled()]):
           setattr(BatchMatmulOpTest,
-                  ""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape),
+                  ""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape_),
                   _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_,
-                                        use_static_shape))
+                                        use_static_shape_))
         if dtype_ is not np.int32:
           setattr(BatchMatmulGradientTest, ""testBatchMatmulGradient_"" + name,
                   _GetBatchMatmulGradientTest(dtype_, adjoint_a_, adjoint_b_))
",0,train
fd3adc51e4112572c07db15c9548cf7e70586b50,tensorflow/tensorflow,"Make lite_test.py pass with MLIR converter.

PiperOrigin-RevId: 290998140
Change-Id: I3c6164c96610e10ec170829cacfab8b976ccf297",lite_test.py,"@@ -1491,33 +1491,6 @@ class FromFrozenGraphObjectDetection(LiteTest):
                      output_details[3]['name'])
     self.assertTrue(([1] == output_details[3]['shape']).all())
 
-  def testTFLiteGraphDefMissingShape(self):
-    # Tests invalid cases for the model that cannot be loaded in TensorFlow.
-    self._initObjectDetectionArgs()
-
-    # Missing `input_shapes`.
-    with self.assertRaises(ValueError) as error:
-      lite.TFLiteConverter.from_frozen_graph(self._graph_def_file,
-                                             self._input_arrays,
-                                             self._output_arrays)
-    self.assertEqual('input_shapes must be defined for this model.',
-                     str(error.exception))
-
-  def testTFLiteGraphDefInvalidShape(self):
-    # Tests invalid cases for the model that cannot be loaded in TensorFlow.
-    self._initObjectDetectionArgs()
-
-    # `input_shapes` does not contain the names in `input_arrays`.
-    with self.assertRaises(ValueError) as error:
-      lite.TFLiteConverter.from_frozen_graph(
-          self._graph_def_file,
-          self._input_arrays,
-          self._output_arrays,
-          input_shapes={'invalid-value': [1, 19]})
-    self.assertEqual(
-        'input_shapes must contain a value for each item in input_array.',
-        str(error.exception))
-
 
 class FromSavedModelTest(TestModels):
 
",0,test
5707e2e2a2ddabf218a56a950a27358a7222bc97,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-08-03

PiperOrigin-RevId: 388406270
Change-Id: Ifc2c2ff6a7156c25fba8b3ba85b5cf1819123c51",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 8, 2)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 8, 3)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
9cfcec41235ef6426d1fae9a7b44cb02c1e19274,tensorflow/tensorflow,"Automated rollback of commit a7e7582e9a2b698054bf93aa27e53ebbc081d1a6. Revert #31106.

PiperOrigin-RevId: 263621326",math_grad.py,"@@ -192,26 +192,22 @@ def _SumGrad(op, grad):
         return [array_ops.tile(grad, tile_scaling), None]
 
   input_shape = array_ops.shape(op.inputs[0])
-
-  if not op.get_attr(""keep_dims""):
-    # TODO(apassos) remove this once device placement for eager ops makes more
-    # sense.
-    with ops.colocate_with(input_shape):
-      output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
-    grad = array_ops.reshape(grad, output_shape_kept_dims)
-  return [array_ops.broadcast_to(grad, input_shape), None]
+  # TODO(apassos) remove this once device placement for eager ops makes more
+  # sense.
+  with ops.colocate_with(input_shape):
+    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
+    tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
+  grad = array_ops.reshape(grad, output_shape_kept_dims)
+  return [array_ops.tile(grad, tile_scaling), None]
 
 
 def _MinOrMaxGrad(op, grad):
   """"""Gradient for Min or Max. Amazingly it's precisely the same code.""""""
   input_shape = array_ops.shape(op.inputs[0])
+  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
   y = op.outputs[0]
-  if not op.get_attr(""keep_dims""):
-    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
-    y = array_ops.reshape(y, output_shape_kept_dims)
-    grad = array_ops.reshape(grad, output_shape_kept_dims)
-  else:
-    output_shape_kept_dims = array_ops.shape(y)
+  y = array_ops.reshape(y, output_shape_kept_dims)
+  grad = array_ops.reshape(grad, output_shape_kept_dims)
 
   # Compute the number of selected (maximum or minimum) elements in each
   # reduction dimension. If there are multiple minimum or maximum elements
@@ -267,11 +263,10 @@ def _ProdGrad(op, grad):
   reduction_indices = array_ops.reshape(op.inputs[1], [-1])
 
   # Expand grad to full input shape
-  if not op.get_attr(""keep_dims""):
-    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
-    grad = array_ops.reshape(grad, output_shape_kept_dims)
-
-  grad = array_ops.broadcast_to(grad, input_shape)
+  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
+  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
+  grad = array_ops.reshape(grad, output_shape_kept_dims)
+  grad = array_ops.tile(grad, tile_scaling)
 
   # Pack all reduced dimensions into a single one, so we can perform the
   # cumprod ops. If the reduction dims list is empty, it defaults to float32,
",0,train
90842ada9e7ef534eb77ff2890047bf75070fb56,tensorflow/tensorflow,Cosmetic changes,mmap_allocation.cc,"@@ -60,11 +60,11 @@ MMAPAllocation::MMAPAllocation(const char* filename,
 
 MMAPAllocation::~MMAPAllocation() {
   if (valid()) {
-     UnmapViewOfFile( mmapped_buffer_ );
+     UnmapViewOfFile(mmapped_buffer_);
   }
 
   if (file_mapping_ != nullptr) {
-    CloseHandle( file_mapping_ );
+    CloseHandle(file_mapping_);
   }
 
   if (file_handle_ != nullptr){
",0,train
6062f26f626555ca980c716d52c6204e17745503,tensorflow/tensorflow,"Adapt the shape function for `tf.fill()` to handle partial shapes.

Fixes #3102.
Change: 126324523",constant_op_test.py,"@@ -516,6 +516,9 @@ class FillTest(tf.test.TestCase):
         tf.placeholder(tf.int32, shape=(4,)), 3.0)
     self.assertEqual([None, None, None, None], f.get_shape().as_list())
 
+    f = tf.fill([tf.placeholder(tf.int32, shape=()), 17], 1.0)
+    self.assertEqual([None, 17], f.get_shape().as_list())
+
   def testGradient(self):
     with self.test_session():
       in_v = tf.constant(5.0)
",0,train
6062f26f626555ca980c716d52c6204e17745503,tensorflow/tensorflow,"Adapt the shape function for `tf.fill()` to handle partial shapes.

Fixes #3102.
Change: 126324523",array_ops.py,"@@ -1835,16 +1835,16 @@ def _FillShape(op):
 
   Returns:
     A single-element list containing the shape of the output.
+
+  Raises:
+    ValueError: If the shapes or arguments are known to be invalid.
   """"""
-  dimensions_shape = op.inputs[0].get_shape().with_rank(1)
-  op.inputs[1].get_shape().assert_is_compatible_with(tensor_shape.scalar())
+  op.inputs[0].get_shape().assert_has_rank(1)
+  op.inputs[1].get_shape().assert_has_rank(0)
   fill_dims = tensor_util.constant_value(op.inputs[0])
-  if fill_dims is None:
-    # Attempt to infer the rank of the output from the length of
-    # dimensions.
-    return [tensor_shape.unknown_shape(ndims=dimensions_shape[0].value)]
-  else:
-    return [tensor_shape.TensorShape(fill_dims.tolist())]
+  if fill_dims is not None and any(d < 0 for d in fill_dims):
+    raise ValueError(""Fill dimensions must be >= 0"")
+  return [tensor_util.constant_value_as_shape(op.inputs[0])]
 
 
 @ops.RegisterShape(""InvertPermutation"")
",0,train
cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator.

This commit mitigates external fragmentation in bfc_allocator by reallocation.
That is, although the sum of regions and unallocated bytes is larger than the
requested bytes, the bfc_allocator may still fail to allocate a large enough
contiguous region to fulfill the request due to fragmentation. To avoid this
case, a reallocation feature is implemented to deallocate free regions so that
a larger region can be formed.",bfc_allocator.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/bfc_allocator.h""
 
 #include <atomic>
+#include ""absl/container/flat_hash_set.h""
 
 #include ""tensorflow/core/common_runtime/allocator_retry.h""
 #include ""tensorflow/core/lib/core/bits.h""
@@ -260,6 +261,76 @@ size_t BFCAllocator::RoundedBytes(size_t bytes) {
   return rounded_bytes;
 }
 
+bool BFCAllocator::DeallocateFreeRegions(size_t rounded_bytes) {
+  // Searching for free regions.
+  absl::flat_hash_set<void*> free_region_ptrs;
+  size_t total_free_bytes = 0;
+  for (const auto& region : region_manager_.regions()) {
+    ChunkHandle h = region_manager_.get_handle(region.ptr());
+    bool any_use = false;
+    while (h != kInvalidChunkHandle) {
+      const Chunk* c = ChunkFromHandle(h);
+      if (c->in_use()) {
+        any_use = true;
+        break;
+      }
+      h = c->next;
+    }
+
+    if (!any_use) {
+      VLOG(2) << ""Found free region with ptr = "" << region.ptr();
+      free_region_ptrs.insert(region.ptr());
+      total_free_bytes += region.memory_size();
+    }
+  }
+
+  if (total_free_bytes == 0) {
+    return false;
+  }
+
+  // Rough estimation to check whether deallocation can help.
+  size_t available_bytes =
+      memory_limit_ - total_region_allocated_bytes_ + total_free_bytes;
+  if (rounded_bytes > available_bytes) {
+    return false;
+  }
+
+  VLOG(INFO) << ""Re-allocate memory regions to avoid OOM due to memory""
+             << "" fragmentation. If you see this message frequently, note""
+             << "" that the re-allocation may incur performance overhead despite""
+             << "" better memory utilization. You may try smaller batch sizes""
+             << "" to see if it can give you better performance."";
+
+  // Deallocate free regions.
+  auto it = region_manager_.regions().begin();
+  while (it != region_manager_.regions().end()) {
+    if (!free_region_ptrs.contains(it->ptr())) {
+      ++it;
+      continue;
+    }
+
+    VLOG(2) << ""Deallocate region with ptr = "" << it->ptr();
+    // Remove all chunk registrations from Bins.
+    ChunkHandle h = region_manager_.get_handle(it->ptr());
+    while (h != kInvalidChunkHandle) {
+      const Chunk* c = ChunkFromHandle(h);
+      if (c->bin_num != kInvalidBinNum) {
+        RemoveFreeChunkFromBin(h);
+      }
+      auto h_to_delete = h;
+      h = c->next;
+      DeleteChunk(h_to_delete);
+    }
+
+    // Deallocate the memory.
+    sub_allocator_->Free(it->ptr(), it->memory_size());
+    total_region_allocated_bytes_ -= it->memory_size();
+    it = region_manager_.RemoveAllocationRegion(it);
+  }
+
+  return true;
+}
+
 void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
                                         size_t num_bytes,
                                         bool dump_log_on_failure,
@@ -307,6 +378,18 @@ void* BFCAllocator::AllocateRawInternal(size_t unused_alignment,
     }
   }
 
+  // Reaching this point means that no chunks can satisfy the request. Also,
+  // the unallocated bytes cannot satisfy the request. Before giving up, let's
+  // try deallocating free regions so that suballocator can combine them with
+  // the unallocated bytes and form a larger region.
+  if (DeallocateFreeRegions(rounded_bytes) &&
+      Extend(unused_alignment, rounded_bytes)) {
+    ptr = FindChunkPtr(bin_num, rounded_bytes, num_bytes, freed_before);
+    if (ptr != nullptr) {
+      return ptr;
+    }
+  }
+
   // We searched all bins for an existing free chunk to use and
   // couldn't find one.  This means we must have run out of memory,
   // Dump the memory log for analysis.
",0,test
cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator.

This commit mitigates external fragmentation in bfc_allocator by reallocation.
That is, although the sum of regions and unallocated bytes is larger than the
requested bytes, the bfc_allocator may still fail to allocate a large enough
contiguous region to fulfill the request due to fragmentation. To avoid this
case, a reallocation feature is implemented to deallocate free regions so that
a larger region can be formed.",bfc_allocator.h,"@@ -309,6 +309,11 @@ class BFCAllocator : public Allocator {
       regions_.insert(entry, AllocationRegion(ptr, memory_size));
     }
 
+    std::vector<AllocationRegion>::const_iterator RemoveAllocationRegion(
+        std::vector<AllocationRegion>::const_iterator it) {
+      return regions_.erase(it);
+    }
+
     ChunkHandle get_handle(const void* p) const {
       return RegionFor(p)->get_handle(p);
     }
@@ -354,6 +359,14 @@ class BFCAllocator : public Allocator {
   bool Extend(size_t alignment, size_t rounded_bytes)
       EXCLUSIVE_LOCKS_REQUIRED(lock_);
 
+  // Deallocate free regions to give back the memory to suballocator, so that
+  // we can re-allocate a larger region.  The main use scenario of this function
+  // is when OOM happens but we have free regions and the sum of sizes of free
+  // regions and unallocated bytes is larger than the requested size, implying
+  // (external) memory fragmentation.  Returns true if any free regions were
+  // deallocated; false otherwise.
+  bool DeallocateFreeRegions(size_t rounded_bytes);
+
   // Returns a pointer to an underlying allocated chunk of size
   // 'rounded_bytes'.
   void* FindChunkPtr(BinNum bin_num, size_t rounded_bytes, size_t num_bytes,
",0,test
cc70f17486c0b5416bc2c5d5d6e9014d2f48004f,tensorflow/tensorflow,"Add reallocation capability to bfc_allocator.

This commit mitigates external fragmentation in bfc_allocator by reallocation.
That is, although the sum of regions and unallocated bytes is larger than the
requested bytes, the bfc_allocator may still fail to allocate a large enough
contiguous region to fulfill the request due to fragmentation. To avoid this
case, a reallocation feature is implemented to deallocate free regions so that
a larger region can be formed.",gpu_bfc_allocator_test.cc,"@@ -568,6 +568,47 @@ class GPUBFCAllocatorPrivateMethodsTest : public ::testing::Test {
     EXPECT_EQ(GPUBFCAllocator::RoundedBytes(1LL << 31),
               force_no_allow_growth_allocator.curr_region_allocation_bytes_);
   }
+
+  void TestRegionDeallocation() {
+    setenv(""TF_FORCE_GPU_ALLOW_GROWTH"", ""unparseable"", 1);
+    GPUOptions options;
+    options.set_allow_growth(true);
+
+    // Max of 2GiB, but starts out small.
+    PlatformGpuId platform_gpu_id(0);
+    GPUMemAllocator* sub_allocator = new GPUMemAllocator(
+        GpuIdUtil::ExecutorForPlatformGpuId(platform_gpu_id).ValueOrDie(),
+        platform_gpu_id, /*use_unified_memory=*/false, {}, {});
+    GPUBFCAllocator a(sub_allocator, 1LL << 31, options, ""GPU_0_bfc"");
+
+    // Allocate 128 raw pointers of 4 megs.
+    const size_t size = 1LL << 22;
+    std::vector<void*> initial_ptrs;
+    for (size_t s = 0; s < 128; s++) {
+      void* raw = a.AllocateRaw(1, size);
+      initial_ptrs.push_back(raw);
+    }
+
+    // Make sure there are more than 1 regions in preparation for the test.
+    EXPECT_LT(1, a.region_manager_.regions().size());
+
+    // Deallocate all the memories except the last one.
+    for (size_t i = 0; i < initial_ptrs.size() - 1; i++) {
+      a.DeallocateRaw(initial_ptrs[i]);
+    }
+
+    // Deallocate free regions and there shall be only one region left.
+    EXPECT_EQ(true, a.DeallocateFreeRegions(/*rounded_bytes=*/0));
+    EXPECT_EQ(1, a.region_manager_.regions().size());
+
+    // There should be only one chunk left in bins.
+    size_t num_chunks_in_bins = 0;
+    for (int i = 0; i < BFCAllocator::kNumBins; i++) {
+      BFCAllocator::Bin* bin = a.BinFromIndex(i);
+      num_chunks_in_bins += bin->free_chunks.size();
+    }
+    EXPECT_EQ(1, num_chunks_in_bins);
+  }
 };
 
 TEST_F(GPUBFCAllocatorPrivateMethodsTest, BinDebugInfo) { TestBinDebugInfo(); }
@@ -580,6 +621,10 @@ TEST_F(GPUBFCAllocatorPrivateMethodsTest, ForceAllowGrowth) {
   TestForceAllowGrowth();
 }
 
+TEST_F(GPUBFCAllocatorPrivateMethodsTest, TestRegionDeallocation) {
+  TestRegionDeallocation();
+}
+
 }  // namespace tensorflow
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,test
95ea3404528afcb1a74dd5f0946ea8d17beda28b,tensorflow/tensorflow,"Handle next-with-default. Fixes #37983.

PiperOrigin-RevId: 306065738
Change-Id: I0964d7c8ceee1b859b8bb5033e1473654c0719bf",py_builtins.py,"@@ -21,25 +21,29 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
 import inspect
 
+import numpy as np
 import six
 
 from tensorflow.python.autograph.utils import py_func
 from tensorflow.python.autograph.utils import tensors
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.data.ops import iterator_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_parsing_ops
 from tensorflow.python.ops import gen_string_ops
 from tensorflow.python.ops import list_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sort_ops
-from tensorflow.python.ops import check_ops
 from tensorflow.python.util import lazy_loader
 from tensorflow.python.util import nest
 
@@ -397,6 +401,96 @@ def _py_map(fn, *iterables):
   return map(fn, *iterables)
 
 
+def next_(iterator, default=UNSPECIFIED):
+  if isinstance(iterator, iterator_ops.OwnedIterator):
+    return next_tf_iterator(iterator, default)
+  return next_py(iterator, default)
+
+
+# TODO(mdan): These checks should be easier. Fix the nest API.
+def _verify_spec_compatible(input_name, spec_name, input_, spec):
+  """"""Verifies that a symbol has a type compatible vith a given spec.
+
+  Here, compatibility is viewed in the general TensorFlow sense: that the dtypes
+  are the same after implicit conversion, if both are tensors.
+
+  This verifier ensures consistent treatment of types across AutoGraph.
+
+  Args:
+    input_name: A name to use for `input_` in error messages.
+    spec_name: A name to use for `spec` in error messages.
+    input_: Any, value to verify.
+    spec: TypeSpec that `input_` must be compatible with.
+
+  Raises:
+    ValueError if the two types have been determined not to be compatible.
+  """"""
+  assert isinstance(spec, tensor_spec.TensorSpec)
+  if input is None:
+    # TODO(mdan): raise from None when switching to Py3.
+    raise ValueError('{} cannot be None'.format(input_name))
+
+  # TODO(mdan): Use TensorCompatible when ready.
+  if isinstance(input_, (bool, int, float, str, np.ndarray)):
+    input_ = ops.convert_to_tensor_v2(input_)
+
+  input_dtype = getattr(input_, 'dtype', None)
+
+  if input_dtype != spec.dtype:
+    input_dtype_str = 'no dtype' if input_dtype is None else str(input_dtype)
+
+    raise TypeError(
+        '{} must have the same dtype as {}. Expected {}, got {}'.format(
+            input_name, spec_name, spec.dtype, input_dtype_str))
+
+
+def _verify_structure_compatible(input_name, spec_name, input_, spec):
+  """"""Verifies that possibly-structured symbol has types compatible vith another.
+
+  See _verify_spec_compatible for a more concrete meaning of ""compatible"".
+  Unlike _verify_spec_compatible, which handles singular Tensor-spec objects,
+  _verify_structure_compatible can process structures recognized by tf.nest.
+
+  Args:
+    input_name: A name to use for `input_` in error messages.
+    spec_name: A name to use for `spec` in error messages.
+    input_: Any, value to verify. May, but doesn't need to, be a structure.
+    spec: Any, value that `input_` must be compatible with. May, but doesn't
+        need to, be a structure.
+
+  Raises:
+    ValueError if the two types have been determined not to be compatible.
+  """"""
+  try:
+    nest.assert_same_structure(input_, spec, expand_composites=True)
+  except (ValueError, TypeError) as e:
+    raise TypeError(
+        '{} must have the same element structure as {}.\n\n{}'.format(
+            input_name, spec_name, str(e)))
+
+  nest.map_structure(
+      functools.partial(_verify_spec_compatible, input_name, spec_name), input_,
+      spec)
+
+
+def next_tf_iterator(iterator, default=UNSPECIFIED):
+  if default is UNSPECIFIED:
+    # Without a default, fall back to the ""normal"" behavior which raises
+    # a runtime exception.
+    return next(iterator)
+  opt_iterate = iterator_ops.get_next_as_optional(iterator)
+  _verify_structure_compatible(
+      'the default argument', 'the iterate', default, iterator.element_spec)
+  return control_flow_ops.cond(
+      opt_iterate.has_value(), opt_iterate.get_value, lambda: default)
+
+
+def next_py(iterator, default=UNSPECIFIED):
+  if default is UNSPECIFIED:
+    return next(iterator)
+  return next(iterator, default)
+
+
 def filter_(function, iterable):
   if isinstance(iterable, dataset_ops.DatasetV2):
     return _tf_dataset_filter(function, iterable)
@@ -515,18 +609,18 @@ if six.PY2:
 
 BUILTIN_FUNCTIONS_MAP = {
     'abs': abs_,
+    'any': any_,
+    'all': all_,
+    'enumerate': enumerate_,
+    'filter': filter_,
     'float': float_,
     'int': int_,
     'len': len_,
+    'map': map_,
+    'next': next_,
     'print': print_,
     'range': range_,
-    # TODO(mdan): This might make more sense as tf.data.range.
+    'sorted': sorted_,
     'xrange': range_,
-    'enumerate': enumerate_,
     'zip': zip_,
-    'map': map_,
-    'filter': filter_,
-    'any': any_,
-    'all': all_,
-    'sorted': sorted_,
 }
",0,test
95ea3404528afcb1a74dd5f0946ea8d17beda28b,tensorflow/tensorflow,"Handle next-with-default. Fixes #37983.

PiperOrigin-RevId: 306065738
Change-Id: I0964d7c8ceee1b859b8bb5033e1473654c0719bf",py_builtins_test.py,"@@ -27,13 +27,14 @@ from tensorflow.python.autograph.core import function_wrappers
 from tensorflow.python.autograph.operators import data_structures
 from tensorflow.python.autograph.operators import py_builtins
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.platform import test
 
 
@@ -248,6 +249,86 @@ class PyBuiltinsTest(test.TestCase):
       self.assertAllEqual(self.evaluate(iterator.get_next()), -34)
       self.assertAllEqual(self.evaluate(iterator.get_next()), 9)
 
+  def test_next_normal(self):
+    iterator = iter([1, 2, 3])
+    self.assertEqual(py_builtins.next_(iterator), 1)
+    self.assertEqual(py_builtins.next_(iterator), 2)
+    self.assertEqual(py_builtins.next_(iterator), 3)
+    with self.assertRaises(StopIteration):
+      py_builtins.next_(iterator)
+    self.assertEqual(py_builtins.next_(iterator, 4), 4)
+
+  def test_next_tf_iterator(self):
+    # graph-mode iterators are only supported inside tf.function.
+    @def_function.function(autograph=False)
+    def test_fn(go_out_of_range, with_default):
+      iterator = iter(dataset_ops.Dataset.range(3))
+      retval = (
+          py_builtins.next_(iterator),
+          py_builtins.next_(iterator),
+          py_builtins.next_(iterator),
+      )
+      if go_out_of_range:
+        if with_default:
+          retval += (
+              py_builtins.next_(iterator,
+                                constant_op.constant(-3, dtype=dtypes.int64)),
+              py_builtins.next_(iterator,
+                                constant_op.constant(-4, dtype=dtypes.int64)),
+          )
+        else:
+          py_builtins.next_(iterator)
+      return retval
+
+    self.assertAllEqual(
+        self.evaluate(test_fn(go_out_of_range=False, with_default=None)),
+        (0, 1, 2))
+    self.assertAllEqual(
+        self.evaluate(test_fn(go_out_of_range=True, with_default=True)),
+        (0, 1, 2, -3, -4))
+    with self.assertRaises(errors_impl.OutOfRangeError):
+      self.evaluate(test_fn(go_out_of_range=True, with_default=False))
+
+  def test_next_tf_iterator_error_checking(self):
+    # graph-mode iterators are only supported inside tf.function.
+    @def_function.function(autograph=False)
+    def test_fn():
+      iterator = iter(dataset_ops.Dataset.range(1))
+      py_builtins.next_(iterator)
+      py_builtins.next_(iterator, constant_op.constant(-3))
+
+    # Dataset.range defaults to int64,
+    with self.assertRaisesRegex(TypeError, 'default.*int64'):
+      self.evaluate(test_fn())
+
+  def test_next_tf_iterator_error_checking_structures(self):
+    # graph-mode iterators are only supported inside tf.function.
+    @def_function.function(autograph=False)
+    def test_fn(default_val):
+      ds = dataset_ops.Dataset.range(1)
+      ds = ds.map(lambda i: {'a': i + 1, 'b': i + 10})
+      iterator = iter(ds)
+      py_builtins.next_(iterator)
+      py_builtins.next_(iterator, default_val)
+
+    default = {
+        'a': constant_op.constant(3, dtype=dtypes.int64),
+    }
+    with self.assertRaisesRegex(TypeError, 'same element structure'):
+      test_fn(default)
+    default = {
+        'a': constant_op.constant(3.0),
+        'b': [constant_op.constant(30), constant_op.constant(300)]
+    }
+    with self.assertRaisesRegex(TypeError, 'same element structure'):
+      test_fn(default)
+    default = {
+        'a': constant_op.constant(3.0),
+        'b': constant_op.constant(30, dtype=dtypes.int64),
+    }
+    with self.assertRaisesRegex(TypeError, 'float32'):
+      test_fn(default)
+
   def _basic_function_scope(self):
     return function_wrappers.FunctionScope(
         'test_function_name',
",0,test
6e7e5836af7bd09bed2c271938c4373a891e0d0a,tensorflow/tensorflow,"Preserve context info when entering merge_call.

PiperOrigin-RevId: 433130849",mirrored_run.py,"@@ -120,6 +120,15 @@ def _enter_graph(g, eager, creator_stack=None):
       yield
 
 
+@contextlib.contextmanager
+def _maybe_enter_eager_mode(eager):
+  if eager:
+    with context.eager_mode():
+      yield
+  else:
+    yield
+
+
 def _cpu_device(device):
   cpu_device = tf_device.DeviceSpec.from_string(device)
   cpu_device = cpu_device.replace(device_type=""CPU"", device_index=0)
@@ -241,9 +250,29 @@ def _call_for_each_replica(distribution, fn, args, kwargs):
           mtt_captured_control_deps = set()
           for t in threads:
             mtt_captured_control_deps.update(t.captured_control_deps)
-          with ops.name_scope(mtt_captured_name_scope),\
-              ops.control_dependencies(mtt_captured_control_deps), \
-              variable_scope.variable_scope(mtt_captured_var_scope):
+
+          # Control is transferred from _MirroredReplicaThread (MRT) to the main
+          # thread, i.e., here, to perform `merge_fn`, and thus we preserve the
+          # name scope, control dependencies, etc. from MRT at the time
+          # `merge_call` is made.
+          # One special case is that the `merge_call` is made under a
+          # `tf.init_scope` in the MRT. `tf.init_scope` will clear control
+          # dependencies, pause the gradient tape, and enter the lowest context
+          # on the `context_stack` that is not building a graph function.
+          # Entering the lowest context could be one of two things: installation
+          # of a graph as the default graph or a switch into eager mode. If the
+          # former is done and causes `merge_call` to be called in a different
+          # graph from the one in which `call_for_each_replica` is called, we do
+          # not allow this case (see comment in `_merge_call`) and we would not
+          # have arrived here due to the assertion in `_merge_call`. However, if
+          # the latter is done, we want to make sure the main thread enters an
+          # eager mode scope as well so that `merge_fn` does not have trouble
+          # accessing resources defined in MRT under the same context.
+          with ops.name_scope(
+              mtt_captured_name_scope), ops.control_dependencies(
+                  mtt_captured_control_deps), variable_scope.variable_scope(
+                      mtt_captured_var_scope), _maybe_enter_eager_mode(
+                          threads[0].merge_call_entered_in_eager):
             merge_result = threads[0].merge_fn(distribution, *merge_args,
                                                **merge_kwargs)
           for r, t in enumerate(threads):
@@ -438,6 +467,8 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext):
     t.captured_var_scope = variable_scope.get_variable_scope()
     t.captured_control_deps = t.graph._current_control_dependencies()  # pylint: disable=protected-access
 
+    t.merge_call_entered_in_eager = context.context().executing_eagerly()
+
     # It is problematic if `merge_call` is called under a different graph other
     # than the one that `_call_for_each_replica` is called under, there are
     # 3 cases this can happen:
@@ -488,6 +519,7 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext):
     t.should_run.clear()
     if t.coord.should_stop():
       raise _RequestedStop()
+    t.merge_call_entered_in_eager = None
     return t.merge_result
 
   @property
",0,train
6e7e5836af7bd09bed2c271938c4373a891e0d0a,tensorflow/tensorflow,"Preserve context info when entering merge_call.

PiperOrigin-RevId: 433130849",strategy_common_test.py,"@@ -63,6 +63,29 @@ class StrategyTest(test.TestCase, parameterized.TestCase):
 
     g()
 
+  def testMergeCallInitScope(self, strategy):
+    with strategy.scope():
+
+      @def_function.function
+      def fn():
+
+        def merge_fn(unused_strat):
+
+          y = constant_op.constant(11)
+          return y
+
+        def replica_fn():
+
+          with ops.init_scope():
+            y = ds_context.get_replica_context().merge_call(merge_fn)
+            z = y + 1
+            return z
+
+        return strategy.run(replica_fn)
+
+      result = strategy.experimental_local_results(fn())
+      self.assertAllClose(result, [12] * _get_num_replicas_per_client(strategy))
+
 
 @combinations.generate(
     combinations.combine(
",0,train
e25c7a82285f22e9a99153f094222ea41fae8fe6,tensorflow/tensorflow,TST: check num of features and targets,numpy_io_test.py,"@@ -294,20 +294,22 @@ class NumpyIoTest(test.TestCase):
     with self.test_session() as session:
       input_fn = numpy_io.numpy_input_fn(
         x, y, batch_size=2, shuffle=False, num_epochs=1)
-      features, target = input_fn()
+      features_tensor, targets_tensor = input_fn()
 
       coord = coordinator.Coordinator()
       threads = queue_runner_impl.start_queue_runners(session, coord=coord)
 
-      res = session.run([features, target])
-      self.assertAllEqual(res[0]['a'], [0, 1])
-      self.assertAllEqual(res[0]['b'], [32, 33])
-      self.assertAllEqual(res[1]['y1'], [-32, -31])
-      self.assertAllEqual(res[1]['y2'], [32, 31])
+      features, targets = session.run([features_tensor, targets_tensor])
+      self.assertEqual(len(features), 2)
+      self.assertAllEqual(features['a'], [0, 1])
+      self.assertAllEqual(features['b'], [32, 33])
+      self.assertEqual(len(targets), 2)
+      self.assertAllEqual(targets['y1'], [-32, -31])
+      self.assertAllEqual(targets['y2'], [32, 31])
 
-      session.run([features, target])
+      session.run([features_tensor, targets_tensor])
       with self.assertRaises(errors.OutOfRangeError):
-        session.run([features, target])
+        session.run([features_tensor, targets_tensor])
 
       coord.request_stop()
       coord.join(threads)
",0,train
c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference

Disabled benchmark for now as it is very slow without fusion.

PiperOrigin-RevId: 354496217
Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",cwise_op_gpu_squared_difference.cu.cc,"@@ -19,7 +19,10 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
 DEFINE_BINARY4(squared_difference, float, Eigen::half, double, int64);
+#endif
 }  // namespace functor
 }  // namespace tensorflow
 
",0,train
c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference

Disabled benchmark for now as it is very slow without fusion.

PiperOrigin-RevId: 354496217
Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",cwise_op_squared_difference.cc,"@@ -20,9 +20,12 @@ REGISTER8(BinaryOp, CPU, ""SquaredDifference"", functor::squared_difference,
           float, Eigen::half, double, bfloat16, int32, int64, complex64,
           complex128);
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
 REGISTER4(BinaryOp, GPU, ""SquaredDifference"", functor::squared_difference,
           float, Eigen::half, double, int64);
 #endif
+#endif
 
 // A special GPU kernel for int32.
 // TODO(b/25387198): Also enable int32 in device memory. This kernel
@@ -36,5 +39,4 @@ REGISTER_KERNEL_BUILDER(
         .TypeConstraint<int32>(""T""),
     BinaryOp<CPUDevice, functor::squared_difference<int32>>);
 
-
 }  // namespace tensorflow
",0,train
c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference

Disabled benchmark for now as it is very slow without fusion.

PiperOrigin-RevId: 354496217
Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",gpu_binary_ops_test.cc,"@@ -843,6 +843,22 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
     /*test_name=*/Int64, int64, int64, test::DefaultInput<int64>(),
     test::DefaultInputLessThanBitwidth<int64>(), baseline_right_shift)
 
+/// Test `tf.SquaredDifference`.
+
+template <typename T>
+T baseline_squared_difference(T lhs, T rhs) {
+  return (lhs - rhs) * (lhs - rhs);
+}
+
+GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Half, Eigen::half,
+                       Eigen::half, baseline_squared_difference)
+GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Float, float, float,
+                       baseline_squared_difference)
+GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Double, double, double,
+                       baseline_squared_difference)
+GENERATE_DEFAULT_TESTS(SquaredDifference, /*test_name=*/Int64, int64, int64,
+                       baseline_squared_difference)
+
 /// Test `tf.Sub`.
 
 template <typename T>
",0,train
c28b38d452ec2b89c330d5c2fabea956a029f348,tensorflow/tensorflow,"[kernel_gen] Add generation of tf.SquaredDifference

Disabled benchmark for now as it is very slow without fusion.

PiperOrigin-RevId: 354496217
Change-Id: I72216cf8c2599afecd61bbac9dc43dda6cd345f4",gpu_op_squared_difference.cc,"@@ -0,0 +1,26 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
+#include ""tensorflow/core/kernels/mlir_generated/gpu_ops_base.h""
+
+namespace tensorflow {
+
+GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f16, DT_HALF,
+                                    Eigen::half);
+GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f32, DT_FLOAT, float);
+GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, f64, DT_DOUBLE, double);
+GENERATE_AND_REGISTER_BINARY_KERNEL(SquaredDifference, i64, DT_INT64, int64);
+
+}  // namespace tensorflow
",0,train
6c1f11a557add7f836751361f26caf2e0062d509,tensorflow/tensorflow,"Minor fix to include order.

PiperOrigin-RevId: 312298890
Change-Id: I3ae60f2d4c5f6c92aa165c7fa1263445c4a98a6d",text_vectorization_test.py,"@@ -37,9 +37,9 @@ from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.layers import convolutional
 from tensorflow.python.keras.layers import core
 from tensorflow.python.keras.layers import embeddings
+from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
 from tensorflow.python.keras.layers.preprocessing import text_vectorization
 from tensorflow.python.keras.layers.preprocessing import text_vectorization_v1
-from tensorflow.python.keras.layers.preprocessing import preprocessing_test_utils
 from tensorflow.python.keras.saving import saved_model_experimental as saving
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils.generic_utils import CustomObjectScope
",0,test
4c01f5d82f0eb3f8f585d87676d759c78a7fa5cf,tensorflow/tensorflow,"[tf.data] Fix the bug where, when restoring the iterator, the buffered elements are not correctly recorded for ParallelInterleaveDataset.

PiperOrigin-RevId: 356627548
Change-Id: I8bb0ea293d468453790bfdb996fbf428ba7cf71d",parallel_interleave_dataset_op.cc,"@@ -1278,6 +1278,7 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
                 absl::StrCat(kResultsSuffix, ""["", i, ""]["", j, ""]""),
                 &result->return_values.back()));
           }
+          RecordBufferEnqueue(ctx, result->return_values);
           element->results[i] = std::move(result);
         }
         if (!reader->Contains(iterator_name,
@@ -1339,6 +1340,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
       TF_RETURN_IF_ERROR(
           ReadElementsParallel(ctx, reader, size, kCurrentElements, &elements));
       mutex_lock l(*mu_);
+      for (auto& element : current_elements_) {
+        DCHECK(element == nullptr);
+      }
       for (int idx = 0; idx < size; ++idx) {
         current_elements_[idx] = std::move(elements[idx]);
       }
@@ -1361,6 +1365,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
       TF_RETURN_IF_ERROR(
           ReadElementsParallel(ctx, reader, size, kFutureElements, &elements));
       mutex_lock l(*mu_);
+      for (auto& element : future_elements_) {
+        DCHECK(element == nullptr);
+      }
       for (int idx = 0; idx < size; ++idx) {
         future_elements_[idx] = std::move(elements[idx]);
       }
",0,test
ed408b579eba3844dda9a96ae57fffb0f2c4d10d,tensorflow/tensorflow,"Generate the C++ header files.

PiperOrigin-RevId: 337399875
Change-Id: I251463719569aa261f5c2768a7a5c045d192bfbc",build_cc_api_headers.py,"@@ -0,0 +1,63 @@
+# Lint as: python3
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Generate Java reference docs for TensorFlow.org.""""""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import pathlib
+import subprocess
+
+from absl import app
+from absl import flags
+
+FLAGS = flags.FLAGS
+
+# These flags are required by infrastructure, not all of them are used.
+flags.DEFINE_string('output_dir', None,
+                    (""Use this branch as the root version and don't""
+                     ' create in version directory'))
+
+# __file__ is the path to this file
+DOCS_TOOLS_DIR = pathlib.Path(__file__).resolve().parent
+TENSORFLOW_ROOT = DOCS_TOOLS_DIR.parents[2]
+
+
+def build_headers(output_dir):
+  """"""Builds the headers files for TF.""""""
+
+  # `$ yes | configure`
+  yes = subprocess.Popen(['yes', ''], stdout=subprocess.PIPE)
+  configure = subprocess.Popen([TENSORFLOW_ROOT / 'configure'],
+                               stdin=yes.stdout,
+                               cwd=TENSORFLOW_ROOT)
+  configure.communicate()
+
+  subprocess.check_call(['bazel', 'build', 'tensorflow/cc:cc_ops'],
+                        cwd=TENSORFLOW_ROOT)
+  subprocess.check_call(
+      ['cp', '--dereference', '-r', 'bazel-bin', output_dir / 'bazel-genfiles'],
+      cwd=TENSORFLOW_ROOT)
+
+
+def main(argv):
+  del argv
+  build_headers(pathlib.Path(FLAGS.output_dir))
+
+
+if __name__ == '__main__':
+  flags.mark_flags_as_required(['output_dir'])
+  app.run(main)
",0,train
9c4dd646b711ae9c65543a2d14e53f12dc4bcafc,tensorflow/tensorflow,"Replace llvm::sys::fs::F_{None,Text} with llvm::sys::fs::OF_{None,Text}

The former are deprecated aliases which will be removed by https://reviews.llvm.org/D101506

PiperOrigin-RevId: 371371581
Change-Id: I860fac466c3415655bea048f9ee7042ad882999a",dump_ir_pass.cc,"@@ -48,7 +48,7 @@ class DumpIrPass : public llvm::FunctionPass {
 
   bool doInitialization(llvm::Module &M) override {
     out_.reset(new llvm::raw_fd_ostream(llvm::StringRef(output_filename_), ec_,
-                                        llvm::sys::fs::F_None));
+                                        llvm::sys::fs::OF_None));
     if (ec_) {
       LOG(FATAL) << ""Unable to open "" << output_filename_
                  << "" to dump LLVM IR: "" << ec_.message();
",0,train
9c4dd646b711ae9c65543a2d14e53f12dc4bcafc,tensorflow/tensorflow,"Replace llvm::sys::fs::F_{None,Text} with llvm::sys::fs::OF_{None,Text}

The former are deprecated aliases which will be removed by https://reviews.llvm.org/D101506

PiperOrigin-RevId: 371371581
Change-Id: I860fac466c3415655bea048f9ee7042ad882999a",gpu_backend_lib.cc,"@@ -216,7 +216,7 @@ void AddOptimizationPasses(unsigned opt_level, unsigned size_level,
 void EmitBitcodeToFile(const llvm::Module& module, absl::string_view filename) {
   std::error_code error_code;
   llvm::ToolOutputFile outfile(string(filename).c_str(), error_code,
-                               llvm::sys::fs::F_None);
+                               llvm::sys::fs::OF_None);
   if (error_code) {
     LOG(FATAL) << ""opening bitcode file for writing: "" << error_code.message();
   }
@@ -696,7 +696,7 @@ StatusOr<std::vector<uint8>> EmitModuleToHsaco(
 
   // Dump LLVM IR.
   std::unique_ptr<llvm::raw_fd_ostream> ir_fs(
-      new llvm::raw_fd_ostream(ir_path, ec, llvm::sys::fs::F_None));
+      new llvm::raw_fd_ostream(ir_path, ec, llvm::sys::fs::OF_None));
   module->print(*ir_fs, nullptr);
   ir_fs->flush();
 
@@ -713,7 +713,7 @@ StatusOr<std::vector<uint8>> EmitModuleToHsaco(
   llvm::SmallVector<char, 0> stream;
   llvm::raw_svector_ostream pstream(stream);
   std::unique_ptr<llvm::raw_fd_ostream> isabin_fs(
-      new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::F_Text));
+      new llvm::raw_fd_ostream(isabin_path, ec, llvm::sys::fs::OF_Text));
   module->setDataLayout(target_machine->createDataLayout());
   target_machine->addPassesToEmitFile(codegen_passes, *isabin_fs, nullptr,
                                       llvm::CGFT_ObjectFile);
@@ -722,7 +722,7 @@ StatusOr<std::vector<uint8>> EmitModuleToHsaco(
 
   if (keep_tempfiles) {
     std::unique_ptr<llvm::raw_fd_ostream> ir_fs(
-        new llvm::raw_fd_ostream(ir_opt_path, ec, llvm::sys::fs::F_None));
+        new llvm::raw_fd_ostream(ir_opt_path, ec, llvm::sys::fs::OF_None));
     module->print(*ir_fs, nullptr);
     ir_fs->flush();
   }
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",encapsulate_subgraphs_pass.cc,"@@ -2535,7 +2535,33 @@ Status EncapsulateSubgraphsPass::Run(
             std::vector<int>* input_permutation,
             std::vector<int>* output_permutation, NodeDef* node) {
         // Optimize the subgraph.
-        OptimizeGraph(flr, subgraph);
+        // Do not constant fold nodes that output DT_VARIANT type tensors.
+        // XLA does not support Const nodes of Variant type since it needs
+        // to know the original ops to be able to compile them to the relevant
+        // XLA form.
+        // TODO(srbs): This filter is a little conservative. E.g. a subgraph of
+        // the form:
+        //                          Const
+        //                            |
+        // EmptyTensorList -> TensorListPushBack -> TensorListPopBack -> Op
+        //                                                  |
+        //                                        (Discard popped list)
+        //
+        // Would have been reduced to ""Const -> Op"" without this filter.
+        // However since we are only allowed to specify the filter at the ""Node""
+        // level there is no good way to allow the above behavior. So we
+        // disallow any sort of constant folding on Variant nodes for now.
+        auto cf_consider_fn = [](const Node* n) {
+          for (const auto& output_arg : n->op_def().output_arg()) {
+            if (output_arg.type() == DT_VARIANT) {
+              return false;
+            }
+          }
+          return true;
+        };
+        GraphOptimizer::Options graph_optimizer_options;
+        graph_optimizer_options.cf_consider_fn = cf_consider_fn;
+        OptimizeGraph(flr, subgraph, graph_optimizer_options);
 
         const int num_args = input_permutation->size();
         std::vector<bool> const_args(num_args);
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",tensor_list_ops_test.py,"@@ -48,24 +48,39 @@ class ListOpsTest(xla_test.XLATestCase):
 
   def testPushPop(self):
     with self.cached_session() as sess, self.test_scope():
-      num = array_ops.placeholder(dtypes.int32)
       l = list_ops.tensor_list_reserve(
-          element_shape=(7, 15), num_elements=num, element_dtype=dtypes.float32)
+          element_shape=(7, 15), num_elements=10, element_dtype=dtypes.float32)
       l = list_ops.tensor_list_push_back(
           l, constant_op.constant(1.0, shape=(7, 15)))
       l = list_ops.tensor_list_push_back(
           l, constant_op.constant(2.0, shape=(7, 15)))
       l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
       _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
-      self.assertAllEqual(sess.run(e2, {num: 10}), 2.0 * np.ones((7, 15)))
-      self.assertAllEqual(sess.run(e1, {num: 10}), 1.0 * np.ones((7, 15)))
+      self.assertAllEqual(sess.run(e2), 2.0 * np.ones((7, 15)))
+      self.assertAllEqual(sess.run(e1), 1.0 * np.ones((7, 15)))
+
+  def testDoNotConstantFoldVariants(self):
+    with self.cached_session() as sess, self.test_scope():
+      val = array_ops.placeholder(dtype=dtypes.float32)
+      l = list_ops.tensor_list_reserve(
+          element_shape=(7, 15), num_elements=10, element_dtype=dtypes.float32)
+      # Note: Pushing a Placeholder will force the constant folding code
+      # to build a Const node with a DT_VARIANT output. This tests that XLA
+      # passes a cf_consider_fn which prevents folding such nodes.
+      l = list_ops.tensor_list_push_back(
+          l, array_ops.fill(value=val, dims=(7, 15)))
+      l = list_ops.tensor_list_push_back(
+          l, constant_op.constant(2.0, shape=(7, 15)))
+      l, e2 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      _, e1 = list_ops.tensor_list_pop_back(l, element_dtype=dtypes.float32)
+      self.assertAllEqual(sess.run(e2, {val: 1.0}), 2.0 * np.ones((7, 15)))
+      self.assertAllEqual(sess.run(e1, {val: 1.0}), 1.0 * np.ones((7, 15)))
 
   def testPushPopSeparateLists(self):
     with self.cached_session() as sess, self.test_scope():
-      num = array_ops.placeholder(dtypes.int32)
       l = list_ops.tensor_list_reserve(
           element_shape=scalar_shape(),
-          num_elements=num,
+          num_elements=20,
           element_dtype=dtypes.float32)
       l = list_ops.tensor_list_push_back(l, constant_op.constant(1.0))
       l2 = list_ops.tensor_list_push_back(l, constant_op.constant(2.0))
@@ -75,7 +90,7 @@ class ListOpsTest(xla_test.XLATestCase):
       l2, e22 = list_ops.tensor_list_pop_back(l2, element_dtype=dtypes.float32)
       l3, e31 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
       l3, e32 = list_ops.tensor_list_pop_back(l3, element_dtype=dtypes.float32)
-      result = sess.run([e11, [e21, e22], [e31, e32]], {num: 20})
+      result = sess.run([e11, [e21, e22], [e31, e32]])
       self.assertEqual(result, [1.0, [2.0, 1.0], [3.0, 1.0]])
 
   def testEmptyTensorList(self):
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",xla_compiler.cc,"@@ -462,8 +462,34 @@ std::unique_ptr<Graph> XlaCompiler::GetGraph(const FunctionBody* fbody) {
   opts.set_do_function_inlining(true);
   opts.set_do_constant_folding(true);
   GraphOptimizer optimizer(opts);
+  // Do not constant fold nodes that output DT_VARIANT type tensors.
+  // XLA does not support Const nodes of Variant type since it needs
+  // to know the original ops to be able to compile them to the relevant
+  // XLA form.
+  // TODO(srbs): This filter is a little conservative. E.g. a subgraph of
+  // the form:
+  //                          Const
+  //                            |
+  // EmptyTensorList -> TensorListPushBack -> TensorListPopBack -> Op
+  //                                                  |
+  //                                        (Discard popped list)
+  //
+  // Would have been reduced to ""Const -> Op"" without this filter.
+  // However since we are only allowed to specify the filter at the ""Node""
+  // level there is no good way to allow the above behavior. So we
+  // disallow any sort of constant folding on Variant nodes for now.
+  auto cf_consider_fn = [](const Node* n) {
+    for (const auto& output_arg : n->op_def().output_arg()) {
+      if (output_arg.type() == DT_VARIANT) {
+        return false;
+      }
+    }
+    return true;
+  };
+  GraphOptimizer::Options graph_optimizer_options;
+  graph_optimizer_options.cf_consider_fn = cf_consider_fn;
   optimizer.Optimize(flib_runtime_, flib_runtime_->env(),
-                     /*device=*/nullptr, &graph, /*shape_map=*/nullptr);
+                     /*device=*/nullptr, &graph, graph_optimizer_options);
 
   return graph;
 }
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",function.cc,"@@ -786,13 +786,19 @@ void DumpGraph(StringPiece label, const Graph* g) {
   }
 }
 
-void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g) {
+void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g,
+                   const GraphOptimizer::Options& graph_optimizer_options) {
   OptimizerOptions opts;
   opts.set_do_common_subexpression_elimination(true);
   opts.set_do_function_inlining(true);
   opts.set_do_constant_folding(true);
   GraphOptimizer optimizer(opts);
-  optimizer.Optimize(lib, lib->env(), lib->device(), g, /*shape_map=*/nullptr);
+  optimizer.Optimize(lib, lib->env(), lib->device(), g,
+                     graph_optimizer_options);
+}
+
+void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g) {
+  OptimizeGraph(lib, g, GraphOptimizer::Options());
 }
 
 namespace {
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",function.h,"@@ -21,6 +21,7 @@ limitations under the License.
 
 #include ""tensorflow/core/common_runtime/device.h""
 #include ""tensorflow/core/common_runtime/device_mgr.h""
+#include ""tensorflow/core/common_runtime/graph_optimizer.h""
 #include ""tensorflow/core/common_runtime/process_function_library_runtime.h""
 #include ""tensorflow/core/framework/function.h""
 #include ""tensorflow/core/graph/graph.h""
@@ -133,6 +134,8 @@ void DumpGraph(StringPiece label, const Graph* g);
 // OptimizeGraph mutates **g extensively and replaces '*g' with a
 // complete copy. Therefore, the caller should not keep any references
 // to nodes *g.
+void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g,
+                   const GraphOptimizer::Options& graph_optimizer_options);
 void OptimizeGraph(FunctionLibraryRuntime* lib, std::unique_ptr<Graph>* g);
 
 // Convert the Graph of a function to a GraphDef.
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",graph_optimizer.cc,"@@ -38,8 +38,7 @@ void GraphOptimizer::Optimize(
     std::unique_ptr<Graph>* graph,
     const std::unordered_map<string, std::vector<PartialTensorShape>>*
         shape_map,
-    const std::function<bool(const Node*)>& cse_consider_fn,
-    const std::function<bool(const Node*)>& cf_consider_fn) {
+    const NodePredicate& cse_consider_fn, const NodePredicate& cf_consider_fn) {
   Graph* g = graph->get();
   DumpGraph(""Initial"", g);
 
@@ -103,4 +102,11 @@ void GraphOptimizer::Optimize(
   DumpGraph(""ReCopy"", graph->get());
 }
 
+void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
+                              Device* device, std::unique_ptr<Graph>* graph,
+                              const Options& options) {
+  Optimize(runtime, env, device, graph, options.shape_map,
+           options.cse_consider_fn, options.cf_consider_fn);
+}
+
 }  // end namespace tensorflow
",0,train
05bef43400c83ca6d27617e27f1661e14f5f05e8,tensorflow/tensorflow,"Do not constant fold nodes with DT_VARIANT type outputs in XLA.
XLA does not support Const nodes of type Variant. It needs to see the source ops for the Variant operations to be able to build its own representation.

PiperOrigin-RevId: 229801068",graph_optimizer.h,"@@ -26,6 +26,28 @@ namespace tensorflow {
 
 class GraphOptimizer {
  public:
+  using NodePredicate = std::function<bool(const Node*)>;
+
+  struct Options {
+    // If not null it maps from nodes in graph to partially-known
+    // shapes of their outputs, and may be used, e.g., in the constant folding
+    // pass. The use of shape_map implies that the mapping from node name to the
+    // vector of partial shapes of its outputs is stable, i.e., no optimization
+    // pass may replace a node with a different node of the same name that has a
+    // different number of outputs, or outputs with different known shapes.
+    // TODO(b/65453533) introduce a unique way to name nodes in a graph.
+    std::unordered_map<string, std::vector<PartialTensorShape>>* shape_map =
+        nullptr;
+
+    // If not null then only nodes for which cse_consider_fn returns true will
+    // be considered for CSE.
+    NodePredicate cse_consider_fn = nullptr;
+
+    // If not null then only nodes for which cf_consider_fn returns true will be
+    // considered for CF.
+    NodePredicate cf_consider_fn = nullptr;
+  };
+
   GraphOptimizer(const OptimizerOptions& opts);
   ~GraphOptimizer();
 
@@ -34,26 +56,17 @@ class GraphOptimizer {
   // on which the 'graph' will execute. It's passed to the optimizers
   // so that they can respect constraints if any, that should be
   // respected.
-  //
-  // If shape_map is not null it maps from nodes in graph to partially-known
-  // shapes of their outputs, and may be used, e.g., in the constant folding
-  // pass. The use of shape_map implies that the mapping from node name to the
-  // vector of partial shapes of its outputs is stable, i.e., no optimization
-  // pass may replace a node with a different node of the same name that has a
-  // different number of outputs, or outputs with different known shapes.
-  // TODO(b/65453533) introduce a unique way to name nodes in a graph.
-  //
-  // If cse_consider_fn is not null then only nodes for which cse_consider_fn
-  // returns true will be considered for CSE.
-  // If cf_consider_fn is not null then only nodes for which cf_consider_fn
-  // returns true will be considered for CF.
+  void Optimize(FunctionLibraryRuntime* runtime, Env* env, Device* device,
+                std::unique_ptr<Graph>* graph,
+                const Options& graph_optimizer_options);
+  // DEPRECATED: Consider passing a GraphOptimizer::Options object instead.
   void Optimize(
       FunctionLibraryRuntime* runtime, Env* env, Device* device,
       std::unique_ptr<Graph>* graph,
       const std::unordered_map<string, std::vector<PartialTensorShape>>*
           shape_map,
-      const std::function<bool(const Node*)>& cse_consider_fn = nullptr,
-      const std::function<bool(const Node*)>& cf_consider_fn = nullptr);
+      const NodePredicate& cse_consider_fn = nullptr,
+      const NodePredicate& cf_consider_fn = nullptr);
 
   const OptimizerOptions& options() { return opts_; }
 
",0,train
ccee426384468b152aba22e1a9f9a3fd2f92bf00,tensorflow/tensorflow,"Fix use of uninitialized memory in BFCAllocator.

PiperOrigin-RevId: 367554433
Change-Id: I5688685303e4b845a477feac371d7c62b7d0c8c8",bfc_allocator.cc,"@@ -45,7 +45,8 @@ BFCAllocator::BFCAllocator(SubAllocator* sub_allocator, size_t total_memory,
       sub_allocator_(sub_allocator),
       name_(name),
       free_chunks_list_(kInvalidChunkHandle),
-      next_allocation_id_(1) {
+      next_allocation_id_(1),
+      action_counter_(0) {
   if (allow_growth) {
     // 2MiB smallest initial allocation, unless total memory available
     // is less.
",0,train
ab0cbb3cc082a1b4dde989552ccd73986566f57a,tensorflow/tensorflow,"Refine tf.const in TF shape inference.

PiperOrigin-RevId: 307726788
Change-Id: I7bb1ede57d9c27b191078f7533fad5975f1e713d",shape_inference.cc,"@@ -274,6 +274,15 @@ bool InferShapeForCall(Operation* op) {
   return changed;
 }
 
+bool RefineTfConst(TF::ConstOp const_op) {
+  Type old_type = const_op.getType();
+  if (const_op.valueAttr().getType() == old_type) return false;
+  const_op.getResult().setType(const_op.valueAttr().getType());
+  AddCastBackForUnsupportedNonTFUses(const_op, const_op.getResult(),
+                                     const_op.getDialect(), old_type);
+  return true;
+}
+
 }  // namespace
 
 bool InferShapeForSingleOperation(Operation* op, Dialect* tf_dialect,
@@ -622,6 +631,13 @@ LogicalResult InferShapeUntilFixPoint(Region* region, int64_t graph_version,
         return;
       }
 
+      if (auto tf_const = dyn_cast<TF::ConstOp>(op)) {
+        changed |= RefineTfConst(tf_const);
+        // TODO(jpienaar): Debug why we can't just return here. We end up with
+        // additional constant due to the propagation of constant into attached
+        // function if we return already.
+      }
+
       // Before attempting inference, just try to fold the operation.
       if (succeeded(folder.tryToFold(op))) return;
 
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",gpu_passes.cc,"@@ -37,6 +37,7 @@ limitations under the License.
 #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/PassDetail.h""
 #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/ccl_pattern.h""
 #include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/gemm_pattern.h""
+#include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/memcpy_pattern.h""
 #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h""
 #include ""tfrt/gpu/kernels/gpu_ops.h""  // from @tf_runtime
 #include ""tfrt/gpu/pass/pass.h""  // from @tf_runtime
@@ -57,7 +58,8 @@ struct LmhloGpuAsyncConversionPass
     converter.addConversion([&](BaseMemRefType) { return buffer_type; });
 
     ConversionTarget target(*context);
-    target.addIllegalDialect<lmhlo_gpu::LmhloGpuDialect>();
+    target
+        .addIllegalDialect<lmhlo_gpu::LmhloGpuDialect, mlir::gpu::GPUDialect>();
     target.addLegalDialect<tfrt::compiler::TFRTDialect, tfrt::gpu::GpuDialect,
                            xla::gpu::XlirDialect>();
     target.addDynamicallyLegalOp<FuncOp>([&](FuncOp op) {
@@ -72,10 +74,12 @@ struct LmhloGpuAsyncConversionPass
     RewritePatternSet patterns(context);
     populateCclConversionPattern(patterns);
     populateGemmConversionPattern(patterns);
+    populateMemcpyConversionPattern(patterns);
     populateFuncOpTypeConversionPattern(patterns, converter);
 
     ConversionTarget wrap_target(*context);
-    wrap_target.addLegalDialect<lmhlo_gpu::LmhloGpuDialect>();
+    wrap_target
+        .addLegalDialect<lmhlo_gpu::LmhloGpuDialect, mlir::gpu::GPUDialect>();
     wrap_target.addLegalOp<lmhlo::AllGatherOp, lmhlo::AllReduceOp,
                            lmhlo::ReduceScatterOp>();
     tfrt::gpu::populateGpuAsyncConversionPatterns(patterns, converter,
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",memcpy_pattern.cc,"@@ -0,0 +1,88 @@
+// Copyright 2020 The TensorFlow Runtime Authors
+//
+// Licensed under the Apache License, Version 2.0 (the ""License"");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an ""AS IS"" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+//===- memcpy_pattern.cc
+//---------------------------------------------------------===//
+//
+// Pattern to lower mlir::gpu::memcpy Ops to tfrt cuda dialect.
+//
+//===----------------------------------------------------------------------===//
+#include ""tensorflow/compiler/mlir/tfrt/transforms/lhlo_gpu_to_tfrt_gpu/memcpy_pattern.h""
+
+#include <assert.h>
+#include <stdint.h>
+
+#include <type_traits>
+#include <utility>
+
+#include ""mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h""
+#include ""mlir/IR/BuiltinAttributes.h""
+#include ""mlir/IR/Location.h""
+#include ""mlir/IR/Types.h""
+#include ""mlir/Transforms/DialectConversion.h""
+#include ""llvm/ADT/ArrayRef.h""
+#include ""llvm/ADT/StringRef.h""
+#include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
+#include ""mlir/IR/BlockAndValueMapping.h""  // from @llvm-project
+#include ""mlir/IR/Value.h""  // from @llvm-project
+#include ""mlir/Support/LogicalResult.h""  // from @llvm-project
+#include ""tensorflow/compiler/mlir/xla/type_to_shape.h""
+#include ""tensorflow/compiler/xla/layout_util.h""
+#include ""tensorflow/compiler/xla/shape.h""
+#include ""tfrt/gpu/kernels/gpu_ops.h""  // from @tf_runtime
+#include ""tfrt/gpu/pass/pass.h""  // from @tf_runtime
+#include ""tfrt/gpu/wrapper/cublas_wrapper.h""  // from @tf_runtime
+#include ""tfrt/basic_kernels/opdefs/basic_kernels.h""  // from @tf_runtime
+#include ""tfrt/basic_kernels/opdefs/types.h""  // from @tf_runtime
+
+namespace tensorflow {
+namespace {
+
+using llvm::ArrayRef;
+
+// Creates tfrt::gpu::MemCopyOp from mlir::gpu::MemcpyOp.
+struct MemcpyRewritePattern
+    : tfrt::gpu::GpuAsyncOpConversionPattern<mlir::gpu::MemcpyOp> {
+  using tfrt::gpu::GpuAsyncOpConversionPattern<
+      mlir::gpu::MemcpyOp>::GpuAsyncOpConversionPattern;
+
+  FailureOr<Value> matchAndRewriteOp(
+      mlir::gpu::MemcpyOp op, Value chain, Value stream,
+      ArrayRef<Value> operands,
+      ConversionPatternRewriter& rewriter) const override {
+    if (!all_of(operands, [](Value operand) {
+          return operand.getType().isa<tfrt::gpu::BufferType>();
+        }))
+      return rewriter.notifyMatchFailure(op, ""expected buffer operands"");
+
+    BlockAndValueMapping mapping;
+    for (auto pair : llvm::zip_first(op->getOperands(), operands))
+      mapping.map(std::get<0>(pair), std::get<1>(pair));
+
+    rewriter.eraseOp(op);
+
+    return rewriter
+        .create<tfrt::gpu::MemCopyOp>(op.getLoc(), mapping.lookup(op.dst()),
+                                      mapping.lookup(op.src()), stream, chain)
+        .getResult();
+  }
+};
+
+}  // namespace
+
+void populateMemcpyConversionPattern(RewritePatternSet& patterns) {
+  patterns.add<MemcpyRewritePattern>(patterns.getContext());
+}
+
+}  // namespace tensorflow
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",memcpy_pattern.h,"@@ -0,0 +1,33 @@
+// Copyright 2020 The TensorFlow Runtime Authors
+//
+// Licensed under the Apache License, Version 2.0 (the ""License"");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//      http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an ""AS IS"" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_
+#define TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_
+
+#include ""mlir-hlo/Dialect/mhlo/IR/lhlo_gpu_ops.h""
+#include ""mlir/IR/BlockAndValueMapping.h""
+#include ""mlir/Transforms/DialectConversion.h""
+#include ""llvm/ADT/ArrayRef.h""
+#include ""llvm/ADT/StringRef.h""
+#include ""mlir/Dialect/GPU/GPUDialect.h""  // from @llvm-project
+
+namespace tensorflow {
+
+// Add a pattern to the given pattern list to convert from mlir::gpu::MemcpyOp
+// to tfrt::gpu::MemCopyOp.
+void populateMemcpyConversionPattern(mlir::RewritePatternSet& patterns);
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_COMPILER_MLIR_TFRT_TRANSFORMS_LHLO_GPU_TO_TFRT_GPU_MEMCPY_PATTERN_H_
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",bef_thunk.cc,"@@ -213,6 +213,9 @@ static StatusOr<Thunk::Kind> GetThunkKind(mlir::Operation* op) {
   if (mlir::isa<mlir::lmhlo_gpu::GEMMOp, mlir::lmhlo_gpu::GEMM_BiasOp>(op)) {
     return Thunk::Kind::kGemm;
   }
+  if (mlir::isa<mlir::gpu::MemcpyOp>(op)) {
+    return Thunk::Kind::kMemcpy;
+  }
   if (mlir::isa<mlir::lmhlo::AllGatherOp>(op)) {
     return Thunk::Kind::kNcclAllGather;
   }
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",mlir_gpu_test_base.cc,"@@ -135,7 +135,7 @@ MlirGpuTestBase::RunMlirModuleWithHostBuffers(
 StatusOr<mlir::OwningModuleRef> MlirGpuTestBase::ParseMlirModule(
     absl::string_view module_text, mlir::MLIRContext& context) {
   context.loadDialect<mlir::lmhlo::LmhloDialect, mlir::mhlo::MhloDialect,
-                      mlir::StandardOpsDialect,
+                      mlir::StandardOpsDialect, mlir::gpu::GPUDialect,
                       mlir::lmhlo_gpu::LmhloGpuDialect>();
   llvm::SourceMgr source_mgr;
   std::string diagnostic_str;
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",thunk.cc,"@@ -70,6 +70,8 @@ StatusOr<GlobalDeviceId> Thunk::ExecuteParams::GetGlobalDeviceId() const {
       return ""kInfeed"";
     case Thunk::kKernel:
       return ""kKernel"";
+    case Thunk::kMemcpy:
+      return ""kMemcpy"";
     case Thunk::kMemset32BitValue:
       return ""kMemset32BitValue"";
     case Thunk::kMemzero:
",0,train
f13063ee503653e0693a1eee461b55e22c14a7a0,tensorflow/tensorflow,"Lower mlir.gpu.memcpy to tfrt_gpu.mem.copy.

PiperOrigin-RevId: 392411612
Change-Id: I979a86fe7294e7155d51efd47802a6dd20acdf53",thunk.h,"@@ -56,6 +56,7 @@ class Thunk {
     kGemm,
     kInfeed,
     kKernel,
+    kMemcpy,
     kMemset32BitValue,
     kMemzero,
     kNcclAllGather,
",0,train
3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies.

It already detects layout-changing copies and those are already left unchanged
by copy elision. Special case copies are also skipped because they are tagged
separately (SetCopyElisionAllowed)

PiperOrigin-RevId: 202574858",copy_insertion.cc,"@@ -1093,8 +1093,7 @@ void MaybeDumpModule(const string& message, const HloModule& module) {
 }  // namespace
 
 Status RemoveUnnecessaryCopies(
-    const HloOrdering& ordering,
-    const tensorflow::gtl::FlatSet<int>& copies_to_exclude, HloModule* module,
+    const HloOrdering& ordering, HloModule* module,
     const HloDataflowAnalysis::FusionCanShareBufferFunction&
         fusion_can_share_buffer) {
   MaybeDumpModule(""after adding copies to resolve interference"", *module);
@@ -1108,7 +1107,6 @@ Status RemoveUnnecessaryCopies(
   for (HloComputation* computation : module->computations()) {
     for (HloInstruction* instruction : computation->instructions()) {
       if (instruction->opcode() == HloOpcode::kCopy &&
-          !ContainsKey(copies_to_exclude, instruction->unique_id()) &&
           instruction->CopyElisionAllowed()) {
         TF_RETURN_IF_ERROR(copy_remover.TryElideCopy(instruction).status());
       }
@@ -1152,16 +1150,13 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
         ""Call graph must be flattened before copy insertion."");
   }
 
-  // Gather Ids of existing kCopy instructions in the module. We avoid removing
-  // these copies (except via DCE in TupleSimplifier) because they may have been
-  // added for reasons not considered by copy insertion (eg, layout assignment).
-  // Instruction id is used instead of HloInstruction* because the pointer
-  // values may be recycled.
-  tensorflow::gtl::FlatSet<int> existing_copies;
-  for (HloComputation* computation : module->computations()) {
-    for (HloInstruction* instruction : computation->instructions()) {
-      if (instruction->opcode() == HloOpcode::kCopy) {
-        existing_copies.insert(instruction->unique_id());
+  int64 num_existing_copies = 0;
+  if (VLOG_IS_ON(1)) {
+    for (HloComputation* computation : module->computations()) {
+      for (HloInstruction* instruction : computation->instructions()) {
+        if (instruction->opcode() == HloOpcode::kCopy) {
+          ++num_existing_copies;
+        }
       }
     }
   }
@@ -1181,8 +1176,7 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
   TF_DCHECK_OK(VerifyNoLiveRangeInterference(module));
 
   DependencyHloOrdering ordering(module);
-  TF_RETURN_IF_ERROR(
-      RemoveUnnecessaryCopies(ordering, existing_copies, module));
+  TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module));
 
   TF_RETURN_IF_ERROR(AddSpecialCaseCopies(*call_graph, module));
 
@@ -1203,7 +1197,7 @@ StatusOr<bool> CopyInsertion::Run(HloModule* module) {
         }
       }
     }
-    VLOG(1) << ""Num copies before copy-insertion: "" << existing_copies.size();
+    VLOG(1) << ""Num copies before copy-insertion: "" << num_existing_copies;
     VLOG(1) << ""Num copies after copy-insertion: "" << num_total_copies;
   }
 
",0,train
3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies.

It already detects layout-changing copies and those are already left unchanged
by copy elision. Special case copies are also skipped because they are tagged
separately (SetCopyElisionAllowed)

PiperOrigin-RevId: 202574858",copy_insertion.h,"@@ -21,7 +21,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/service/hlo_module.h""
 #include ""tensorflow/compiler/xla/service/hlo_pass_interface.h""
-#include ""tensorflow/core/lib/gtl/flatmap.h""
 
 namespace xla {
 
@@ -79,11 +78,10 @@ class CopyInsertion : public HloPassInterface {
 };
 
 // Try to remove as many copies from the module as possible without introducing
-// live range interference. Copy instructions (identified by their unique id) in
-// the set copies_to_exclude are not considered for removal.
+// live range interference. Only copy instructions that are eligible for
+// copy elision are considered for removal.
 Status RemoveUnnecessaryCopies(
-    const HloOrdering& ordering,
-    const tensorflow::gtl::FlatSet<int>& copies_to_exclude, HloModule* module,
+    const HloOrdering& ordering, HloModule* module,
     const HloDataflowAnalysis::FusionCanShareBufferFunction&
         fusion_can_share_buffer = nullptr);
 
",0,train
3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies.

It already detects layout-changing copies and those are already left unchanged
by copy elision. Special case copies are also skipped because they are tagged
separately (SetCopyElisionAllowed)

PiperOrigin-RevId: 202574858",copy_insertion_test.cc,"@@ -125,21 +125,27 @@ TEST_F(CopyInsertionTest, SingleConstant) {
 }
 
 TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) {
-  // Verify that an kCopy instructions which exist in the pass before
+  // Verify that kCopy instructions which change layout and exist before
   // copy-insertion remain in the graph after copy-insertion.
   auto module = CreateNewModule();
 
   auto builder = HloComputation::Builder(TestName());
-  HloInstruction* constant = builder.AddInstruction(
-      HloInstruction::CreateConstant(Literal::CreateR0<float>(1.0)));
-  HloInstruction* copy_1 = builder.AddInstruction(HloInstruction::CreateUnary(
-      constant->shape(), HloOpcode::kCopy, constant));
-  HloInstruction* copy_2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      constant->shape(), HloOpcode::kCopy, constant));
+  HloInstruction* constant =
+      builder.AddInstruction(HloInstruction::CreateConstant(
+          Literal::CreateR2<float>({{0.f, 2.f}, {2.f, 4.f}})));
+  auto minor_to_major = LayoutUtil::MinorToMajor(constant->shape());
+  Layout reversed_layout =
+      LayoutUtil::MakeLayoutFromMajorToMinor(minor_to_major);
+  Shape copy_shape = constant->shape();
+  *copy_shape.mutable_layout() = reversed_layout;
+  HloInstruction* copy_1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant));
+  HloInstruction* copy_2 = builder.AddInstruction(
+      HloInstruction::CreateUnary(copy_shape, HloOpcode::kCopy, constant));
   HloInstruction* add = builder.AddInstruction(HloInstruction::CreateBinary(
       constant->shape(), HloOpcode::kAdd, copy_1, copy_2));
-  HloInstruction* add_copy = builder.AddInstruction(
-      HloInstruction::CreateUnary(constant->shape(), HloOpcode::kCopy, add));
+  builder.AddInstruction(
+      HloInstruction::CreateUnary(add->shape(), HloOpcode::kCopy, add));
 
   module->AddEntryComputation(builder.Build());
 
@@ -147,12 +153,11 @@ TEST_F(CopyInsertionTest, ExistingCopiesNotRemoved) {
 
   InsertCopies(module.get());
 
-  EXPECT_EQ(CountCopies(*module), 3);
+  EXPECT_EQ(CountCopies(*module), 2);
 
-  EXPECT_EQ(module->entry_computation()->root_instruction(), add_copy);
-  EXPECT_THAT(
-      module->entry_computation()->root_instruction(),
-      op::Copy(op::Add(op::Copy(op::Constant()), op::Copy(op::Constant()))));
+  EXPECT_EQ(module->entry_computation()->root_instruction(), add);
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              op::Add(op::Copy(op::Constant()), op::Copy(op::Constant())));
 }
 
 TEST_F(CopyInsertionTest, MultipleConstantsAndParameters) {
",0,train
3e15ac3dd22d58e45b7d6db17dedbb189d789891,tensorflow/tensorflow,"[TF:XLA] Copy elision does not need to know about existing copies.

It already detects layout-changing copies and those are already left unchanged
by copy elision. Special case copies are also skipped because they are tagged
separately (SetCopyElisionAllowed)

PiperOrigin-RevId: 202574858",hlo_rematerialization.cc,"@@ -1244,7 +1244,7 @@ StatusOr<bool> HloRematerialization::Run(
     // TODO(b/80249101): Instead of a separate copy elision pass, use the
     // ordering from the HLO schedule directly for copy insertion.
     SequentialHloOrdering ordering(module, *sequence);
-    TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, {}, module));
+    TF_RETURN_IF_ERROR(RemoveUnnecessaryCopies(ordering, module));
   }
 
   // Compute peak memory usage of all computations in the module called in a
",0,train
ee85e6d230278e763a2784ba86acc747abdb2242,tensorflow/tensorflow,"Use the numerically stable two-pass algorithm to calculate variance in MeanStddevNormalization.

Add an extra test case with large mean and large variance.

PiperOrigin-RevId: 281179296
Change-Id: Ib2a5c3a0b0870670c3c41afc15b9ea3e13fa1f8c",portable_tensor_utils.cc,"@@ -624,13 +624,16 @@ void PortableMeanStddevNormalization(const float* input_vector,
                                      int n_batch) {
   for (int batch = 0; batch < n_batch; ++batch) {
     float sum = 0.0f;
-    float sum_sq = 0.0f;
     for (int i = 0; i < v_size; ++i) {
       sum += input_vector[i];
-      sum_sq += input_vector[i] * input_vector[i];
     }
     const float mean = sum / v_size;
-    const float variance = sum_sq / v_size - mean * mean;
+    float sum_diff_sq = 0.0f;
+    for (int i = 0; i < v_size; ++i) {
+      const float diff = input_vector[i] - mean;
+      sum_diff_sq += diff * diff;
+    }
+    const float variance = sum_diff_sq / v_size;
     constexpr float kNormalizationConstant = 1e-8f;
     const float stddev_inv =
         1.0f / std::sqrt(variance + kNormalizationConstant);
",0,train
ee85e6d230278e763a2784ba86acc747abdb2242,tensorflow/tensorflow,"Use the numerically stable two-pass algorithm to calculate variance in MeanStddevNormalization.

Add an extra test case with large mean and large variance.

PiperOrigin-RevId: 281179296
Change-Id: Ib2a5c3a0b0870670c3c41afc15b9ea3e13fa1f8c",tensor_utils_test.cc,"@@ -1502,13 +1502,13 @@ INSTANTIATE_TEST_SUITE_P(
         std::make_tuple(0.01f, 0.01f, 2.53e-5f),   // small mean, small variance
         std::make_tuple(0.01f, 100.0f, 1.20e-7f),  // small mean, large variance
         std::make_tuple(100.0f, 0.0f, 0.0f),       // large mean, zero variance
-        std::make_tuple(100.0f, 0.01f, 199.0f),    // large mean, small variance
+        std::make_tuple(100.0f, 0.01f, 1.81e-4f),  // large mean, small variance
         std::make_tuple(100.0f, 100.0f, 1.20e-7f)  // large mean, large variance
         ));
 
 TEST(uKernels, MeanStddevNormalizationAllBatches) {
   constexpr int kVectorSize = 4;
-  constexpr int kBatchSize = 8;  // 9, but large mean, small variance fails
+  constexpr int kBatchSize = 9;
 
   // None-zero input.
   static float input[kVectorSize * kBatchSize] = {
@@ -1519,6 +1519,7 @@ TEST(uKernels, MeanStddevNormalizationAllBatches) {
       -0.01f,   0.0f,    0.02f,   0.03f,    // small mean, small variance
       -199.99f, -99.99f, 100.01f, 200.01f,  // small mean, large variance
       100.0f,   100.0f,  100.0f,  100.0f,   // large mean, zero variance
+      99.98f,   99.99f,  100.01f, 100.02f,  // large mean, small variance
       -100.0f,  0.0f,    200.0f,  300.0f,   // large mean, large variance
   };
   float output[kVectorSize * kBatchSize];
@@ -1533,10 +1534,11 @@ TEST(uKernels, MeanStddevNormalizationAllBatches) {
       -ksqrt16, -ksqrt04, ksqrt04, ksqrt16,  // small mean, small variance
       -ksqrt16, -ksqrt04, ksqrt04, ksqrt16,  // small mean, large variance
       0.0f,     0.0f,     0.0f,    0.0f,     // large mean, zero variance
+      -ksqrt16, -ksqrt04, ksqrt04, ksqrt16,  // large mean, small variance
       -ksqrt16, -ksqrt04, ksqrt04, ksqrt16,  // large mean, large variance
   };
   EXPECT_THAT(output, testing::ElementsAreArray(
-                          ArrayFloatNear(expected_output, 2.6e-5f)));
+                          ArrayFloatNear(expected_output, 1.81e-4f)));
 }
 
 }  // namespace tensor_utils
",0,train
7dd3d091a3346622c366eecc3e7509221d91fad1,tensorflow/tensorflow,"[dataset]: Remove extra `repeat` in the docstring for `shard`.

PiperOrigin-RevId: 197185877",dataset_ops.py,"@@ -740,7 +740,6 @@ class Dataset(object):
     d = d.shard(FLAGS.num_workers, FLAGS.worker_index)
     d = d.repeat(FLAGS.num_epochs)
     d = d.shuffle(FLAGS.shuffle_buffer_size)
-    d = d.repeat()
     d = d.interleave(tf.data.TFRecordDataset,
                      cycle_length=FLAGS.num_readers, block_length=1)
     d = d.map(parser_fn, num_parallel_calls=FLAGS.num_map_threads)
",0,train
daf85eddacbdacec50c3d67b145cb1ff59928484,tensorflow/tensorflow,"Support gradient multiplier for embeddings in TPUEstimator.

PiperOrigin-RevId: 241354959",_tpu_estimator_embedding.py,"@@ -25,11 +25,14 @@ import six
 from tensorflow.python.estimator import model_fn as model_fn_lib
 from tensorflow.python.feature_column import feature_column as core_fc
 from tensorflow.python.feature_column import feature_column_lib as core_fc_lib
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.tpu import feature_column as tpu_fc
 from tensorflow.python.tpu import tpu_embedding
 from tensorflow.python.tpu.tpu_embedding import AdagradParameters
 from tensorflow.python.tpu.tpu_embedding import AdamParameters
 from tensorflow.python.tpu.tpu_embedding import StochasticGradientDescentParameters
+from tensorflow.python.training import training
 
 # pylint: disable=protected-access
 _TPU_EMBEDDING_COLUMN_CLASSES = (tpu_fc._TPUEmbeddingColumn,
@@ -150,7 +153,8 @@ def get_tpu_embedding_config_from_feature_columns(feature_columns):
 class EmbeddingConfigSpec(
     collections.namedtuple('EmbeddingConfigSpec', [
         'feature_columns', 'optimization_parameters', 'clipping_limit',
-        'pipeline_execution_with_tensor_core'
+        'pipeline_execution_with_tensor_core',
+        'experimental_gradient_multiplier_fn'
     ])):
   """"""Class to keep track of embedding config specification.""""""
 
@@ -158,7 +162,8 @@ class EmbeddingConfigSpec(
               feature_columns,
               optimization_parameters,
               clipping_limit=None,
-              pipeline_execution_with_tensor_core=False):
+              pipeline_execution_with_tensor_core=False,
+              experimental_gradient_multiplier_fn=None):
     """"""Creates an EmbeddingConfigSpec instance.
 
     Args:
@@ -172,6 +177,8 @@ class EmbeddingConfigSpec(
         faster, but trained model will be different if step N and step N+1
         involve the same set of embedding IDs. Please see
         `tpu_embedding_configuration.proto` for details.
+      experimental_gradient_multiplier_fn: (Optional) A function taking the global step as
+        input and returning the current multiplier for all embedding gradients.
 
     Returns:
       An EmbeddingConfigSpec instance.
@@ -208,7 +215,8 @@ class EmbeddingConfigSpec(
         feature_columns=feature_columns,
         optimization_parameters=optimization_parameters,
         clipping_limit=clipping_limit,
-        pipeline_execution_with_tensor_core=pipeline_execution_with_tensor_core)
+        pipeline_execution_with_tensor_core=pipeline_execution_with_tensor_core,
+        experimental_gradient_multiplier_fn=experimental_gradient_multiplier_fn)
 
 
 class EmbeddingConfig(object):
@@ -221,6 +229,9 @@ class EmbeddingConfig(object):
 
   def __init__(self, embedding_config_spec, train_batch_size, eval_batch_size,
                num_hosts, num_cores, run_config):
+    if not embedding_config_spec:
+      raise ValueError('embedding_config_spec cannot be None.')
+
     self._embedding_config_spec = embedding_config_spec
     self._train_batch_size = train_batch_size
     self._eval_batch_size = eval_batch_size
@@ -234,6 +245,15 @@ class EmbeddingConfig(object):
     self._mode_to_tpu_embedding_dict = {}
     self.dummy_table_variables = None
 
+    self._grad_multiplier_fn = (
+        embedding_config_spec.experimental_gradient_multiplier_fn)
+
+  def get_grad_multiplier(self):
+    if self._grad_multiplier_fn:
+      return ops.convert_to_tensor(
+          self._grad_multiplier_fn(training.get_global_step()),
+          dtype=dtypes.float32)
+
   def has_embedding_tables(self):
     return bool(self._table_to_config_dict)
 
",0,train
daf85eddacbdacec50c3d67b145cb1ff59928484,tensorflow/tensorflow,"Support gradient multiplier for embeddings in TPUEstimator.

PiperOrigin-RevId: 241354959",tpu_estimator.py,"@@ -1488,8 +1488,14 @@ class _ModelFnWrapper(object):
             tpu_embedding_gradient.get_gradients_through_dummy_table_variables(
                 tpu_embedding_)
         )
+        grad_multiplier = self._ctx.embedding_config.get_grad_multiplier()
+        if grad_multiplier is not None:
+          scaled_gradients = collections.OrderedDict(
+              (k, v * grad_multiplier) for k, v in six.iteritems(gradients))
+        else:
+          scaled_gradients = gradients
         apply_sparse_grads = [
-            tpu_embedding_.generate_send_gradients_op(gradients)
+            tpu_embedding_.generate_send_gradients_op(scaled_gradients)
         ]
 
       # We must run train_op to update the variables prior to running the
",0,train
b74d6ba60a6fee82f430a8c8ba80cace44050cd9,tensorflow/tensorflow,"Remove run_v1_only from model_coverage Python tests.

PiperOrigin-RevId: 261199843",model_coverage_lib_test.py,"@@ -38,7 +38,6 @@ from tensorflow.python.saved_model import saved_model
 from tensorflow.python.training.training_util import write_graph
 
 
-@test_util.run_v1_only('Incompatible with 2.0.')
 class EvaluateFrozenGraph(test.TestCase):
 
   def _saveFrozenGraph(self, sess):
@@ -47,27 +46,29 @@ class EvaluateFrozenGraph(test.TestCase):
     return graph_def_file
 
   def testFloat(self):
-    with session.Session().as_default() as sess:
-      in_tensor = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32)
-      _ = in_tensor + in_tensor
-    filename = self._saveFrozenGraph(sess)
+    with ops.Graph().as_default():
+      with session.Session().as_default() as sess:
+        in_tensor = array_ops.placeholder(
+            shape=[1, 16, 16, 3], dtype=dtypes.float32)
+        _ = in_tensor + in_tensor
 
+    filename = self._saveFrozenGraph(sess)
     model_coverage.test_frozen_graph(filename, ['Placeholder'], ['add'])
 
   def testMultipleOutputs(self):
-    with session.Session().as_default() as sess:
-      in_tensor_1 = array_ops.placeholder(
-          shape=[1, 16], dtype=dtypes.float32, name='inputA')
-      in_tensor_2 = array_ops.placeholder(
-          shape=[1, 16], dtype=dtypes.float32, name='inputB')
-
-      weight = constant_op.constant(-1.0, shape=[16, 16])
-      bias = constant_op.constant(-1.0, shape=[16])
-      layer = math_ops.matmul(in_tensor_1, weight) + bias
-      _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2))
-    filename = self._saveFrozenGraph(sess)
+    with ops.Graph().as_default():
+      with session.Session().as_default() as sess:
+        in_tensor_1 = array_ops.placeholder(
+            shape=[1, 16], dtype=dtypes.float32, name='inputA')
+        in_tensor_2 = array_ops.placeholder(
+            shape=[1, 16], dtype=dtypes.float32, name='inputB')
 
+        weight = constant_op.constant(-1.0, shape=[16, 16])
+        bias = constant_op.constant(-1.0, shape=[16])
+        layer = math_ops.matmul(in_tensor_1, weight) + bias
+        _ = math_ops.reduce_mean(math_ops.square(layer - in_tensor_2))
+
+    filename = self._saveFrozenGraph(sess)
     model_coverage.test_frozen_graph(filename, ['inputA', 'inputB'],
                                      ['add', 'Mean'])
 
@@ -94,17 +95,18 @@ class EvaluateFrozenGraph(test.TestCase):
 
   def _getQuantizedModel(self):
     np.random.seed(0)
-    with session.Session().as_default() as sess:
-      # The tensor needs to have more than 1024 elements for quantize_weights to
-      # kick in. Thus, the [33, 33] shape.
-      in_tensor_1 = array_ops.placeholder(
-          shape=[33, 33], dtype=dtypes.float32, name='inputA')
-      in_tensor_2 = constant_op.constant(
-          np.random.uniform(low=-10., high=10., size=(33, 33)),
-          shape=[33, 33],
-          dtype=dtypes.float32,
-          name='inputB')
-      _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output')
+    with ops.Graph().as_default():
+      with session.Session().as_default() as sess:
+        # The tensor needs to have more than 1024 elements for quantize_weights
+        # to kick in. Thus, the [33, 33] shape.
+        in_tensor_1 = array_ops.placeholder(
+            shape=[33, 33], dtype=dtypes.float32, name='inputA')
+        in_tensor_2 = constant_op.constant(
+            np.random.uniform(low=-10., high=10., size=(33, 33)),
+            shape=[33, 33],
+            dtype=dtypes.float32,
+            name='inputB')
+        _ = math_ops.matmul(in_tensor_1, in_tensor_2, name='output')
 
     filename = self._saveFrozenGraph(sess)
     return filename
@@ -125,25 +127,24 @@ class EvaluateFrozenGraph(test.TestCase):
         target_ops=set([lite.OpsSet.SELECT_TF_OPS]))
 
 
-@test_util.run_v1_only('Incompatible with 2.0.')
 class EvaluateSavedModel(test.TestCase):
 
   def testFloat(self):
     saved_model_dir = os.path.join(self.get_temp_dir(), 'simple_savedmodel')
-    with session.Session().as_default() as sess:
-      in_tensor_1 = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
-      in_tensor_2 = array_ops.placeholder(
-          shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
-      out_tensor = in_tensor_1 + in_tensor_2
-
-      inputs = {'x': in_tensor_1, 'y': in_tensor_2}
-      outputs = {'z': out_tensor}
-      saved_model.simple_save(sess, saved_model_dir, inputs, outputs)
+    with ops.Graph().as_default():
+      with session.Session().as_default() as sess:
+        in_tensor_1 = array_ops.placeholder(
+            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputB')
+        in_tensor_2 = array_ops.placeholder(
+            shape=[1, 16, 16, 3], dtype=dtypes.float32, name='inputA')
+        out_tensor = in_tensor_1 + in_tensor_2
+
+        inputs = {'x': in_tensor_1, 'y': in_tensor_2}
+        outputs = {'z': out_tensor}
+        saved_model.simple_save(sess, saved_model_dir, inputs, outputs)
     model_coverage.test_saved_model(saved_model_dir)
 
 
-@test_util.run_v1_only('Incompatible with 2.0.')
 class EvaluateKerasModel(test.TestCase):
 
   def _getSingleInputKerasModel(self):
",0,train
549dd6fd66a9b176ee3fe5e7093e4a1654bcbdb1,tensorflow/tensorflow,"Add an argument `stop_gradients` to `tf.gradients` in order to hold specific tensors constant wrt `xs`.

PiperOrigin-RevId: 167501127",gradients_impl.py,"@@ -278,7 +278,7 @@ def _VerifyGeneratedGradients(grads, op):
                      ""inputs %d"" % (len(grads), op.node_def, len(op.inputs)))
 
 
-def _StopOps(from_ops, pending_count):
+def _StopOps(from_ops, stop_gradient_ops, pending_count):
   """"""The set of ops that terminate the gradient computation.
 
   This computes the frontier of the forward graph *before* which backprop
@@ -288,8 +288,11 @@ def _StopOps(from_ops, pending_count):
   `_PendingCount(g, xs, from_ops)`. An 'op' has predecessors in `from_ops`
   iff pending_count[op._id] > 0.
 
+  In addition, none of `stop_gradient_ops` will be differentiated.
+
   Args:
     from_ops: list of Operations.
+    stop_gradient_ops: list of Operations never to backprop through.
     pending_count: List of integers, indexed by operation id.
 
   Returns:
@@ -304,6 +307,7 @@ def _StopOps(from_ops, pending_count):
         break
     if is_stop_op:
       stop_ops.add(op._id)
+  stop_ops.update(op._id for op in stop_gradient_ops)  # pylint: disable=protected-access
   return stop_ops
 
 
@@ -374,17 +378,17 @@ def gradients(ys,
               name=""gradients"",
               colocate_gradients_with_ops=False,
               gate_gradients=False,
-              aggregation_method=None):
-  """"""Constructs symbolic partial derivatives of sum of `ys` w.r.t. x in `xs`.
+              aggregation_method=None,
+              stop_gradients=None):
+  """"""Constructs symbolic derivatives of sum of `ys` w.r.t. x in `xs`.
 
   `ys` and `xs` are each a `Tensor` or a list of tensors.  `grad_ys`
   is a list of `Tensor`, holding the gradients received by the
   `ys`. The list must be the same length as `ys`.
 
-  `gradients()` adds ops to the graph to output the partial
-  derivatives of `ys` with respect to `xs`.  It returns a list of
-  `Tensor` of length `len(xs)` where each tensor is the `sum(dy/dx)`
-  for y in `ys`.
+  `gradients()` adds ops to the graph to output the derivatives of `ys` with
+  respect to `xs`.  It returns a list of `Tensor` of length `len(xs)` where
+  each tensor is the `sum(dy/dx)` for y in `ys`.
 
   `grad_ys` is a list of tensors of the same length as `ys` that holds
   the initial gradients for each y in `ys`.  When `grad_ys` is None,
@@ -394,6 +398,31 @@ def gradients(ys,
   one wanted to weight the gradient differently for each value in
   each y).
 
+  `stop_gradients` is a `Tensor` or a list of tensors to be considered constant
+  with respect to all `xs`. These tensors will not be backpropagated through,
+  as though they had been explicitly disconnected using `stop_gradient`.  Among
+  other things, this allows computation of partial derivatives as opposed to
+  total derivatives. For example:
+
+    a = tf.constant(0.)
+    b = 2 * a
+    g = tf.gradients(a + b, [a, b], stop_gradients=[a, b])
+
+  Here the partial derivatives `g` evaluate to `[1.0, 1.0]`, compared to the
+  total derivatives `tf.gradients(a + b, [a, b])`, which take into account the
+  influence of `a` on `b` and evaluate to `[3.0, 1.0]`.  Note that the above is
+  equivalent to:
+
+    a = tf.stop_gradient(tf.constant(0.))
+    b = tf.stop_gradient(2 * a)
+    g = tf.gradients(a + b, [a, b])
+
+  `stop_gradients` provides a way of stopping gradient after the graph has
+  already been constructed, as compared to `tf.stop_gradient` which is used
+  during graph construction.  When the two approaches are combined,
+  backpropagation stops at both `tf.stop_gradient` nodes and nodes in
+  `stop_gradients`, whichever is encountered first.
+
   Args:
     ys: A `Tensor` or list of tensors to be differentiated.
     xs: A `Tensor` or list of tensors to be used for differentiation.
@@ -407,6 +436,8 @@ def gradients(ys,
       for an operations.  This avoids some race conditions.
     aggregation_method: Specifies the method used to combine gradient terms.
       Accepted values are constants defined in the class `AggregationMethod`.
+    stop_gradients: Optional. A `Tensor` or list of tensors not to differentiate
+      through.
 
   Returns:
     A list of `sum(dy/dx)` for each x in `xs`.
@@ -423,12 +454,15 @@ def gradients(ys,
                        ""functions in tf.contrib.eager.backprop instead."")
   ys = _AsList(ys)
   xs = _AsList(xs)
+  stop_gradients = [] if stop_gradients is None else _AsList(stop_gradients)
   if grad_ys is None:
     grad_ys = [None] * len(ys)
   else:
     grad_ys = _AsList(grad_ys)
 
-  with ops.name_scope(name, ""gradients"", ys + xs + grad_ys) as grad_scope:
+  with ops.name_scope(
+      name, ""gradients"",
+      list(ys) + list(xs) + list(stop_gradients) + list(grad_ys)) as grad_scope:
     ys = ops.convert_n_to_tensor_or_indexed_slices(ys, name=""y"")
     xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
           else x
@@ -450,6 +484,7 @@ def gradients(ys,
       ys = [array_ops.identity(y) if y.consumers() else y for y in ys]
     to_ops = [t.op for t in ys]
     from_ops = [t.op for t in xs]
+    stop_gradient_ops = [t.op for t in stop_gradients]
     pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
                                               from_ops,
                                               colocate_gradients_with_ops)
@@ -488,8 +523,7 @@ def gradients(ys,
           _SetGrad(grads, y, loop_state.ZerosLikeForExit(y))
           queue.append(y.op)
 
-    # The set of 'from_ops'.
-    stop_ops = _StopOps(from_ops, pending_count)
+    stop_ops = _StopOps(from_ops, stop_gradient_ops, pending_count)
     while queue:
       # generate gradient subgraph for op.
       op = queue.popleft()
",0,train
549dd6fd66a9b176ee3fe5e7093e4a1654bcbdb1,tensorflow/tensorflow,"Add an argument `stop_gradients` to `tf.gradients` in order to hold specific tensors constant wrt `xs`.

PiperOrigin-RevId: 167501127",gradients_test.py,"@@ -349,6 +349,64 @@ class GradientsTest(test_util.TensorFlowTestCase):
       g = gradients.gradients([z, z2], x)
       self.assertAllClose(17502.0, g[0].eval())
 
+  def testPartialDerivatives(self):
+    with self.test_session():
+      x = constant_op.constant(1.)
+      y = 2 * x
+      z = x + y
+      totalg = gradients.gradients(z, [x, y])
+      self.assertEqual([3.0, 1.0], [g.eval() for g in totalg])
+      partialg = gradients.gradients(z, [x, y], stop_gradients=[x, y])
+      self.assertEqual([1.0, 1.0], [g.eval() for g in partialg])
+
+  def testStopGradients(self):
+    def _MakeGraph(rng, stop_gradients=()):
+      def _FunctionOf(xs, k=3):
+        return ops.convert_to_tensor(
+            sum(math_ops.matmul(rng.rand(k, k), x) for x in xs)
+            + rng.rand(k, k))
+
+      a = _FunctionOf([])
+      if ""a"" in stop_gradients: a = array_ops.stop_gradient(a)
+      b = _FunctionOf([a])
+      if ""b"" in stop_gradients: b = array_ops.stop_gradient(b)
+      c = _FunctionOf([a, b])
+      if ""c"" in stop_gradients: c = array_ops.stop_gradient(c)
+      d = _FunctionOf([b, c])
+      if ""d"" in stop_gradients: d = array_ops.stop_gradient(d)
+      return dict(a=a, b=b, c=c, d=d)
+
+    def _Gradients(ys, xs, **kwargs):
+      dydxs = gradients.gradients(ys, xs, **kwargs)
+      dydxs = [0. * x if dydx is None else dydx
+               for x, dydx in zip(xs, dydxs)]
+      return dydxs
+
+    seed = np.random.randint(1000)
+    cases = []
+    subsets = [""""] + ""a b c d ab ac ad bc bd cd abc abd acd bcd abcd"".split()
+    graph = _MakeGraph(np.random.RandomState(seed))
+    for constants in subsets:
+      graph_with_stops = _MakeGraph(np.random.RandomState(seed), constants)
+      for variables_ in subsets:
+        # compute the gradient when stopped using tf.stop_gradients
+        grad1 = _Gradients([graph_with_stops[""d""]],
+                           [graph_with_stops[v] for v in variables_])
+        # compute the gradient when stopped using the stop_gradients kwarg
+        grad2 = _Gradients([graph[""d""]],
+                           [graph[v] for v in variables_],
+                           stop_gradients=[graph[v] for v in constants])
+        cases.append(dict(grad1=grad1, grad2=grad2,
+                          constants=constants, variables=variables_))
+
+    # evaluate all tensors in one call to session.run for speed
+    with self.test_session() as session:
+      results = session.run([(case[""grad1""], case[""grad2""]) for case in cases])
+
+    for (npgrad1, npgrad2), case in zip(results, cases):
+      for a, b in zip(npgrad1, npgrad2):
+        np.testing.assert_allclose(a, b)
+
 
 class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
",0,train
386de03d9d76d934ca99a00c02060e57e8ab86b0,tensorflow/tensorflow,"Move to using stateless image_ops so that the random seed constructor argument works.

PiperOrigin-RevId: 378938459
Change-Id: I3d6d24a9accc8a52bbf48afeca180eccf8f7d493",image_preprocessing.py,"@@ -412,11 +412,13 @@ class RandomFlip(base_layer.Layer):
     def random_flipped_inputs():
       flipped_outputs = inputs
       if self.horizontal:
-        flipped_outputs = image_ops.random_flip_left_right(
-            flipped_outputs, self.seed)
+        flipped_outputs = image_ops.stateless_random_flip_left_right(
+            flipped_outputs,
+            self._rng.make_seeds()[:, 0])
       if self.vertical:
-        flipped_outputs = image_ops.random_flip_up_down(flipped_outputs,
-                                                        self.seed)
+        flipped_outputs = image_ops.stateless_random_flip_up_down(
+            flipped_outputs,
+            self._rng.make_seeds()[:, 0])
       return flipped_outputs
 
     output = control_flow_util.smart_cond(training, random_flipped_inputs,
@@ -1083,6 +1085,7 @@ class RandomContrast(base_layer.Layer):
       raise ValueError('Factor cannot have negative values or greater than 1.0,'
                        ' got {}'.format(factor))
     self.seed = seed
+    self._rng = make_generator(self.seed)
     self.input_spec = InputSpec(ndim=4)
     super(RandomContrast, self).__init__(**kwargs)
     base_preprocessing_layer.keras_kpl_gauge.get_cell('RandomContrast').set(
@@ -1093,8 +1096,9 @@ class RandomContrast(base_layer.Layer):
       training = backend.learning_phase()
 
     def random_contrasted_inputs():
-      return image_ops.random_contrast(inputs, 1. - self.lower, 1. + self.upper,
-                                       self.seed)
+      return image_ops.stateless_random_contrast(inputs, 1. - self.lower,
+                                                 1. + self.upper,
+                                                 self._rng.make_seeds()[:, 0])
 
     output = control_flow_util.smart_cond(training, random_contrasted_inputs,
                                           lambda: inputs)
@@ -1314,7 +1318,7 @@ def make_generator(seed=None):
   Returns:
     A generator object.
   """"""
-  if seed:
+  if seed is not None:
     return stateful_random_ops.Generator.from_seed(seed)
   else:
     return stateful_random_ops.Generator.from_non_deterministic_state()
",0,test
386de03d9d76d934ca99a00c02060e57e8ab86b0,tensorflow/tensorflow,"Move to using stateless image_ops so that the random seed constructor argument works.

PiperOrigin-RevId: 378938459
Change-Id: I3d6d24a9accc8a52bbf48afeca180eccf8f7d493",image_preprocessing_test.py,"@@ -14,6 +14,7 @@
 # ==============================================================================
 """"""Tests for image preprocessing layers.""""""
 
+import functools
 from absl.testing import parameterized
 import numpy as np
 
@@ -377,7 +378,10 @@ class RandomFlipTest(keras_parameterized.TestCase):
       if mode == 'vertical' or mode == 'horizontal_and_vertical':
         expected_output = np.flip(expected_output, axis=1)
     with test.mock.patch.object(
-        random_ops, 'random_uniform', return_value=mock_random):
+        stateless_random_ops,
+        'stateless_random_uniform',
+        return_value=mock_random,
+    ):
       with testing_utils.use_gpu():
         layer = image_preprocessing.RandomFlip(mode)
         actual_output = layer(inp, training=1)
@@ -427,7 +431,10 @@ class RandomFlipTest(keras_parameterized.TestCase):
       mock_random = [1, 1]
       mock_random = np.reshape(mock_random, [2, 1, 1, 1])
       with test.mock.patch.object(
-          random_ops, 'random_uniform', return_value=mock_random):
+          stateless_random_ops,
+          'stateless_random_uniform',
+          return_value=mock_random,
+      ):
         with self.cached_session():
           layer = image_preprocessing.RandomFlip()
           actual_output = layer(input_images, training=1)
@@ -460,7 +467,10 @@ class RandomContrastTest(keras_parameterized.TestCase):
       inp_mean = np.mean(inp_mean, axis=2, keepdims=True)
       expected_output = (inp - inp_mean) * mock_random + inp_mean
     with test.mock.patch.object(
-        random_ops, 'random_uniform', return_value=mock_random):
+        stateless_random_ops,
+        'stateless_random_uniform',
+        return_value=mock_random,
+    ):
       with testing_utils.use_gpu():
         layer = image_preprocessing.RandomContrast((lower, upper))
         actual_output = layer(inp, training=True)
@@ -1449,5 +1459,35 @@ class LearningPhaseTest(keras_parameterized.TestCase):
     self.assertEqual(tuple(int(i) for i in out.shape[1:]), shape)
 
 
+@keras_parameterized.run_all_keras_modes(always_skip_v1=True)
+class DeterminismTest(keras_parameterized.TestCase):
+
+  @parameterized.named_parameters(
+      ('random_flip', image_preprocessing.RandomFlip),
+      ('random_contrast',
+       functools.partial(image_preprocessing.RandomContrast, factor=1.)),
+      ('random_crop',
+       functools.partial(image_preprocessing.RandomCrop, height=2, width=2)),
+      ('random_translation',
+       functools.partial(image_preprocessing.RandomTranslation, 0.3, 0.2)),
+      ('random_rotation',
+       functools.partial(image_preprocessing.RandomRotation, 0.5)),
+      ('random_zoom', functools.partial(image_preprocessing.RandomZoom, 0.2)),
+      ('random_height', functools.partial(image_preprocessing.RandomHeight,
+                                          0.4)),
+      ('random_width', functools.partial(image_preprocessing.RandomWidth, 0.3)),
+  )
+  def test_seed_constructor_arg(self, layer_cls):
+    input_image = np.random.random((2, 5, 8, 3)).astype(np.float32)
+
+    layer1 = layer_cls(seed=0.)
+    layer2 = layer_cls(seed=0.)
+    layer1_output = layer1(input_image)
+    layer2_output = layer2(input_image)
+
+    self.assertAllClose(layer1_output.numpy().tolist(),
+                        layer2_output.numpy().tolist())
+
+
 if __name__ == '__main__':
   test.main()
",0,test
bc78f9b060cece8e29a89f7dbcdedcadbc61891d,tensorflow/tensorflow,"internal
END_PUBLIC

BEGIN_PUBLIC
Automated g4 rollback of changelist 193600682

PiperOrigin-RevId: 193723856",rev_block_lib.py,"@@ -45,7 +45,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.util import nest
-from tensorflow.python.util import tf_inspect
 
 __all__ = [""rev_block"", ""RevBlock"", ""recompute_grad""]
 
@@ -430,13 +429,12 @@ def enable_with_args(dec):
 
 
 @enable_with_args
-def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False,
-                   tensor_arg_names=None):
+def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """"""Decorator that recomputes the function on the backwards pass.
 
   Args:
-    fn: the subgraph-producing function to wrap and recompute when computing
-      gradients. Provide `tensor_arg_names` if not all arguments are `Tensor`s.
+    fn: a function that takes Tensors (all as positional arguments) and returns
+      a tuple of Tensors.
     use_data_dep: `bool`, if `True` will use a dummy data dependency to force
       the recompute to happen. If `False` will use a control dependency. By
       default will be `True` if in an XLA context and `False` otherwise. XLA
@@ -445,25 +443,17 @@ def recompute_grad(fn, use_data_dep=_USE_DEFAULT, tupleize_grads=False,
       that all gradients are produced before any are consumed by downstream ops.
       If `use_data_dep` is also `True`, will use a data dependency instead of
       a control dependency.
-    tensor_arg_names: `list<str>`, names of the `Tensor` arguments to `fn`. If
-      `None`, assumes all arguments are `Tensor`s.
 
   Returns:
     A wrapped fn that is identical to fn when called, but its activations will
     be discarded and recomputed on the backwards pass (i.e. on a call to
     tf.gradients).
   """"""
-  if tensor_arg_names:
-    if not isinstance(tensor_arg_names, (list, tuple)):
-      raise TypeError(""tensor_arg_names must be a list"")
 
   @functools.wraps(fn)
-  def wrapped(*args, **kwargs):
-    tensor_only_fn, tensor_args = _make_tensor_only(fn, args, kwargs,
-                                                    tensor_arg_names)
+  def wrapped(*args):
     return _recompute_grad(
-        tensor_only_fn, tensor_args, use_data_dep=use_data_dep,
-        tupleize_grads=tupleize_grads)
+        fn, args, use_data_dep=use_data_dep, tupleize_grads=tupleize_grads)
 
   return wrapped
 
@@ -473,59 +463,11 @@ def _is_on_tpu():
   return control_flow_util.GetContainingXLAContext(ctxt) is not None
 
 
-def _make_tensor_only(fn, args, kwargs, tensor_arg_names):
-  """"""Return fn such that it only takes Tensor args for tensor_arg_names.""""""
-  argspec = tf_inspect.getargspec(fn)
-  if argspec.varargs is not None or argspec.keywords is not None:
-    raise ValueError(""Function decorated with recompute_grad must not use ""
-                     ""*args or **kwargs."")
-  fn_arg_names = list(argspec.args)
-
-  # name_to_arg is a dict of argument name to argument value, including both
-  # positional and keyword arguments passed.
-  name_to_arg = {}
-  # Populate positional arguments.
-  for name, arg in zip(fn_arg_names[:len(args)], args):
-    name_to_arg[name] = arg
-  # Populate keyword arguments.
-  name_to_arg.update(kwargs)
-
-  # Separate the Tensor arguments from the non-Tensor arguments.
-  # The default is that all arguments are Tensor arguments.
-  tensor_arg_names = tensor_arg_names or fn_arg_names
-  for name in tensor_arg_names:
-    if name not in name_to_arg:
-      raise ValueError(""Must provide Tensor argument %s"" % name)
-  tensor_args = [name_to_arg[name] for name in tensor_arg_names]
-  non_tensor_kwargs = dict([(name, arg) for name, arg in name_to_arg.items()
-                            if name not in tensor_arg_names])
-
-  # Check that Tensor arguments are in fact Tensors and that non-Tensor
-  # arguments are not.
-  for name, arg in zip(tensor_arg_names, tensor_args):
-    if not isinstance(arg, framework_ops.Tensor):
-      raise TypeError(""Fn argument %s must be a Tensor."" % name)
-  for name, arg in non_tensor_kwargs.items():
-    if isinstance(arg, framework_ops.Tensor):
-      raise TypeError(""Fn argument %s must not be a Tensor."" % name)
-
-  # Construct a Tensor-only wrapper function that will pass the non-Tensor
-  # arguments as well when called.
-  def tensor_only_fn(*tensors):
-    all_kwargs = dict(zip(tensor_arg_names, tensors))
-    all_kwargs.update(non_tensor_kwargs)
-    return fn(**all_kwargs)
-
-  return tensor_only_fn, tensor_args
-
-
-def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT,
-                    tupleize_grads=False):
+def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT, tupleize_grads=False):
   """"""See recompute_grad.""""""
   for arg in args:
     if not isinstance(arg, framework_ops.Tensor):
       raise ValueError(""All inputs to function must be Tensors"")
-
   use_data_dep_ = use_data_dep
   if use_data_dep_ == _USE_DEFAULT:
     use_data_dep_ = _is_on_tpu()
@@ -559,11 +501,14 @@ def _recompute_grad(fn, args, use_data_dep=_USE_DEFAULT,
     grad_vars = grads[len(inputs):]
     return grad_inputs, grad_vars
 
-  # TODO(rsepassi): Replace with tf.custom_gradient
   @_fn_with_custom_grad(grad_fn)
   def fn_with_recompute(*args):
     cached_vs.append(variable_scope.get_variable_scope())
-    cached_arg_scope.append(contrib_framework_ops.current_arg_scope())
+    # TODO(rsepassi): Rm conditional in TF 1.4
+    if hasattr(contrib_framework_ops, ""current_arg_scope""):
+      cached_arg_scope.append(contrib_framework_ops.current_arg_scope())
+    else:
+      cached_arg_scope.append({})
     return fn(*args)
 
   return fn_with_recompute(*args)
",0,train
bc78f9b060cece8e29a89f7dbcdedcadbc61891d,tensorflow/tensorflow,"internal
END_PUBLIC

BEGIN_PUBLIC
Automated g4 rollback of changelist 193600682

PiperOrigin-RevId: 193723856",rev_block_lib_test.py,"@@ -318,108 +318,6 @@ class RecomputeTest(test.TestCase):
       self.assertEqual(1, len(grads))
       self.assertTrue(grads[0] is not None)
 
-  def testWithNontensorArgs(self):
-    @rev_block_lib.recompute_grad(tupleize_grads=True,
-                                  tensor_arg_names=[""inputs""])
-    def layer_with_recompute(inputs, plus=None):
-      var = variable_scope.get_variable(""var"", ())
-      self.assertFalse(plus)  # called with False below
-      if plus:
-        return var + inputs
-      else:
-        return var * inputs
-
-    inputs = array_ops.ones((), dtypes.float32)
-    outputs = layer_with_recompute(inputs, plus=False)
-    loss = math_ops.square(outputs)
-    grads = gradients_impl.gradients(loss, variables.trainable_variables())
-    self.assertEqual(1, len(grads))
-    self.assertTrue(grads[0] is not None)
-
-
-class MakeTensorOnlyTest(test.TestCase):
-
-  def testMakeTensorOnly(self):
-    def fn(a, b, c, d=1, e=None, f=7):
-      return (a, b, c, d, e, f)
-
-    t1 = array_ops.ones(())
-    t2 = array_ops.ones(())
-    t3 = array_ops.ones(())
-    args = [1, t1, 3, t2]
-    kwargs = {""e"": t3}
-    tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only(
-        fn, args, kwargs, [""b"", ""d"", ""e""])
-    self.assertAllEqual(tensor_args, [t1, t2, t3])
-    out = tensor_only_fn(*tensor_args)
-    self.assertAllEqual(out, (1, t1, 3, t2, t3, 7))
-
-  def testMakeTensorOnlyPositionalArgsOnly(self):
-    def fn(a, b, c):
-      return (a, b, c)
-
-    t1 = array_ops.ones(())
-    t2 = array_ops.ones(())
-    args = [t1, 3, t2]
-    tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only(
-        fn, args, {}, [""a"", ""c""])
-    self.assertAllEqual(tensor_args, [t1, t2])
-    out = tensor_only_fn(*tensor_args)
-    self.assertAllEqual(out, (t1, 3, t2))
-
-  def testMakeTensorOnlyKwargsArgsOnly(self):
-    def fn(a=1, b=2, c=3):
-      return (a, b, c)
-
-    t1 = array_ops.ones(())
-    t2 = array_ops.ones(())
-    args = [t1]
-    kwargs = {""c"": t2}
-    tensor_only_fn, tensor_args = rev_block_lib._make_tensor_only(
-        fn, args, kwargs, [""a"", ""c""])
-    self.assertAllEqual(tensor_args, [t1, t2])
-    out = tensor_only_fn(*tensor_args)
-    self.assertAllEqual(out, (t1, 2, t2))
-
-  def testErrorOnMissingTensorArg(self):
-    def fn(a, b):
-      return (a, b)
-
-    with self.assertRaisesWithPredicateMatch(
-        ValueError, ""provide Tensor argument""):
-      rev_block_lib._make_tensor_only(fn, [], {""b"": 2}, [""a""])
-
-  def testErrorOnSignatureSplats(self):
-    def fn1(a, *args):
-      return (a, args)
-
-    err_msg = r""must not use \*args or \*\*kwargs""
-    with self.assertRaisesWithPredicateMatch(ValueError, err_msg):
-      rev_block_lib._make_tensor_only(fn1, [1, 2], {}, [""a""])
-
-    def fn2(a, **kwargs):
-      return (a, kwargs)
-
-    with self.assertRaisesWithPredicateMatch(ValueError, err_msg):
-      rev_block_lib._make_tensor_only(fn2, [], {""a"": 1, ""b"": 2}, [""a""])
-
-  def testErrorOnNonTensorForTensor(self):
-    def fn(a, b):
-      return (a, b)
-
-    with self.assertRaisesWithPredicateMatch(TypeError, ""must be a Tensor""):
-      rev_block_lib._make_tensor_only(fn, [2, 3], {}, [""a""])
-
-  def testErrorOnTensorForNonTensor(self):
-    def fn(a, b):
-      return (a, b)
-
-    with self.assertRaisesWithPredicateMatch(
-        TypeError, ""must not be a Tensor""):
-      t1 = array_ops.ones(())
-      t2 = array_ops.ones(())
-      rev_block_lib._make_tensor_only(fn, [t1, t2], {}, [""a""])
-
 
 class FnWithCustomGradTest(test.TestCase):
 
",0,train
ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName

Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field.

Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create an LLVM AMDGPUTarget that accurately represents the underlying GPU. We will need the information contained within the `string hipDeviceProp_t gcnArchName` field for that purpose.

This commit updates the ""GpuVersion"" datatype from being a simple int to an (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",amdgpu_compiler.cc,"@@ -100,8 +100,14 @@ GpuVersion AMDGPUCompiler::GetGpuVersion(se::StreamExecutor* stream_exec) {
         << ""Couldn't get AMDGPU ISA version for device; assuming gfx803."";
     isa_version = 803;
   }
+  std::string gcn_arch_name =
+      stream_exec->GetDeviceDescription().rocm_amdgpu_gcn_arch_name();
+  if (gcn_arch_name == stream_exec->GetDeviceDescription().kUndefinedString) {
+    LOG(WARNING) << ""Couldn't get AMDGPU GCN Arch for device; assuming gfx803."";
+    gcn_arch_name = ""gfx803"";
+  }
 
-  return isa_version;
+  return std::make_pair(isa_version, gcn_arch_name);
 }
 
 StatusOr<std::pair<std::string, std::vector<uint8>>>
",0,train
ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName

Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field.

Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create an LLVM AMDGPUTarget that accurately represents the underlying GPU. We will need the information contained within the `string hipDeviceProp_t gcnArchName` field for that purpose.

This commit updates the ""GpuVersion"" datatype from being a simple int to an (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_executable.cc,"@@ -101,10 +101,11 @@ Status GpuExecutable::CheckCompatibilityWithServiceExecutableRunOptions(
     int stream_isa_version;
     main_stream->parent()->GetDeviceDescription().rocm_amdgpu_isa_version(
         &stream_isa_version);
-    GpuVersion amd_isa_version = stream_isa_version;
-    TF_RET_CHECK(amd_isa_version == gpu_version_)
-        << ""AMDGPU GCN ISA version mismatch; expected {""
-        << absl::get<int>(gpu_version_) << "", but was "" << stream_isa_version;
+    int gpu_exec_isa_version =
+        absl::get<std::pair<int, std::string>>(gpu_version_).first;
+    TF_RET_CHECK(stream_isa_version == gpu_exec_isa_version)
+        << ""AMDGPU GCN ISA version mismatch; expected {"" << gpu_exec_isa_version
+        << "", but was "" << stream_isa_version;
   } else if (platform_kind == stream_executor::PlatformKind::kCuda) {
     std::pair<int, int> stream_compute_compatibility;
     main_stream->parent()->GetDeviceDescription().cuda_compute_capability(
",0,train
ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName

Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field.

Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create an LLVM AMDGPUTarget that accurately represents the underlying GPU. We will need the information contained within the `string hipDeviceProp_t gcnArchName` field for that purpose.

This commit updates the ""GpuVersion"" datatype from being a simple int to an (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_types.h,"@@ -21,10 +21,19 @@ limitations under the License.
 namespace xla {
 namespace gpu {
 
-// GpuVersion is used to abstract Gpu hardware version. On Cuda platform,
-// it comprises a pair of integers denoting major and minor version.
-// On ROCm platform, it comprises one integer for AMD GCN ISA version.
-using GpuVersion = absl::variant<std::pair<int, int>, int>;
+// GpuVersion is used to abstract Gpu hardware version.
+//
+// On Cuda platform, it comprises an <int, int> pair
+// denoting major and minor version.
+//
+// On ROCm platform, it comprises an <int, string> pair:
+// the int has the contents of the hipDeviceProp_t::gcnArchValue field;
+// the string has the contents of the hipDeviceProp_t::gcnArchName field.
+// The string contains all the information needed to create an exact LLVM
+// AMDGPUTarget corresponding to the AMDGPU device it represents; the int value
+// by itself is not sufficient for this purpose.
+using GpuVersion =
+    absl::variant<std::pair<int, int>, std::pair<int, std::string>>;
 }  // namespace gpu
 }  // namespace xla
 
",0,train
ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName

Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field.

Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create an LLVM AMDGPUTarget that accurately represents the underlying GPU. We will need the information contained within the `string hipDeviceProp_t gcnArchName` field for that purpose.

This commit updates the ""GpuVersion"" datatype from being a simple int to an (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",gpu_backend_lib.cc,"@@ -787,13 +787,13 @@ Status AMDGPUTargetModuleLinker(llvm::Module* module, GpuVersion gpu_version,
                                 const HloModuleConfig& hlo_module_config,
                                 const string& device_bitcode_dir_path) {
   // Link the input module with ROCDL.
-  auto amdgpu_version = absl::get_if<int>(&gpu_version);
+  auto amdgpu_version = absl::get_if<std::pair<int, std::string>>(&gpu_version);
   if (!amdgpu_version) {
     return xla::InternalError(
         ""Incompatible AMD GCN ISA version was specified."");
   }
-  TF_RETURN_IF_ERROR(
-      LinkROCDLIfNecessary(module, *amdgpu_version, device_bitcode_dir_path));
+  TF_RETURN_IF_ERROR(LinkROCDLIfNecessary(module, amdgpu_version->first,
+                                          device_bitcode_dir_path));
 
   return Status::OK();
 }
@@ -861,13 +861,14 @@ StatusOr<std::vector<uint8>> CompileToHsaco(
         tensorflow::profiler::TraceMeLevel::kInfo);
     XLA_SCOPED_LOGGING_TIMER(""Compile module "" + module->getName().str());
 
-    auto amdgpu_version = absl::get_if<int>(&gpu_version);
+    auto amdgpu_version =
+        absl::get_if<std::pair<int, std::string>>(&gpu_version);
     if (!amdgpu_version) {
       return xla::InternalError(
           ""Incompatible AMD GCN ISA version was specified."");
     }
     uint64_t hash;
-    if (HsacoCache::Find(str, hash, *amdgpu_version, hsaco)) {
+    if (HsacoCache::Find(str, hash, amdgpu_version->first, hsaco)) {
       VLOG(1) << ""HSACO cache hit"";
       return hsaco;
     }
@@ -885,7 +886,7 @@ StatusOr<std::vector<uint8>> CompileToHsaco(
     llvm::Triple default_target_triple(""amdgcn--amdhsa-amdgiz"");
     // Construct LLVM TargetMachine for AMDGPU.
     std::unique_ptr<llvm::TargetMachine> target_machine =
-        AMDGPUGetTargetMachine(default_target_triple, *amdgpu_version,
+        AMDGPUGetTargetMachine(default_target_triple, amdgpu_version->first,
                                hlo_module_config);
 
     // Link with ROCm-Device-Libs, and optimize the LLVM module.
@@ -896,7 +897,7 @@ StatusOr<std::vector<uint8>> CompileToHsaco(
 
     // Lower optimized LLVM module to HSA code object.
     TF_ASSIGN_OR_RETURN(hsaco, EmitModuleToHsaco(module, target_machine.get()));
-    HsacoCache::Add(str, hash, *amdgpu_version, hsaco);
+    HsacoCache::Add(str, hash, amdgpu_version->first, hsaco);
   }
   return hsaco;
 }
",0,train
ab9a9084d2feb820a1fe94e6d42825209eaed076,tensorflow/tensorflow,"Changing ""GpuVersion"" datatype to include hipDeviceProp_t::gcnArchName

Currently the ""GpuVersion"" datatype (for AMDGPU in XLA code) is an `int`, whose value is the same as the `int hipDeviceProp_t::gcnArch;` field.

Starting with ROCm 4.?, which introduces targetID support, that value will no longer be sufficient to create an LLVM AMDGPUTarget that accurately represents the underlying GPU. We will need the information contained within the `string hipDeviceProp_t gcnArchName` field for that purpose.

This commit updates the ""GpuVersion"" datatype from being a simple int to an (int, string) pair, and stores the value of the `string hipDeviceProp_t gcnArchName` field in the string.",llvm_compiler_test.cc,"@@ -53,7 +53,9 @@ class GpuDummyCompiler : public GpuCompiler {
     return Status::OK();
   }
 
-  GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) { return 0; }
+  GpuVersion GetGpuVersion(se::StreamExecutor* stream_exec) {
+    return std::make_pair(0, 0);
+  }
 
   StatusOr<std::pair<std::string, std::vector<uint8>>> CompileTargetBinary(
       const HloModuleConfig& module_config, llvm::Module* llvm_module,
",0,train
af8ec29dc16c6d1ddae557dbbfd3f6f4e7e88fbd,tensorflow/tensorflow,"Use GesvdjBatched on GPU for batches of square matrices up to size 32x32. This substantially speeds up SVD on small matrices on the GPU.
On a Quadro P4000, the time to perform SVD on 1000 16x16 matrices drops from 0.617s to 0.0135s.

PiperOrigin-RevId: 267036566",svd_op_gpu.cu.cc,"@@ -99,40 +99,70 @@ class SvdOpGpu : public AsyncOpKernel {
               std::unique_ptr<CudaSolver> solver) {
     // Compute U S V* = M.
     // 1. cuSolver works in column-major rather than row-major.
-    // 2. Gesvd returns V*.
-    // 3. Hence M should be transposed before input and U (rather than V) should
-    // be transposed on output.
+    // 2. Gesvd returns V*. GesvdjBatched returns V.
+    // 3. Hence M should be transposed before input and
+    //    a) U (rather than V) should be transposed on output with Gesvd.
+    //    b) U and V should be transposed on output with GesvdjBatched.
 
-    Tensor u_copy;
-    if (compute_uv_) {
-      TensorShape u_shape;
-      if (full_matrices_) {
+    // get the pointers to input data
+    Scalar* input_ptr;
+    RealScalar* outputS_ptr;
+    auto input_reshaped = M_copy.template flat_inner_dims<Scalar, 3>();
+    input_ptr = input_reshaped.data();
+    const int64 batch_size =
+        M_copy.dims() > 2 ? input_reshaped.dimension(0) : 1;
+    // Gesvdjbatched handles matrices up to 32x32.
+    // TODO(jamessspencer): if not full_matrices, compute full U and V matrices
+    // using Gesvdjbatched and return slices.
+    const bool batched = m <= 32 && n <= 32 && batch_size > 1 && full_matrices_;
+
+    // Copies of U and V if required so can take transposes after SVD.
+    Tensor u_copy, v_copy;
+    Scalar* outputU_ptr = NULL;
+    Scalar* outputV_ptr = NULL;
+    if (compute_uv_ || batched) {
+      TensorShape u_shape, v_shape;
+      if (batched) {
+        // Gesvdjbatched seems to require U and V matrices even if the vectors
+        // aren't computed.
+        TensorShape shapeRaw = M_copy.shape();
+        shapeRaw.RemoveLastDims(2);
+        u_shape = shapeRaw;
+        u_shape.AddDim(m);
+        u_shape.AddDim(m);
+        v_shape = shapeRaw;
+        v_shape.AddDim(n);
+        v_shape.AddDim(n);
+      } else if (full_matrices_) {
         u_shape = U->shape();
+        v_shape = V->shape();
       } else {
         TensorShape shapeRaw = M_copy.shape();
         shapeRaw.RemoveLastDims(2);
         u_shape = shapeRaw;
         u_shape.AddDim(p);
         u_shape.AddDim(m);
+        v_shape = shapeRaw;
+        v_shape.AddDim(p);
+        v_shape.AddDim(n);
       }
       OP_REQUIRES_OK_ASYNC(
           context, solver->allocate_scoped_tensor(U->dtype(), u_shape, &u_copy),
           done);
+      if (batched) {
+        OP_REQUIRES_OK_ASYNC(
+            context,
+            solver->allocate_scoped_tensor(V->dtype(), v_shape, &v_copy), done);
+      }
+      outputU_ptr = u_copy.template flat_inner_dims<Scalar, 3>().data();
+      if (batched) {
+        outputV_ptr = v_copy.template flat_inner_dims<Scalar, 3>().data();
+      } else {
+        outputV_ptr = V->template flat_inner_dims<Scalar, 3>().data();
+      }
     }
 
-    // get the pointers to the data
-    Scalar* input_ptr;
-    RealScalar* outputS_ptr;
-    Scalar* outputU_ptr = NULL;
-    Scalar* outputV_ptr = NULL;
-    auto input_reshaped = M_copy.template flat_inner_dims<Scalar, 3>();
-    input_ptr = input_reshaped.data();
     outputS_ptr = S->template flat_inner_dims<RealScalar, 2>().data();
-    if (compute_uv_) {
-      outputU_ptr = u_copy.template flat_inner_dims<Scalar, 3>().data();
-      outputV_ptr = V->template flat_inner_dims<Scalar, 3>().data();
-    }
-    const int64 batch_size = input_reshaped.dimension(0);
     std::vector<DeviceLapackInfo> dev_info;
     dev_info.push_back(solver->GetDeviceLapackInfo(batch_size, ""gesvd""));
     int* dev_info_ptr = dev_info.back().mutable_data();
@@ -151,33 +181,44 @@ class SvdOpGpu : public AsyncOpKernel {
                batch_size * m * sizeof(Scalar));
     }
 
-    for (int64 batch = 0; batch < batch_size; ++batch) {
-      Scalar* input = input_ptr + batch * m * n;
-      RealScalar* outputS = outputS_ptr + batch * p;
-      Scalar* outputU = NULL;
-      Scalar* outputVT = NULL;
-      char jobu = 'N';
-      char jobvt = 'N';
-
-      if (compute_uv_) {
-        if (full_matrices_) {
-          outputU = outputU_ptr + batch * m * m;
-          outputVT = outputV_ptr + batch * n * n;
-          jobu = 'A';
-          jobvt = 'A';
-        } else {
-          outputU = outputU_ptr + batch * m * p;
-          outputVT = outputV_ptr + batch * n * p;
-          jobu = 'S';
-          jobvt = 'S';
-        }
-      }
-
+    if (batched) {
+      cusolverEigMode_t jobz = CUSOLVER_EIG_MODE_NOVECTOR;
+      if (compute_uv_) jobz = CUSOLVER_EIG_MODE_VECTOR;
       OP_REQUIRES_OK_ASYNC(
           context,
-          solver->Gesvd(jobu, jobvt, m, n, input, m, outputS, outputU, m,
-                        outputVT, n, dev_info_ptr + batch),
+          solver->GesvdjBatched(jobz, m, n, input_ptr, m, outputS_ptr,
+                                outputU_ptr, m, outputV_ptr, n, dev_info_ptr,
+                                batch_size),
           done);
+    } else {
+      for (int64 batch = 0; batch < batch_size; ++batch) {
+        Scalar* input = input_ptr + batch * m * n;
+        RealScalar* outputS = outputS_ptr + batch * p;
+        Scalar* outputU = NULL;
+        Scalar* outputVT = NULL;
+        char jobu = 'N';
+        char jobvt = 'N';
+
+        if (compute_uv_) {
+          if (full_matrices_) {
+            outputU = outputU_ptr + batch * m * m;
+            outputVT = outputV_ptr + batch * n * n;
+            jobu = 'A';
+            jobvt = 'A';
+          } else {
+            outputU = outputU_ptr + batch * m * p;
+            outputVT = outputV_ptr + batch * n * p;
+            jobu = 'S';
+            jobvt = 'S';
+          }
+        }
+
+        OP_REQUIRES_OK_ASYNC(
+            context,
+            solver->Gesvd(jobu, jobvt, m, n, input, m, outputS, outputU, m,
+                          outputVT, n, dev_info_ptr + batch),
+            done);
+      }
     }
 
     // This is a bug in cuSolver:
@@ -213,6 +254,10 @@ class SvdOpGpu : public AsyncOpKernel {
     if (compute_uv_) {
       auto device = context->eigen_device<GPUDevice>();
       OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, u_copy, U), done);
+      if (batched) {
+        OP_REQUIRES_OK_ASYNC(context, DoMatrixTranspose(device, v_copy, V),
+                             done);
+      }
     }
 
     CheckResult(context, std::move(done), dev_info, std::move(solver));
@@ -289,6 +334,7 @@ class SvdOpGpu : public AsyncOpKernel {
     }
 
     // Call the SVD: compute V S U* = M*.
+    // Note (m, n) and (U, V) are swapped accordingly.
     RunSVD(context, done, n, m, p, input_copy, S, V, U, std::move(solver));
   }
 
",0,train
a4a6bab62151616b54216059919bb2c111a45881,tensorflow/tensorflow,"Expose stream executor namespace in cmake shared object. (#19415)

Instead of perftools::gputools, expose the stream_executor namespace in the cmake shared object.",create_def_file.py,"@@ -44,7 +44,7 @@ UNDNAME = ""undname.exe""
 DUMPBIN = ""dumpbin.exe""
 
 # Exclude if matched
-EXCLUDE_RE = re.compile(r""RTTI|deleting destructor|::internal::"")
+EXCLUDE_RE = re.compile(r""RTTI|deleting destructor|::internal::|Internal|python_op_gen_internal|grappler"")
 
 # Include if matched before exclude
 INCLUDEPRE_RE = re.compile(r""google::protobuf::internal::ExplicitlyConstructed|""
@@ -56,6 +56,9 @@ INCLUDEPRE_RE = re.compile(r""google::protobuf::internal::ExplicitlyConstructed|""
                            r""tensorflow::ops::internal::Enter|""
                            r""tensorflow::strings::internal::AppendPieces|""
                            r""tensorflow::strings::internal::CatPieces|""
+                           r""tensorflow::errors::Internal|""
+                           r""tensorflow::Tensor::CopyFromInternal|""
+                           r""tensorflow::kernel_factory::OpKernelRegistrar::InitInternal|""
                            r""tensorflow::io::internal::JoinPathImpl"")
 
 # Include if matched after exclude
@@ -64,7 +67,7 @@ INCLUDE_RE = re.compile(r""^(TF_\w*)$|""
                         r""tensorflow::|""
                         r""functor::|""
                         r""\?nsync_|""
-                        r""perftools::gputools"")
+                        r""stream_executor::"")
 
 # We want to identify data members explicitly in the DEF file, so that no one
 # can implicitly link against the DLL if they use one of the variables exported
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",minimize_loss_test.py,"@@ -54,21 +54,18 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
   def testTrainNetwork(self, distribution, optimizer_fn, use_callable_loss,
                        is_tpu):
     with distribution.scope():
-      model_fn, dataset, layer = minimize_loss_example(
-          optimizer_fn,
-          use_bias=True,
-          use_callable_loss=use_callable_loss)
+      model_fn, dataset_fn, layer = minimize_loss_example(
+          optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
+      def tpu_dataset_fn():
+        return dataset_fn().batch(2)
       # TODO(isaprykin):  Eliminate `is_tpu`. Probably add a
       # `DistributionStrategy.create_monitor` so that each DistributionStrategy
       # could influence its training loop. That method would return an instance
       # of Monitor.  TPUMonitor would execute tpu.initialize_system() and
       # tpu.shutdown_system().
-      if is_tpu:
-        dataset = dataset.batch(2)
-
       iterator = distribution.distribute_dataset(
-          dataset).make_one_shot_iterator()
+          tpu_dataset_fn if is_tpu else dataset_fn).make_one_shot_iterator()
 
       def run_step():
         # TODO(isaprykin): Make iterator get_next() return a list of sub-
@@ -122,14 +119,14 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
     # `distribution.scope`.
     with variable_scope.variable_creator_scope(
         appending_creator), distribution.scope():
-      model_fn, dataset, layer = minimize_loss_example(
+      model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn,
           use_bias=True,
           use_callable_loss=True,
           create_optimizer_inside_model_fn=True)
 
       iterator = distribution.distribute_dataset(
-          dataset).make_one_shot_iterator()
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.group(
@@ -176,7 +173,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
     """"""Verifies that moving mean updates are reduced across towers.""""""
     with distribution.scope():
       num_towers = len(distribution.worker_devices)
-      model_fn, dataset, batchnorm = batchnorm_example(
+      model_fn, dataset_fn, batchnorm = batchnorm_example(
           optimizer_fn,
           batch_per_epoch=num_towers,
           momentum=momentum,
@@ -188,7 +185,7 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
       if isinstance(distribution, mirrored_strategy.MirroredStrategy):
         distribution._prefetch_on_device = False
       iterator = distribution.distribute_dataset(
-          dataset).make_one_shot_iterator()
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return control_flow_ops.group(
@@ -260,11 +257,13 @@ class MinimizeLossStepTest(test.TestCase, parameterized.TestCase):
         else:
           return optimizer.minimize(loss_fn())
 
-      features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
-      labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
-      dataset = dataset_ops.Dataset.zip((features, labels)).repeat()
+      def dataset_fn():
+        features = dataset_ops.Dataset.from_tensors([[2.], [7.]])
+        labels = dataset_ops.Dataset.from_tensors([[6.], [21.]])
+        return dataset_ops.Dataset.zip((features, labels)).repeat()
+
       iterator = distribution.distribute_dataset(
-          dataset).make_one_shot_iterator()
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return distribution.group(
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",mirrored_strategy.py,"@@ -140,9 +140,10 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
       g.add_to_collections(collections, result)
     return result
 
-  def distribute_dataset(self, dataset):
+  def distribute_dataset(self, dataset_fn):
     return values.PerDeviceDataset(
-        dataset, self._devices, self._prefetch_on_device)
+        self._call_dataset_fn(dataset_fn), self._devices,
+        self._prefetch_on_device)
 
   def _broadcast(self, tensor, destinations):
     # TODO(josh11b): In eager mode, use one thread per device, or async mode.
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",mirrored_strategy_multigpu_test.py,"@@ -247,9 +247,9 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
 
     dist = mirrored_strategy.MirroredStrategy(
         [""/device:GPU:0"", ""/device:CPU:0""])
-    features = dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
     features = dist.distribute_dataset(
-        features).make_one_shot_iterator().get_next()
+        lambda: dataset_ops.Dataset.from_tensors([[1.]]).repeat(10)
+    ).make_one_shot_iterator().get_next()
 
     with dist.scope():
       result = dist.call_for_each_tower(
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",one_device_strategy.py,"@@ -60,8 +60,8 @@ class OneDeviceStrategy(distribute_lib.DistributionStrategy):
     with ops.colocate_with(colocate_with):
       return next_creator(*args, **kwargs)
 
-  def distribute_dataset(self, dataset):
-    return dataset
+  def distribute_dataset(self, dataset_fn):
+    return self._call_dataset_fn(dataset_fn)
 
   def _broadcast(self, tensor, destinations):
     return tensor
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",optimizer_v2_test.py,"@@ -39,11 +39,11 @@ class MinimizeLossOptimizerV2Test(test.TestCase, parameterized.TestCase):
   def testTrainNetwork(self, distribution, optimizer_fn,
                        use_callable_loss=True):
     with distribution.scope():
-      model_fn, dataset, layer = minimize_loss_example(
+      model_fn, dataset_fn, layer = minimize_loss_example(
           optimizer_fn, use_bias=True, use_callable_loss=use_callable_loss)
 
       iterator = distribution.distribute_dataset(
-          dataset).make_one_shot_iterator()
+          dataset_fn).make_one_shot_iterator()
 
       def run_step():
         return control_flow_ops.group(distribution.unwrap(
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",single_loss_example.py,"@@ -29,7 +29,10 @@ from tensorflow.python.ops import math_ops
 
 def single_loss_example(optimizer_fn, distribution, use_bias=False):
   """"""Build a very simple network to use in tests and examples.""""""
-  dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
+
+  def dataset_fn():
+    return dataset_ops.Dataset.from_tensors([[1.]]).repeat()
+
   optimizer = optimizer_fn()
   layer = core.Dense(1, use_bias=use_bias)
 
@@ -37,8 +40,8 @@ def single_loss_example(optimizer_fn, distribution, use_bias=False):
     y = array_ops.reshape(layer(x), []) - constant_op.constant(1.)
     return y * y
 
-  single_loss_step = step_fn.StandardSingleLossStep(dataset, loss_fn, optimizer,
-                                                    distribution)
+  single_loss_step = step_fn.StandardSingleLossStep(dataset_fn, loss_fn,
+                                                    optimizer, distribution)
 
   # Layer is returned for inspecting the kernels in tests.
   return single_loss_step, layer
@@ -49,7 +52,10 @@ def minimize_loss_example(optimizer_fn,
                           use_callable_loss=True,
                           create_optimizer_inside_model_fn=False):
   """"""Example of non-distribution-aware legacy code.""""""
-  dataset = dataset_ops.Dataset.from_tensors([[1.]]).repeat()
+
+  def dataset_fn():
+    return dataset_ops.Dataset.from_tensors([[1.]]).repeat()
+
   # An Optimizer instance is created either outside or inside model_fn.
   outer_optimizer = None
   if not create_optimizer_inside_model_fn:
@@ -71,7 +77,7 @@ def minimize_loss_example(optimizer_fn,
     else:
       return optimizer.minimize(loss_fn())
 
-  return model_fn, dataset, layer
+  return model_fn, dataset_fn, layer
 
 
 def batchnorm_example(optimizer_fn,
@@ -79,12 +85,15 @@ def batchnorm_example(optimizer_fn,
                       momentum=0.9,
                       renorm=False):
   """"""Example of non-distribution-aware legacy code with batch normalization.""""""
-  # input shape is [16, 8], input values are increasing in both dimensions.
-  dataset = dataset_ops.Dataset.from_tensor_slices(
-      [[[float(x * 8 + y + z * 100)
-         for y in range(8)]
-        for x in range(16)]
-       for z in range(batch_per_epoch)]).repeat()
+
+  def dataset_fn():
+    # input shape is [16, 8], input values are increasing in both dimensions.
+    return dataset_ops.Dataset.from_tensor_slices(
+        [[[float(x * 8 + y + z * 100)
+           for y in range(8)]
+          for x in range(16)]
+         for z in range(batch_per_epoch)]).repeat()
+
   optimizer = optimizer_fn()
   batchnorm = normalization.BatchNormalization(
       renorm=renorm, momentum=momentum, fused=False)
@@ -99,4 +108,4 @@ def batchnorm_example(optimizer_fn,
     # Callable loss.
     return optimizer.minimize(loss_fn)
 
-  return model_fn, dataset, batchnorm
+  return model_fn, dataset_fn, batchnorm
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",step_fn.py,"@@ -49,13 +49,14 @@ class StandardInputStep(Step):
   """"""Step with a standard implementation of input handling.
 
   Args:
-    input_dataset: a tf.data Dataset that provides input.
+    dataset_fn: a function that returns a tf.data Dataset that produces the
+      input for the model.
   """"""
 
-  def __init__(self, input_dataset, distribution):
+  def __init__(self, dataset_fn, distribution):
     Step.__init__(self, distribution)
     self._distributed_input = distribution.distribute_dataset(
-        input_dataset).make_one_shot_iterator()
+        dataset_fn).make_one_shot_iterator()
 
   def inputs(self):
     return self._distributed_input.get_next()
@@ -77,14 +78,15 @@ class StandardSingleLossStep(StandardInputStep):
   ```
 
   Args:
-    input_dataset: a tf.data Dataset that provides input.
+    dataset_fn: a function that returns a tf.data Dataset that produces the
+      input for the model.
     loss_fn: a function that returns loss.
     optimizer: an optimizer that implements an update rule.
     distribution: a `DistributionStrategy` object.
   """"""
 
-  def __init__(self, input_dataset, loss_fn, optimizer, distribution):
-    StandardInputStep.__init__(self, input_dataset, distribution)
+  def __init__(self, dataset_fn, loss_fn, optimizer, distribution):
+    StandardInputStep.__init__(self, dataset_fn, distribution)
     self._loss_fn = loss_fn
     self._optimizer = optimizer
     self._is_run_concurrently = False
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",estimator.py,"@@ -688,22 +688,19 @@ class Estimator(object):
 
   def _get_features_and_labels_from_input_fn(self, input_fn, mode):
     """"""Extracts the `features` and labels from return values of `input_fn`.""""""
-    result = self._call_input_fn(input_fn, mode)
-    # TODO(anjalisridhar): What about the default DistributionStrategy? Perhaps
-    # using any input is alright in that case. There is also a
-    # has_dataset_or_queue_runner function that we may want to extend and use.
-    if (self._distribution is not None and
-        not isinstance(result, dataset_ops.Dataset) and
-        mode == model_fn_lib.ModeKeys.TRAIN):
-      raise ValueError('input_fn() must return a tf.data.Dataset when using a '
-                       'DistributionStrategy.')
     input_hooks = []
-    if isinstance(result, dataset_ops.Dataset):
-      if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN:
-        result = self._distribution.distribute_dataset(result)
+    if self._distribution is not None and mode == model_fn_lib.ModeKeys.TRAIN:
+      result = self._distribution.distribute_dataset(
+          lambda: self._call_input_fn(input_fn, mode))
       iterator = result.make_initializable_iterator()
       input_hooks.append(_DatasetInitializerHook(iterator))
       result = iterator.get_next()
+    else:
+      result = self._call_input_fn(input_fn, mode)
+      if isinstance(result, dataset_ops.Dataset):
+        iterator = result.make_initializable_iterator()
+        input_hooks.append(_DatasetInitializerHook(iterator))
+        result = iterator.get_next()
     if isinstance(result, (list, tuple)):
       if len(result) != 2:
         raise ValueError(
",0,train
fddfa9f8dcd1a922ade5362c0538ca39e99472a7,tensorflow/tensorflow,"Change distribution.distribute_dataset to accept an input_fn instead of a dataset.

PiperOrigin-RevId: 193437651",distribute.py,"@@ -20,6 +20,7 @@ from __future__ import print_function
 
 import threading
 
+from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -672,25 +673,35 @@ class DistributionStrategy(object):
     _require_distribution_strategy_scope(self)
     return variable_scope.variable_creator_scope(create_colocated_variable)
 
+  def _call_dataset_fn(self, dataset_fn):
+    result = dataset_fn()
+    if not isinstance(result, dataset_ops.Dataset):
+      raise ValueError(
+          ""dataset_fn() must return a tf.data.Dataset when using a ""
+          ""DistributionStrategy."")
+    return result
+
   # TODO(josh11b): `PerDeviceDataset` currently only implements a few methods of
   # Dataset API such as make_one_shot_iterator and make_initializable_iterator.
   # Extend to implement more functionality of datasets.
-  def distribute_dataset(self, dataset):
+  def distribute_dataset(self, dataset_fn):
     """"""Return a `dataset` split across all towers.
 
     Suitable for providing input to for `call_for_each_tower()` by creating an
     iterator:
 
     ```
+    def dataset_fn():
+      return tf.data.Dataset.from_tensors([[1.]]).repeat()
     with distribution_strategy.scope():
-      distributed_dataset = distribution_strategy.distribute_dataset(dataset)
+      distributed_dataset = distribution_strategy.distribute_dataset(dataset_fn)
       iterator = distributed_dataset.make_one_shot_iterator()
       tower_results = distribution_strategy.call_for_each_tower(
           tower_fn, iterator.get_next())
     ```
 
     Args:
-      dataset: A `tf.data.Dataset`.
+      dataset_fn: A function that returns a `tf.data.Dataset`.
 
     Returns:
       A `PerDeviceDataset` that will produce data for each tower.
@@ -1135,8 +1146,8 @@ class _DefaultDistributionStrategy(DistributionStrategy):
     _require_distribution_strategy_scope(self)
     return ops.colocate_with(colocate_with_variable)
 
-  def distribute_dataset(self, dataset):
-    return dataset
+  def distribute_dataset(self, dataset_fn):
+    return self._call_dataset_fn(dataset_fn)
 
   def _broadcast(self, tensor, destinations):
     if destinations is None:
",0,train
b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats.
Change: 115036211",gpu_bfc_allocator.cc,"@@ -41,6 +41,7 @@ GPUBFCAllocator::GPUBFCAllocator(int device_id, size_t total_memory)
 
   // Allocate the requested amount of memory.
   gpu_memory_size_ = total_memory;
+  stats_.bytes_limit = static_cast<int64>(total_memory);
 
   // Create a bunch of bins of various good sizes.
 
@@ -256,7 +257,7 @@ void* GPUBFCAllocator::AllocateRawInternal(size_t unused_alignment,
     DumpMemoryLog(rounded_bytes);
     LOG(WARNING) << ""Ran out of memory trying to allocate ""
                  << strings::HumanReadableNumBytes(num_bytes)
-                 << "".  See logs for memory state"";
+                 << "".  See logs for memory state."";
   }
   return nullptr;
 }
@@ -544,6 +545,7 @@ void GPUBFCAllocator::DumpMemoryLog(size_t num_bytes) {
   }
   LOG(INFO) << ""Sum Total of in-use chunks: ""
             << strings::HumanReadableNumBytes(total_bytes);
+  LOG(INFO) << ""Stats: \n"" << stats_.DebugString();
 }
 
 void GPUBFCAllocator::GetStats(AllocatorStats* stats) {
",0,test
b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats.
Change: 115036211",allocator.cc,"@@ -27,16 +27,18 @@ void AllocatorStats::Clear() {
   this->bytes_in_use = 0;
   this->max_bytes_in_use = 0;
   this->max_alloc_size = 0;
+  this->bytes_limit = 0;
 }
 
 string AllocatorStats::DebugString() const {
   return strings::Printf(
+      ""Limit:        %20lld\n""
       ""InUse:        %20lld\n""
       ""MaxInUse:     %20lld\n""
       ""NumAllocs:    %20lld\n""
       ""MaxAllocSize: %20lld\n"",
-      this->bytes_in_use, this->max_bytes_in_use, this->num_allocs,
-      this->max_alloc_size);
+      this->bytes_limit, this->bytes_in_use, this->max_bytes_in_use,
+      this->num_allocs, this->max_alloc_size);
 }
 
 Allocator::~Allocator() {}
",0,test
b0240486be5a8c4286961d1751fe8560e9c6970e,tensorflow/tensorflow,"Exposes the memory limit in the allocator's stats.
Change: 115036211",allocator.h,"@@ -45,6 +45,11 @@ struct AllocatorStats {
   int64 max_bytes_in_use;  // The maximum bytes in use.
   int64 max_alloc_size;    // The max single allocation seen.
 
+  // The upper limit what the allocator can allocate, if such a limit
+  // is known. Certain allocator may return 0 to indicate the limit is
+  // unknown.
+  int64 bytes_limit;
+
   AllocatorStats() { Clear(); }
 
   void Clear();
",0,test
0c9ddf3ffd78196cb579d040c59a72d604152073,tensorflow/tensorflow,TST: add unit test,tensor_util_test.py,"@@ -314,6 +314,16 @@ class TensorUtilTest(test.TestCase):
                   shape=[3, 4],
                   dtype=dtype)))
 
+  def testIntMixedWithDimension(self):
+    dtype = dtypes.int32
+    nptype = np.int32
+    t = tensor_util.make_tensor_proto([10, tensor_shape.Dimension(20), 30],
+                                      dtype=dtype)
+    self.assertEquals(dtype, t.dtype)
+    a = tensor_util.MakeNdarray(t)
+    self.assertEquals(nptype, a.dtype)
+    self.assertAllClose(np.array([10, 20, 30], dtype=nptype), a)
+
   def testLong(self):
     t = tensor_util.make_tensor_proto(10, dtype=dtypes.int64)
     self.assertProtoEquals(""""""
",0,train
fc274666282c17e0fdcda350744e07b52dda827d,tensorflow/tensorflow,"Prepare //tensorflow/python/data/kernel_tests:map_test for Tensor equality.

PiperOrigin-RevId: 263589206",map_test.py,"@@ -522,10 +522,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase):
             divide,
             name=""cond_mult"")
 
-      pred_fn_pairs = {
-          math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)):
-              defaults_two,
-      }
+      pred_fn_pairs = [
+          (math_ops.logical_or(math_ops.equal(y, 2),
+                               math_ops.equal(y, 3)), defaults_two),
+      ]
 
       return control_flow_ops.case(
           pred_fn_pairs, default=multiply, exclusive=True)
@@ -555,10 +555,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase):
       def divide():
         return x // 2
 
-      pred_fn_pairs = {
-          math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)):
-              divide,
-      }
+      pred_fn_pairs = [
+          (math_ops.logical_or(math_ops.equal(y, 2),
+                               math_ops.equal(y, 3)), divide),
+      ]
 
       return control_flow_ops.case(
           pred_fn_pairs, default=multiply, exclusive=True)
@@ -596,10 +596,10 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase):
             divide,
             name=""cond_mult"")
 
-      pred_fn_pairs = {
-          math_ops.logical_or(math_ops.equal(y, 2), math_ops.equal(y, 3)):
-              defaults_two,
-      }
+      pred_fn_pairs = [
+          (math_ops.logical_or(math_ops.equal(y, 2),
+                               math_ops.equal(y, 3)), defaults_two),
+      ]
 
       return control_flow_ops.case(
           pred_fn_pairs, default=multiply, exclusive=True)
",0,test
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",compile_utils.py,"@@ -293,7 +293,19 @@ class LossesContainer(Container):
 class MetricsContainer(Container):
   """"""A container class for metrics passed to `Model.compile`.""""""
 
-  def __init__(self, metrics=None, weighted_metrics=None, output_names=None):
+  def __init__(self, metrics=None, weighted_metrics=None, output_names=None,
+               from_serialized=False):
+    """"""Initializes a container for metrics.
+
+    Arguments:
+      metrics: see the `metrics` argument from `tf.keras.Model.compile`.
+      weighted_metrics: see the `weighted_metrics` argument from
+        `tf.keras.Model.compile`.
+      output_names: A list of strings of names of outputs for the model.
+      from_serialized: Whether the model being compiled is from a serialized
+        model.  Used to avoid redundantly applying pre-processing renaming
+        steps.
+    """"""
     super(MetricsContainer, self).__init__(output_names=output_names)
 
     # Keep user-supplied values untouched for recompiling and serialization.
@@ -304,6 +316,8 @@ class MetricsContainer(Container):
     self._weighted_metrics = weighted_metrics
     self._built = False
 
+    self._from_serialized = from_serialized
+
   @property
   def metrics(self):
     """"""All metrics in this container.""""""
@@ -357,7 +371,11 @@ class MetricsContainer(Container):
         y_pred, self._weighted_metrics, check_types=False)
 
     # Assumes metrics, weighted_metrics have been flattened up to outputs.
-    self._set_metric_names()
+    #
+    # If we are loading a model that has been already serialized, we do not
+    # want to re-apply any pre-processing metric renaming steps.
+    if not self._from_serialized:
+      self._set_metric_names()
     self._create_ordered_metrics()
     self._built = True
 
",0,train
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",training.py,"@@ -569,6 +569,11 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
         if not steps_per_execution:
           steps_per_execution = kwargs.pop('experimental_steps_per_execution')
 
+      # When compiling from an already-serialized model, we do not want to
+      # reapply some processing steps (e.g. metric renaming for multi-output
+      # models, which have prefixes added for each corresponding output name).
+      from_serialized = kwargs.pop('from_serialized', False)
+
       self._validate_compile(optimizer, metrics, **kwargs)
       self._run_eagerly = run_eagerly
 
@@ -576,7 +581,8 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
       self.compiled_loss = compile_utils.LossesContainer(
           loss, loss_weights, output_names=self.output_names)
       self.compiled_metrics = compile_utils.MetricsContainer(
-          metrics, weighted_metrics, output_names=self.output_names)
+          metrics, weighted_metrics, output_names=self.output_names,
+          from_serialized=from_serialized)
 
       self._configure_steps_per_execution(steps_per_execution or 1)
 
",0,train
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",training_v1.py,"@@ -308,6 +308,7 @@ class Model(training_lib.Model):
 
     # Prepare Session arguments (legacy).
     kwargs.pop('cloning', None)  # Legacy DistStrat argument, never used.
+    kwargs.pop('from_serialized', None)  # Not used in v1.
     allowed_kwargs = {'feed_dict', 'fetches', 'options', 'run_metadata'}
     unknown_kwargs = set(kwargs.keys()) - allowed_kwargs
     if unknown_kwargs:
",0,train
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",hdf5_format.py,"@@ -201,7 +201,7 @@ def load_model_from_hdf5(filepath, custom_objects=None, compile=True):  # pylint
 
       # Compile model.
       model.compile(**saving_utils.compile_args_from_training_config(
-          training_config, custom_objects))
+          training_config, custom_objects), from_serialized=True)
       saving_utils.try_build_compiled_arguments(model)
 
       # Set optimizer weights.
",0,train
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",save_test.py,"@@ -1003,6 +1003,39 @@ class TestWholeModelSaving(keras_parameterized.TestCase):
     loaded = keras.models.load_model(saved_model_dir)
     self.assertIs(loaded.layers[1], loaded.layers[2].layer)
 
+  @combinations.generate(combinations.combine(mode=['eager']))
+  def test_multi_output_metrics_name_stay_same(self):
+    """"""Tests that metric names don't change with each save/load cycle.
+
+    e.g. ""head_0_accuracy"" should not become ""head_0_head_0_accuracy"" after
+    saving and loading a model.
+    """"""
+    input_ = keras.Input((4,))
+    model = keras.Model(
+        input_,
+        [keras.layers.Softmax(name='head_0')(keras.layers.Dense(3)(input_)),
+         keras.layers.Softmax(name='head_1')(keras.layers.Dense(5)(input_))])
+    metric = keras.metrics.BinaryAccuracy()
+    model.compile(optimizer='rmsprop',
+                  loss='mse',
+                  metrics={'head_0': [metric, 'accuracy']})
+
+    # Run one iteration.
+    x = np.random.rand(2, 4)
+    y = {'head_0': np.random.randint(2, size=(2, 3)),
+         'head_1': np.random.randint(2, size=(2, 5))}
+    model.fit(x, y, verbose=0)
+
+    # Save and reload.
+    save_format = testing_utils.get_save_format()
+    saved_model_dir = self._save_model_dir()
+    keras.models.save_model(model, saved_model_dir, save_format=save_format)
+    loaded = keras.models.load_model(saved_model_dir)
+
+    # Make sure the metrics names from the model before saving match the loaded
+    # model.
+    self.assertSequenceEqual(model.metrics_names, loaded.metrics_names)
+
 
 # Factory functions to create models that will be serialized inside a Network.
 def _make_graph_network(input_size, output_size):
",0,train
b8d17793a7aae010754d7776612af5b9b1f5252f,tensorflow/tensorflow,"Don't redundantly prefix metric names for multi-output models.

For multi-output models, we prefix metric names with their associated output names to disambiguate / uniqify them.  When these models are repeatedly saved and loaded, this prefixing repeats as well, leading to long metrics names such as ""head_0_head_0_head_0_accuracy"", where ""head_0"" is an output name.

PiperOrigin-RevId: 359151924
Change-Id: I509ea27a7d91446c3893d13d73818f857705ef5f",load.py,"@@ -168,7 +168,7 @@ def load(path, compile=True, options=None):  # pylint: disable=redefined-builtin
         'training_config', None)
     if training_config is not None:
       model.compile(**saving_utils.compile_args_from_training_config(
-          training_config))
+          training_config), from_serialized=True)
       saving_utils.try_build_compiled_arguments(model)
     else:
       logging.warning('No training configuration found in save file, so the '
",0,train
7b1ebf50c9686dacf5fb5036168a9110ae0add32,tensorflow/tensorflow,"Do not assume hasattr is available in Metric.__del__

Python does not guarantee that builtins are available by the time __del__
is called, so using hasattr is unsafe.

PiperOrigin-RevId: 254255638",monitoring.py,"@@ -121,10 +121,14 @@ class Metric(object):
     self._metric = self._metric_methods[self._label_length].create(*args)
 
   def __del__(self):
-    if hasattr(self, '_metric'):
+    try:
       deleter = self._metric_methods[self._label_length].delete
-      if deleter is not None:
-        deleter(self._metric)
+      metric = self._metric
+    except AttributeError:
+      return
+
+    if deleter is not None:
+      deleter(metric)
 
   def get_cell(self, *labels):
     """"""Retrieves the cell.""""""
",0,train
d6efc3b8d44c6ac583b4ff529343749cdebbff06,tensorflow/tensorflow,"Remove deprecated overload of CopyRawToHost

PiperOrigin-RevId: 438902197",pjrt_client.h,"@@ -667,18 +667,6 @@ class PjRtBuffer {
   virtual PjRtFuture<Status> CopyRawToHost(void* dst, int64_t offset,
                                            int64_t transfer_size) = 0;
 
-  // Transfers a sub-range of the on-device representation of the buffer.
-  // offset+transfer_size must be less than GetOnDeviceSizeInBytes. on_ready
-  // is called if and only if CopyRawToHost returns OK. on_ready will be called
-  // with a non-OK status if the buffer asynchronously transitions to an error
-  // state.
-  ABSL_DEPRECATED(""Use CopyRawToHost(...).OnReady() instead"")
-  Status CopyRawToHost(void* dst, int64_t offset, int64_t transfer_size,
-                       std::function<void(Status)> on_ready) {
-    CopyRawToHost(dst, offset, transfer_size).OnReady(std::move(on_ready));
-    return Status::OK();
-  }
-
   // Drops the buffer's reference to its associated device memory, leaving the
   // buffer in an invalid state. The memory will be freed lazily when all async
   // operations using the buffer have completed, according to the allocation
",0,train
9085ba6c5e291a27b17ca6e6c5e6e1d3fbda77c7,tensorflow/tensorflow,Change batch_norm default (#1831),batch_norm_ops.py,"@@ -19,7 +19,7 @@ from __future__ import print_function
 import tensorflow as tf
 
 
-def batch_normalize(tensor_in, epsilon=1e-5, convnet=True, decay=0.9,
+def batch_normalize(tensor_in, epsilon=1e-5, convnet=False, decay=0.9,
                     scale_after_normalization=True):
     """"""Batch Normalization
 
",0,train
9085ba6c5e291a27b17ca6e6c5e6e1d3fbda77c7,tensorflow/tensorflow,Change batch_norm default (#1831),conv_ops.py,"@@ -57,7 +57,7 @@ def conv2d(tensor_in, n_filters, filter_shape, strides=None, padding='SAME',
                                        tf.float32)
             output = output + bias_var
         if batch_norm:
-            output = batch_normalize(output)
+            output = batch_normalize(output, convnet=True)
         if activation:
             output = activation(output)
         return output
",0,train
f94c8482969c50c07f43063e01fd63747ef3a99f,tensorflow/tensorflow,Reverts erroneous change that removed adj_x/adj_y support in BatchMatMul,convert_nodes.cc,"@@ -4194,9 +4194,8 @@ Status ConvertBatchMatMul(OpConverterParams* params) {
   }
 
   TFAttrs attrs(node_def);
-  if (attrs.get<bool>(""adj_x"") || attrs.get<bool>(""adj_y"")) {
-    return errors::InvalidArgument(""TensorRT cannot adjoint inputs."");
-  }
+  const bool transpose_a = attrs.get<bool>(""adj_x"");
+  const bool transpose_b = attrs.get<bool>(""adj_y"");
 
   // Removes the batch dimension from weights.
   const auto remove_weights_batch_dim =
@@ -4232,8 +4231,8 @@ Status ConvertBatchMatMul(OpConverterParams* params) {
     return Status::OK();
   }
 
-  return ConvertMatMulHelper(params, tensor_l, tensor_r, /*transpose_a=*/false,
-                             /*transpose_b=*/false, node_def.name());
+  return ConvertMatMulHelper(params, tensor_l, tensor_r, transpose_a,
+                             transpose_b, node_def.name());
 }
 
 Status ConvertSoftmax(OpConverterParams* params) {
",0,train
f94c8482969c50c07f43063e01fd63747ef3a99f,tensorflow/tensorflow,Reverts erroneous change that removed adj_x/adj_y support in BatchMatMul,convert_nodes_test.cc,"@@ -1678,8 +1678,10 @@ void OpConverterTest::TestMatMulHelper(
       AddTestWeights<float>(""weights"", {2, 2}, {0, 1, 2, 3});
       if (is_batch_matmul) {
         if (transpose_a || transpose_b) {
-          RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
-                                     ""TensorRT cannot adjoint inputs."");
+          RunValidationAndConversion(
+              node_def, error::INVALID_ARGUMENT,
+              ""Input weight attempts to broadcast across batch dimension for ""
+              ""BatchMatMul, at my_matmul"");
         } else {
           RunValidationAndConversion(
               node_def, error::INVALID_ARGUMENT,
@@ -1717,8 +1719,10 @@ void OpConverterTest::TestMatMulHelper(
     AddTestWeights<float>(""weights"", {2, 2}, {0, 1, 2, 3});
     if (is_batch_matmul) {
       if (transpose_b) {
-        RunValidationAndConversion(node_def, error::INVALID_ARGUMENT,
-                                   ""TensorRT cannot adjoint inputs."");
+        RunValidationAndConversion(
+            node_def, error::INVALID_ARGUMENT,
+            ""Input weight attempts to broadcast across batch dimension for ""
+            ""BatchMatMul, at my_matmul"");
       } else {
         RunValidationAndConversion(
             node_def, error::INVALID_ARGUMENT,
@@ -1822,22 +1826,36 @@ TEST_F(OpConverterTest, ConvertBatchMatMul) {
     return matmul.operation.node()->def();
   };
 
-  {
-    Reset();
-    NodeDef node_def = get_batch_matmul_nodedef(DT_FLOAT, /*transpose_a=*/false,
-                                                /*transpose_b=*/false);
-    AddTestTensor(""input"", {1, 3}, /*batch_size=*/1);
-    AddTestWeights<float>(""weights"", {1, 3, 1}, {1, 2, 3});
+  for (bool transpose_a : {false, true}) {
+    for (bool transpose_b : {false, true}) {
+      Reset();
+      NodeDef node_def =
+          get_batch_matmul_nodedef(DT_FLOAT, transpose_a, transpose_b);
+      AddTestTensor(""input"", {2, 2}, /*batch_size=*/1);
+      AddTestWeights<float>(""weights"", {1, 2, 2}, {1, 2, 3, 4});
 
-    RunValidationAndConversion(node_def);
-    TRT_TensorOrWeights output;
-    TF_EXPECT_OK(GetTensorOrWeights(""my_matmul"", &output));
-    ASSERT_TRUE(output.is_tensor());
-    ExpectTrtDimsEqualsArray({1, 1}, output.tensor()->getDimensions());
-    const DataVec input_data{{""input"", test::AsTensor<float>({0, 1, 2})}};
-    DataVec output_data{{""my_matmul"", ConstructTensor<float>(1, 1)}};
-    BuildAndRun(input_data, &output_data);
-    EXPECT_THAT(GetSpanForData<float>(output_data[0]), ElementsAre(8));
+      RunValidationAndConversion(node_def);
+      TRT_TensorOrWeights output;
+      TF_EXPECT_OK(GetTensorOrWeights(""my_matmul"", &output));
+      ASSERT_TRUE(output.is_tensor());
+      ExpectTrtDimsEqualsArray({2, 2}, output.tensor()->getDimensions());
+      const DataVec input_data{{""input"", test::AsTensor<float>({0, 1, 2, 3})}};
+      DataVec output_data{{""my_matmul"", ConstructTensor<float>(4)}};
+      BuildAndRun(input_data, &output_data);
+      if (!transpose_a && !transpose_b) {
+        EXPECT_THAT(GetSpanForData<float>(output_data[0]),
+                    ElementsAre(3, 4, 11, 16));
+      } else if (transpose_a && transpose_b) {
+        EXPECT_THAT(GetSpanForData<float>(output_data[0]),
+                    ElementsAre(4, 8, 7, 15));
+      } else if (transpose_a) {
+        EXPECT_THAT(GetSpanForData<float>(output_data[0]),
+                    ElementsAre(6, 8, 10, 14));
+      } else if (transpose_b) {
+        EXPECT_THAT(GetSpanForData<float>(output_data[0]),
+                    ElementsAre(2, 4, 8, 18));
+      }
+    }
   }
 
   TestMatMulHelper(get_batch_matmul_nodedef, ""BatchMatMul"");
",0,train
8017c247c84c4c80fa11744b1b913aec3ee88f3e,tensorflow/tensorflow,"Mark the `SerializeSparseOp<Variant>` kernel as inexpensive.

Since this op only performs a constant amount of work, and typically
executes in a few microseconds, it should be profitable to execute
this op inline, rather than scheduling it on a remote thread.

PiperOrigin-RevId: 186522885",serialize_sparse_op.cc,"@@ -44,6 +44,8 @@ class SerializeSparseOp : public OpKernel {
   explicit SerializeSparseOp(OpKernelConstruction* context)
       : OpKernel(context) {}
 
+  bool IsExpensive() override;
+
   Status Initialize(Tensor* result);
   Status Serialize(const Tensor& input, T* result);
 
@@ -82,6 +84,21 @@ class SerializeSparseOp : public OpKernel {
   }
 };
 
+// NOTE(mrry): We specialize the IsExpensive() method differently for
+// the string and variant cases, because (i) the string version
+// actually performs memory copies as part of its serialization (and
+// is hence potentially expensive), and (ii) the variant version
+// performs O(1) shallow copies (and hence is much cheaper than
+// dispatching to another thread would be).
+template <>
+bool SerializeSparseOp<string>::IsExpensive() {
+  return true;
+}
+template <>
+bool SerializeSparseOp<Variant>::IsExpensive() {
+  return false;
+}
+
 template <>
 Status SerializeSparseOp<string>::Initialize(Tensor* result) {
   *result = Tensor(DT_STRING, TensorShape({3}));
",0,train
29a67eaedd8d95866011bb1c87a9d1739d448686,tensorflow/tensorflow,"Fix typo in error message.

PiperOrigin-RevId: 214348730",nvptx_compiler.cc,"@@ -402,7 +402,7 @@ void WarnIfBadPtxasVersion(const string& ptxas_path) {
     LOG(WARNING)
         << ""*** WARNING *** You are using ptxas "" << vmaj << ""."" << vmin << "".""
         << vdot
-        << "", which older than 9.2.88. ptxas 9.x before 9.2.88 is known to ""
+        << "", which is older than 9.2.88. ptxas 9.x before 9.2.88 is known to ""
            ""miscompile XLA code, leading to incorrect results or ""
            ""invalid-address errors.\n\nYou do not need to update to CUDA ""
            ""9.2.88; cherry-picking the ptxas binary is sufficient."";
",0,train
16a50fcbacb8e46f6c4560a6e58ed26f5fd2d133,tensorflow/tensorflow,"Enable int64 test cases for MatMul

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",matmul_op_test.py,"@@ -102,7 +102,7 @@ class MatMulGradientTest(test_lib.TestCase):
 def _GetMatMulGradientTest(a_np_, b_np_, use_static_shape_, **kwargs_):
 
   def Test(self):
-    if not use_static_shape_ or a_np_.dtype in (np.int32, np.float16):
+    if not use_static_shape_ or a_np_.dtype in (np.int32, np.int64, np.float16):
       self.skipTest(""Skipping infeasible gradient test."")
 
     # Transpose and possibly conjugate a_np_ and b_np_ according to the
@@ -214,9 +214,9 @@ if __name__ == ""__main__"":
   sizes = [1, 3, 5]
   trans_options = [[False, False], [True, False], [False, True]]
   for use_static_shape in [False, True]:
-    for dtype in (np.int32, np.float16, np.float32, np.float64, np.complex64,
-                  np.complex128):
-      if not use_static_shape and dtype == np.int32:
+    for dtype in (np.int32, np.int64, np.float16, np.float32, np.float64,
+                  np.complex64, np.complex128):
+      if not use_static_shape and dtype == np.int32 and dtype == np.int64:
         # TODO(rmlarsen): Re-enable this test when we have fixed the underlying
         # bug in Windows (b/35935459).
         continue
",0,train
bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies.

Add accumulate functionality to the ScopedTimer.

PiperOrigin-RevId: 409447266
Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",graph_optimizer.cc,"@@ -39,9 +39,10 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
                               const Device* device,
                               std::unique_ptr<Graph>* graph,
                               const Options& options) {
+  static const char* kGraphOptimizerCategory = ""GraphOptimizerPass"";
+
   Graph* g = graph->get();
   DumpGraph(""Initial"", g);
-
   bool changed = true;
   const int kMaxRounds = 10;
   for (int rounds = 0; rounds < kMaxRounds; ++rounds) {
@@ -51,8 +52,9 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
       changed = true;
     }
 
-    uint64 inlining_start_us = Env::Default()->NowMicros();
-    uint64 inlining_total_us = 0;
+    tensorflow::metrics::ScopedCounter<2> inlining_timings(
+        tensorflow::metrics::GetGraphOptimizationCounter(),
+        {kGraphOptimizerCategory, ""function_inlining""});
     if (opts_.do_function_inlining() && RemoveDeadNodes(g)) {
       DumpGraph(""RemoveDeadNodes"", g);
       changed = true;
@@ -62,11 +64,14 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
       changed = true;
     }
     if (opts_.do_function_inlining()) {
-      inlining_total_us += Env::Default()->NowMicros() - inlining_start_us;
+      inlining_timings.AccumulateAndStop();
     }
 
     if (opts_.do_constant_folding()) {
-      const uint64 pass_start_us = Env::Default()->NowMicros();
+      tensorflow::metrics::ScopedCounter<2> timings(
+          tensorflow::metrics::GetGraphOptimizationCounter(),
+          {kGraphOptimizerCategory, ""constant_folding""});
+
       ConstantFoldingOptions cf_opts;
       cf_opts.shape_map = options.shape_map;
       cf_opts.consider = options.cf_consider_fn;
@@ -82,32 +87,28 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
         DumpGraph(""ConstFolding"", g);
         changed = true;
       }
-      const uint64 pass_end_us = Env::Default()->NowMicros();
-      metrics::UpdateGraphOptimizerPassTime(""constant_folding"",
-                                            pass_end_us - pass_start_us);
     }
 
-    inlining_start_us = Env::Default()->NowMicros();
-    if (opts_.do_function_inlining() && FixupSourceAndSinkEdges(g)) {
-      DumpGraph(""FixupSourceAndSinkEdges"", g);
-      changed = true;
-    }
     if (opts_.do_function_inlining()) {
-      inlining_total_us += Env::Default()->NowMicros() - inlining_start_us;
+      inlining_timings.Start();
+      if (FixupSourceAndSinkEdges(g)) {
+        DumpGraph(""FixupSourceAndSinkEdges"", g);
+        changed = true;
+      }
+      inlining_timings.AccumulateAndStop();
     }
 
     if (opts_.do_common_subexpression_elimination()) {
-      const uint64 pass_start_us = Env::Default()->NowMicros();
+      tensorflow::metrics::ScopedCounter<2> timings(
+          tensorflow::metrics::GetGraphOptimizationCounter(),
+          {kGraphOptimizerCategory, ""common_subexpression_elimination""});
       if (OptimizeCSE(g, options.cse_consider_fn)) {
         DumpGraph(""OptimizeCSE"", g);
         changed = true;
       }
-      const uint64 pass_end_us = Env::Default()->NowMicros();
-      metrics::UpdateGraphOptimizerPassTime(""common_subexpression_elimination"",
-                                            pass_end_us - pass_start_us);
     }
     if (opts_.do_function_inlining()) {
-      inlining_start_us = Env::Default()->NowMicros();
+      inlining_timings.Start();
       ExpandInlineFunctionsOptions expand_inline_opts;
       expand_inline_opts.native_options.inlined_function_body_placer =
           InlinedFunctionBodyPlacer::SingleDevice();
@@ -144,10 +145,7 @@ void GraphOptimizer::Optimize(FunctionLibraryRuntime* runtime, Env* env,
         changed = true;
       }
 
-      const uint64 inlining_end_us = Env::Default()->NowMicros();
-      metrics::UpdateGraphOptimizerPassTime(
-          ""function_inlining"",
-          (inlining_end_us - inlining_start_us) + inlining_total_us);
+      inlining_timings.ReportAndStop();
     }
     if (!changed) break;
   }
",0,train
bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies.

Add accumulate functionality to the ScopedTimer.

PiperOrigin-RevId: 409447266
Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",metrics.cc,"@@ -408,24 +408,6 @@ void UpdateGrapplerPassTime(const string& pass_name,
   }
 }
 
-void UpdateTFDataPassTime(const string& pass_name,
-                          const uint64 running_time_usecs) {
-  if (running_time_usecs > 0) {
-    GetGraphOptimizationCounter()
-        ->GetCell(""TFDataPass"", pass_name)
-        ->IncrementBy(running_time_usecs);
-  }
-}
-
-void UpdateGraphOptimizerPassTime(const string& pass_name,
-                                  const uint64 running_time_usecs) {
-  if (running_time_usecs > 0) {
-    GetGraphOptimizationCounter()
-        ->GetCell(""GraphOptimizerPass"", pass_name)
-        ->IncrementBy(running_time_usecs);
-  }
-}
-
 void UpdateGraphBuildTime(const uint64 running_time_usecs) {
   if (running_time_usecs > 0) {
     static auto* build_graph_calls_cell = build_graph_calls->GetCell();
",0,train
bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies.

Add accumulate functionality to the ScopedTimer.

PiperOrigin-RevId: 409447266
Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",metrics.h,"@@ -203,6 +203,23 @@ class ScopedCounter final {
   // Start the measurement with the existing set of labels.
   void Reset() { Init(); }
 
+  // Temporarily stop the timer, but keep accumulated time.
+  void AccumulateAndStop() {
+    if (started_) {
+      accumulated_time_ = tensorflow::Env::Default()->NowMicros() - start_time_;
+      started_ = false;
+    }
+  }
+
+  // Start previously stopped timer.
+  void Start() {
+    if (started_) return;
+
+    // Keep previously accumulated time if any.
+    start_time_ = tensorflow::Env::Default()->NowMicros();
+    started_ = true;
+  }
+
   ~ScopedCounter() { ReportAndStop(); }
 
  private:
@@ -210,6 +227,7 @@ class ScopedCounter final {
   void ReportInternal(std::index_sequence<S...>) {
     uint64 time_interval =
         tensorflow::Env::Default()->NowMicros() - start_time_;
+    time_interval += accumulated_time_;
     if (time_interval > 0) {
       counter_->GetCell(labels_[S]...)->IncrementBy(time_interval);
     }
@@ -218,25 +236,22 @@ class ScopedCounter final {
   void Init() {
     start_time_ = tensorflow::Env::Default()->NowMicros();
     started_ = true;
+    accumulated_time_ = 0;
   }
 
   monitoring::Counter<NumLabels>* counter_;
   std::array<std::string, NumLabels> labels_;
   bool started_{false};
   uint64 start_time_;
+  uint64 accumulated_time_;
 };
 
 // Returns a counter used to capture timing metrics for graph optimization
 // passes.
 monitoring::Counter<2>* GetGraphOptimizationCounter();
 
-// Updates the metrics stored about graph optimizations.
 void UpdateGrapplerPassTime(const string& pass_name,
                             const uint64 running_time_usecs);
-void UpdateTFDataPassTime(const string& pass_name,
-                          const uint64 running_time_usecs);
-void UpdateGraphOptimizerPassTime(const string& pass_name,
-                                  const uint64 running_time_usecs);
 
 // Updates metrics for time to distribute variables to all TPU hosts.
 void UpdateTpuVariableDistributionTime(const uint64 distribution_time_usecs);
",0,train
bbf6bb9920823f2fafe83bcd38ed0b06806b6d5b,tensorflow/tensorflow,"[cleanup] Use scoped counter for measuring GraphOptimizer and TFData latencies.

Add accumulate functionality to the ScopedTimer.

PiperOrigin-RevId: 409447266
Change-Id: I12e377f5425681ce1933571269f00cd23f0f1ac8",meta_optimizer.cc,"@@ -103,10 +103,11 @@ Status TFDataMetaOptimizer::Optimize(Cluster* cluster, const GrapplerItem& item,
 
   // Perform optimizations in a meaningful order.
   for (const auto& optimization : kTFDataOptimizations) {
-    const uint64 pass_start_us = Env::Default()->NowMicros();
+    tensorflow::metrics::ScopedCounter<2> timings(
+        tensorflow::metrics::GetGraphOptimizationCounter(),
+        {""TFData"", optimization});
     Status status = ApplyOptimization(optimization, cluster, &optimized_item);
-    const uint64 pass_end_us = Env::Default()->NowMicros();
-    metrics::UpdateTFDataPassTime(optimization, pass_end_us - pass_start_us);
+    timings.ReportAndStop();
     if (!status.ok()) return status;
   }
 
",0,train
b56ecc8f4bff7576bf48431d3f7b61ab466b6420,tensorflow/tensorflow,Used true_divide to make sure float division,ops_test.py,"@@ -178,9 +178,9 @@ class OpsTest(test_util.TensorFlowTestCase):
 
       if all(x >= 0 for x in v2):
         self.assertAllEqual((a**b), np.power(v1, v2))
-      self.assertAllEqual((a / b), np.divide(v1, v2))
+      self.assertAllEqual((a / b), np.true_divide(v1, v2))
 
-      self.assertAllEqual((a / a), np.divide(v1, v1))
+      self.assertAllEqual((a / a), np.true_divide(v1, v1))
       self.assertAllEqual((a % b), np.mod(v1, v2))
 
       self.assertAllEqual((a < b), np.less(v1, v2))
",0,train
a0c56725b12af530a7869de6913826a41448f61f,tensorflow/tensorflow,"Update metrics.py

Updated as per https://github.com/tensorflow/tensorflow/pull/47343#issuecomment-793362918",metrics.py,"@@ -3343,7 +3343,7 @@ def categorical_accuracy(y_true, y_pred):
     Categorical accuracy values.
   """"""
   # assert if predicted and true labels Tensors have the same shape
-  check_ops.assert_equal_v2(array_ops.shape_v2(y_pred), array_ops.shape_v2(y_pred))
+  check_ops.assert_equal_v2(array_ops.shape_v2(y_pred), array_ops.shape_v2(y_true))
   
   return math_ops.cast(
       math_ops.equal(
",0,train
82f4f50f4fbfd74aceb741ff097d6c42688b5023,tensorflow/tensorflow,"Add check for events_ not containing the event we are waiting on because it has already completed

PiperOrigin-RevId: 337185599
Change-Id: I9c73388c0a99c2abbc52aef4e7bf2c61656e8199",pod_tpu_driver.cc,"@@ -736,24 +736,38 @@ class PodTpuDriver : public TpuDriver {
 
       auto done = [this, event_id]() {
         mu_.AssertHeld();
-        if (events_.count(event_id) == 0) {
-          LOG(ERROR) << ""Cannot find event id "" << event_id
-                     << "" in WaitForEvent."";
-        }
-        return events_[event_id]->underlying_event != nullptr &&
-               events_[event_id]->underlying_event.use_count() != 0;
+        // The event was either completed and erased from the map or we have
+        // an underlying event available to us.
+        return events_.count(event_id) == 0 ||
+               (events_[event_id]->underlying_event != nullptr &&
+                events_[event_id]->underlying_event.use_count() != 0);
       };
 
       auto status = mu_.AwaitWithTimeout(absl::Condition(&done), duration);
       if (!status) {
         return absl::nullopt;
       }
-      underlying_event = events_[event_id]->underlying_event;
+
+      if (events_.count(event_id) > 0) {
+        underlying_event = events_[event_id]->underlying_event;
+      } else {
+        underlying_event = nullptr;
+      }
     }
 
     // Wait for the underlying event without holding on to the event_lock_, or
     // else incoming events will not be processed.
-    return underlying_event->AwaitWithTimeout(duration);
+    if (underlying_event != nullptr) {
+      return underlying_event->AwaitWithTimeout(duration);
+    } else {
+      absl::MutexLock l(&mu_);
+      auto event_status = abnormal_event_status_.find(event_id);
+      if (event_status == abnormal_event_status_.end()) {
+        return Status::OK();
+      } else {
+        return event_status->second;
+      }
+    }
   }
 
   void AddCallbackForEvent(int64_t event_id, std::function<void(Status)> fn)
",0,train
e31e0f7c71d051ae8e7d4ce7b07ad9ea3ec8e508,tensorflow/tensorflow,"Use resource variable with placeholder for KMeans.

PiperOrigin-RevId: 236676217",clustering_ops.py,"@@ -286,36 +286,31 @@ class KMeans(object):
       - update_in_steps: numbers of steps left before we sync
             cluster_centers_updated back to cluster_centers.
     """"""
-    init_value = array_ops.constant([], dtype=dtypes.float32)
+    init_value = array_ops.placeholder_with_default([], shape=None)
     cluster_centers = variable_scope.variable(
-        init_value, name=CLUSTERS_VAR_NAME, validate_shape=False,
-        use_resource=False)
+        init_value, name=CLUSTERS_VAR_NAME, validate_shape=False)
     cluster_centers_initialized = variable_scope.variable(
-        False, dtype=dtypes.bool, name='initialized', use_resource=False)
+        False, dtype=dtypes.bool, name='initialized')
 
     if self._use_mini_batch and self._mini_batch_steps_per_iteration > 1:
       # Copy of cluster centers actively updated each step according to
       # mini-batch update rule.
       cluster_centers_updated = variable_scope.variable(
-          init_value, name='clusters_updated', validate_shape=False,
-          use_resource=False)
+          init_value, name='clusters_updated', validate_shape=False)
       # How many steps till we copy the updated clusters to cluster_centers.
       update_in_steps = variable_scope.variable(
           self._mini_batch_steps_per_iteration,
           dtype=dtypes.int64,
-          name='update_in_steps',
-          use_resource=False)
+          name='update_in_steps')
       # Count of points assigned to cluster_centers_updated.
       cluster_counts = variable_scope.variable(
-          array_ops.zeros([num_clusters], dtype=dtypes.int64),
-          use_resource=False)
+          array_ops.zeros([num_clusters], dtype=dtypes.int64))
     else:
       cluster_centers_updated = cluster_centers
       update_in_steps = None
       cluster_counts = (
-          variable_scope.variable(  # pylint:disable=g-long-ternary
-              array_ops.ones([num_clusters], dtype=dtypes.int64),
-              use_resource=False)
+          variable_scope.variable(
+              array_ops.ones([num_clusters], dtype=dtypes.int64))
           if self._use_mini_batch else None)
     return (cluster_centers, cluster_centers_initialized, cluster_counts,
             cluster_centers_updated, update_in_steps)
",0,train
6ab65bc0c96982c538454ace8f97cc010edc66a0,tensorflow/tensorflow,"Better workaround for MSVC 14.0 limitation related to constexpr array.

PiperOrigin-RevId: 258793545",memmapped_file_system.cc,"@@ -190,13 +190,8 @@ const void* MemmappedFileSystem::GetMemoryWithOffset(uint64 offset) const {
   return reinterpret_cast<const uint8*>(mapped_memory_->data()) + offset;
 }
 
-#if defined(_MSC_VER)
-constexpr char* MemmappedFileSystem::kMemmappedPackagePrefix;
-constexpr char* MemmappedFileSystem::kMemmappedPackageDefaultGraphDef;
-#else
-constexpr char MemmappedFileSystem::kMemmappedPackagePrefix[];
-constexpr char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
-#endif
+constexpr const char MemmappedFileSystem::kMemmappedPackagePrefix[];
+constexpr const char MemmappedFileSystem::kMemmappedPackageDefaultGraphDef[];
 
 Status MemmappedFileSystem::InitializeFromFile(Env* env,
                                                const string& filename) {
",0,test
6ab65bc0c96982c538454ace8f97cc010edc66a0,tensorflow/tensorflow,"Better workaround for MSVC 14.0 limitation related to constexpr array.

PiperOrigin-RevId: 258793545",memmapped_file_system.h,"@@ -53,19 +53,11 @@ class MemmappedFileSystem : public FileSystem {
  public:
   // Memmapped regions use this prefix to distinguish from
   // the filesystem.
-#if defined(_MSC_VER)
-  static constexpr char* kMemmappedPackagePrefix =
-#else
-  static constexpr char kMemmappedPackagePrefix[] =
-#endif
+  static constexpr const char kMemmappedPackagePrefix[] =
       ""memmapped_package://"";
 
-// The default graphdef in the package.
-#if defined(_MSC_VER)
-  static constexpr char* kMemmappedPackageDefaultGraphDef =
-#else
-  static constexpr char kMemmappedPackageDefaultGraphDef[] =
-#endif
+  // The default graphdef in the package.
+  static constexpr const char kMemmappedPackageDefaultGraphDef[] =
       ""memmapped_package://."";
 
   MemmappedFileSystem();
",0,test
27ea707dfef54fec24e5b92210142898a6e87dfc,tensorflow/tensorflow,"In the doc example, commas in tensors are missing.
Now they are added.",array_ops.cc,"@@ -660,14 +660,14 @@ For example:
 ```prettyprint
 # tensor 't' is [1, 2, 3, 4, 5, 6, 7, 8, 9]
 # tensor 't' has shape [9]
-reshape(t, [3, 3]) ==> [[1, 2, 3]
-                        [4, 5, 6]
+reshape(t, [3, 3]) ==> [[1, 2, 3],
+                        [4, 5, 6],
                         [7, 8, 9]]
 
-# tensor 't' is [[[1, 1], [2, 2]]
+# tensor 't' is [[[1, 1], [2, 2]],
 #                [[3, 3], [4, 4]]]
 # tensor 't' has shape [2, 2, 2]
-reshape(t, [2, 4]) ==> [[1, 1, 2, 2]
+reshape(t, [2, 4]) ==> [[1, 1, 2, 2],
                         [3, 3, 4, 4]]
 
 # tensor 't' is [[[1, 1, 1],
",0,test
9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests

Remove assumption where resource variables could not be included as
outputs of the body. We instead iterate through the outputs to find
the first resource variable index.
Also loosen the requirements to specify maximum_iterations for XLA.

PiperOrigin-RevId: 226932912",while_op.cc,"@@ -291,20 +291,15 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
 
   xla::XlaOp while_result = xla::While(cond_wrapper, *body.computation, init);
 
-  auto while_shape_or = builder->GetShape(while_result);
-  OP_REQUIRES_OK(ctx, while_shape_or.status());
-  auto count = xla::ShapeUtil::TupleElementCount(while_shape_or.ValueOrDie());
-  int max_index = body.outputs.size() + body.resource_updates.size() - 1;
-  OP_REQUIRES(
-      ctx, max_index < count,
-      errors::Internal(""Max tuple element requested ("", max_index,
-                       "") needs to be less than tuple size ("", count, "")""));
-
-  // Sets non-variable outputs.
+  // Sets non-variable outputs and determine when resource variables start.
+  int resource_index = 0;
   for (int i = 0; i < ctx->num_outputs(); ++i) {
     if (ctx->input_type(i) != DT_RESOURCE) {
       ctx->SetOutput(body.input_mapping[i],
                      xla::GetTupleElement(while_result, i));
+      ++resource_index;
+    } else {
+      break;
     }
   }
   if (has_token_input_output_) {
@@ -326,7 +321,7 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
     XlaResource* resource;
     OP_REQUIRES_OK(ctx, ctx->GetResourceInput(update.input_index, &resource));
     if (update.modified) {
-      int pos = body.outputs.size() + i;
+      int pos = resource_index + i;
       OP_REQUIRES_OK(ctx,
                      resource->SetFromPack(
                          arguments[update.input_index].tensor_array_gradients,
",0,train
9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests

Remove assumption where resource variables could not be included as
outputs of the body. We instead iterate through the outputs to find
the first resource variable index.
Also loosen the requirements to specify maximum_iterations for XLA.

PiperOrigin-RevId: 226932912",control_flow_ops_py_test.py,"@@ -1183,6 +1183,8 @@ class ControlFlowTest(test.TestCase):
 
   @test_util.run_v1_only(""b/120545219"")
   def testInvalidMaximumIterationsWhileLoopGradientInXLAContext(self):
+    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
+      self.skipTest(""WhileV2 does lazy evaluation of maximum_iterations"")
     v = constant_op.constant(1.0)
 
     def inner_body(i, x):
@@ -1203,44 +1205,27 @@ class ControlFlowTest(test.TestCase):
     gs = gradients_impl.gradients(loop_no_xla, v)
     self.evaluate(gs)  # This should execute without error.
 
-    if control_flow_util.ENABLE_CONTROL_FLOW_V2:
-      xla_context = control_flow_ops.XLAControlFlowContext()
-      xla_context.Enter()
-      with self.assertRaisesRegexp(
-          ValueError,
-          r""maximum_iterations is None. It is required and must be statically ""
-          r""known \(e.g. a constant value or known shape dimension\) when ""
-          r""building while_loop in XLA context.""):
-        loop_no_maxiter = create_while_loop()
-      with self.assertRaisesRegexp(
-          ValueError,
-          r""maximum_iterations must be statically ""
-          r""known \(e.g. a constant value or known shape dimension\) when ""
-          r""building while_loop in XLA context.""):
-        loop_with_maxiter = create_while_loop(maximum_iterations=2)
-      xla_context.Exit()
-    else:
-      xla_context = control_flow_ops.XLAControlFlowContext()
-      xla_context.Enter()
-      loop_no_maxiter = create_while_loop()
-      loop_with_maxiter = create_while_loop(maximum_iterations=2)
-      xla_context.Exit()
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    loop_no_maxiter = create_while_loop()
+    loop_with_maxiter = create_while_loop(maximum_iterations=2)
+    xla_context.Exit()
 
-      with self.assertRaisesRegexp(
-          ValueError,
-          r""Cannot create a gradient accumulator for tensor '.+' inside ""
-          r""XLA while_loop because maximum_iterations was not passed to ""
-          r""the tf.while_loop call \('.+'\).""):
-        _ = gradients_impl.gradients(loop_no_maxiter, v)
+    with self.assertRaisesRegexp(
+        ValueError,
+        r""Cannot create a gradient accumulator for tensor '.+' inside ""
+        r""XLA while_loop because maximum_iterations was not passed to ""
+        r""the tf.while_loop call \('.+'\).""):
+      _ = gradients_impl.gradients(loop_no_maxiter, v)
 
-      with self.assertRaisesRegexp(
-          ValueError,
-          r""Cannot create a gradient accumulator for tensor '.+' inside XLA ""
-          r""while_loop. maximum_iterations tensor '.+' for while_loop context ""
-          r""'.+' must be statically known \(e.g. a constant value or known ""
-          r""shape dimension\), or be defined at or outside the while loop ""
-          r""context '.*' \(currently defined in '.*'\)""):
-        _ = gradients_impl.gradients(loop_with_maxiter, v)
+    with self.assertRaisesRegexp(
+        ValueError,
+        r""Cannot create a gradient accumulator for tensor '.+' inside XLA ""
+        r""while_loop. maximum_iterations tensor '.+' for while_loop context ""
+        r""'.+' must be statically known \(e.g. a constant value or known ""
+        r""shape dimension\), or be defined at or outside the while loop ""
+        r""context '.*' \(currently defined in '.*'\)""):
+      _ = gradients_impl.gradients(loop_with_maxiter, v)
 
   @test_util.run_v1_only(""b/120545219"")
   def testInvalidMaximumIterationsFromSiblingContextWhileLoopInXLAContext(self):
@@ -1265,10 +1250,7 @@ class ControlFlowTest(test.TestCase):
       xla_context = control_flow_ops.XLAControlFlowContext()
       xla_context.Enter()
       with self.assertRaisesRegexp(
-          ValueError,
-          r""maximum_iterations must be statically known \(e.g. a constant value""
-          r"" or known shape dimension\) when building while_loop in XLA ""
-          r""context.""):
+          ValueError, r""Tensor.*Placeholder:0.* must be from the same graph.*""):
         loop = create_while_loop()
       xla_context.Exit()
     else:
",0,train
9392ffa09224f0a7735aa7076bee2024c39f1e69,tensorflow/tensorflow,"Improve compatibility of while_v2 with XLA tests

Remove assumption where resource variables could not be included as
outputs of the body. We instead iterate through the outputs to find
the first resource variable index.
Also loosen the requirements to specify maximum_iterations for XLA.

PiperOrigin-RevId: 226932912",while_v2.py,"@@ -254,6 +254,7 @@ def _WhileGrad(op, *grads):  # pylint: disable=invalid-name
   maximum_iterations = op.get_attr(
       ""_maximum_iterations"") if _is_in_xla_context() else None
   assert not _is_in_xla_context() or maximum_iterations is not None
+  maximum_iterations = _validate_and_convert_to_tensor(maximum_iterations)
 
   # Set the incoming gradient of non-trainable inputs to None. It is possible
   # that we receive non-None gradients for non-trainable types in nested while
@@ -376,28 +377,30 @@ def _validate_and_convert_to_tensor(maximum_iterations):
   Raises:
     ValueError: If `maximum_iterations` is invalid.
   """"""
-  if _is_in_xla_context():
-    if maximum_iterations is None:
-      raise ValueError(""maximum_iterations is None. It is required and must ""
-                       ""be statically known (e.g. a constant value or known ""
-                       ""shape dimension) when building while_loop in XLA ""
-                       ""context."")
-    if isinstance(maximum_iterations, ops.Tensor):
-      # Get the constant value from the `maximum_iterations` tensor to avoid
-      # capturing a Const tensor from outside this graph.
-      maximum_iterations = tensor_util.constant_value(maximum_iterations)
-      if maximum_iterations is None:
-        raise ValueError(""maximum_iterations must be statically known (e.g. a ""
-                         ""constant value or known shape dimension) when ""
-                         ""building while_loop in XLA context."")
-
-  if maximum_iterations is not None:
-    # EmptyTensorList expects `max_num_elements` to be of type int32.
-    maximum_iterations = ops.convert_to_tensor(
-        maximum_iterations, dtype=dtypes.int32, name=""maximum_iterations"")
-    if maximum_iterations.shape.ndims != 0:
-      raise ValueError(""maximum_iterations must be a scalar, saw shape: %s"" %
-                       maximum_iterations.shape)
+  if maximum_iterations is None:
+    return None
+
+  if _is_in_xla_context() and isinstance(maximum_iterations, ops.Tensor):
+    # Get the constant value from the `maximum_iterations` tensor to avoid
+    # capturing a Const tensor from outside this graph.
+    value = tensor_util.constant_value(maximum_iterations)
+    if value is None:
+      # XLA requires maximum_iterations to be statically known (e.g. a
+      # constant value or known shape dimension) when intermediate values
+      # from the forward pass are needed in the gradients pass. However,
+      # maximum_iterations may not be required if the gradient isn't built
+      # or no intermediates are required, thus we return the tensor as is.
+      return maximum_iterations
+
+    maximum_iterations = value
+
+  # EmptyTensorList expects `max_num_elements` to be of type int32.
+  maximum_iterations = ops.convert_to_tensor(
+      maximum_iterations, dtype=dtypes.int32, name=""maximum_iterations"")
+  if maximum_iterations.shape.ndims != 0:
+    raise ValueError(""maximum_iterations must be a scalar, saw shape: %s"" %
+                     maximum_iterations.shape)
+
   return maximum_iterations
 
 
@@ -815,7 +818,7 @@ def _copy_handle_data(src_tensors, tgt_tensors):
 
 
 def _maybe_set_maximum_iterations_attr(op, maximum_iterations):
-  if control_flow_util.IsInXLAContext(op):
+  if maximum_iterations is not None and control_flow_util.IsInXLAContext(op):
     # Store the maximum_iterations to use in the gradient pass.
     op._set_attr(  # pylint: disable=protected-access
         ""_maximum_iterations"",
",0,train
ada0605591911094c142d39cbd87294ed2716e8b,tensorflow/tensorflow,"Update Keras Tracking API
1. move _keras_api_gauge.get_cell('compile').set(True) after line 316
(Reasoning: according to Pavithra, for some use cases, when user first call compile, it will save the info and run it later; move metric after is_compiled set to true to avoid double count)
2. Breakdown tracking for different training/evaluating/predicting methods
(Different methods for train/evaluate/predict are useful for engineers to observe which methods is mostly used, the previous implementation covers most of the train/evaluate/predict, except for train_on_batch/test_on_batch/predict_on_batch)
3. add a meta metric in __init__ of model to decide if a borg job uses keras API (should be a combination of 1 + 2 + user self-defined model)
Draft doc: go/tensorflow-api-metrics

PiperOrigin-RevId: 260419061",training.py,"@@ -143,6 +143,7 @@ class Model(network.Network):
 
   def __init__(self, *args, **kwargs):
     super(Model, self).__init__(*args, **kwargs)
+    _keras_api_gauge.get_cell('model').set(True)
     # initializing _distribution_strategy here since it is possible to call
     # predict on a model without compiling it.
     self._distribution_strategy = None
@@ -242,7 +243,6 @@ class Model(network.Network):
         ValueError: In case of invalid arguments for
             `optimizer`, `loss`, `metrics` or `sample_weight_mode`.
     """"""
-    _keras_api_gauge.get_cell('compile').set(True)
     self._run_eagerly = kwargs.pop('run_eagerly', None)
     self._run_distributed = kwargs.pop('run_distributed', False)
 
@@ -323,6 +323,7 @@ class Model(network.Network):
       # time the model gets called on training data.
       return
     self._is_compiled = True
+    _keras_api_gauge.get_cell('compile').set(True)
 
     # Prepare list of loss functions, same size of model outputs.
     self.loss_functions = training_utils.prepare_loss_functions(
@@ -705,7 +706,7 @@ class Model(network.Network):
         ValueError: In case of mismatch between the provided input data
             and what the model expects.
     """"""
-    _keras_api_gauge.get_cell('train').set(True)
+    _keras_api_gauge.get_cell('fit').set(True)
     # Legacy support
     if 'nb_epoch' in kwargs:
       logging.warning(
@@ -1279,7 +1280,7 @@ class Model(network.Network):
     if self._distribution_strategy:
       raise NotImplementedError('`fit_generator` is not supported for '
                                 'models compiled with tf.distribute.Strategy.')
-    _keras_api_gauge.get_cell('train').set(True)
+    _keras_api_gauge.get_cell('fit_generator').set(True)
     self._check_call_args('fit_generator')
     return training_generator.fit_generator(
         self,
@@ -1353,8 +1354,9 @@ class Model(network.Network):
     if self._distribution_strategy:
       raise NotImplementedError('`evaluate_generator` is not supported for '
                                 'models compiled with tf.distribute.Strategy.')
-    _keras_api_gauge.get_cell('evaluate').set(True)
+    _keras_api_gauge.get_cell('evaluate_generator').set(True)
     self._check_call_args('evaluate_generator')
+
     return training_generator.evaluate_generator(
         self,
         generator,
@@ -1411,8 +1413,7 @@ class Model(network.Network):
     if self._distribution_strategy:
       raise NotImplementedError('`predict_generator` is not supported for '
                                 'models compiled with tf.distribute.Strategy.')
-    _keras_api_gauge.get_cell('predict').set(True)
-    self._check_call_args('predict_generator')
+    _keras_api_gauge.get_cell('predict_generator').set(True)
     return training_generator.predict_generator(
         self,
         generator,
",0,train
57dc9f9681b8c4ea88eb0a3ee43c82d67f7707a2,tensorflow/tensorflow,"Forward Dataset._functions() through DatasetV1Adapter

We will need to re-register functions in the exported graph when saving SavedModels

PiperOrigin-RevId: 243359370",dataset_test.py,"@@ -194,6 +194,10 @@ class DatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
         nest.flatten(input_datasets),
         dataset_fn(input_datasets)._inputs())
 
+  def testFunctions(self):
+    dataset = dataset_ops.Dataset.range(5).map(lambda x: x * 2)
+    self.assertLen(dataset._functions(), 1)
+
   def testCollectInputs(self):
     ds1 = dataset_ops.Dataset.range(0)
     ds2 = ds1.concatenate(ds1)
",0,train
57dc9f9681b8c4ea88eb0a3ee43c82d67f7707a2,tensorflow/tensorflow,"Forward Dataset._functions() through DatasetV1Adapter

We will need to re-register functions in the exported graph when saving SavedModels

PiperOrigin-RevId: 243359370",dataset_ops.py,"@@ -1781,6 +1781,9 @@ class DatasetV1Adapter(DatasetV1):
   def _inputs(self):
     return self._dataset._inputs()  # pylint: disable=protected-access
 
+  def _functions(self):
+    return self._dataset._functions()  # pylint: disable=protected-access
+
   def options(self):
     return self._dataset.options()
 
",0,train
ce25634e3ec6c79c89645e4c52b004eabb869cb8,tensorflow/tensorflow,"Fix iterator invalidation when pushing a self-reference on a SmallVector

This crashes with https://github.com/llvm/llvm-project/commit/2c196bbc6bd897b3dcc1d87a3baac28e1e88df41

PiperOrigin-RevId: 342653532
Change-Id: I26b32e9674a03a1dfeb74c7760916a9e43a080fc",tf_executor.cc,"@@ -677,7 +677,7 @@ ParseResult ParseMergeOp(OpAsmParser &parser, OperationState &result) {
   } else {
     // In case of the short form, use the parsed type for both the operands and
     // the remaining operands are expected to be control inputs.
-    types.push_back(types.front());
+    types.push_back(Type(types.front()));
     Type control_type = ControlType::get(parser.getBuilder().getContext());
     types.append(op_infos.size() - 2, control_type);
 
",0,train
bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel

PiperOrigin-RevId: 378725030
Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",cwise_op_gpu_round.cu.cc,"@@ -19,7 +19,12 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
+
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
 DEFINE_UNARY5(round, Eigen::half, float, double, int32, int64);
+#endif
+
 }  // namespace functor
 }  // namespace tensorflow
 
",0,train
bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel

PiperOrigin-RevId: 378725030
Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",cwise_op_round.cc,"@@ -16,12 +16,16 @@ limitations under the License.
 #include ""tensorflow/core/kernels/cwise_ops_common.h""
 
 namespace tensorflow {
+
 REGISTER5(UnaryOp, CPU, ""Round"", functor::round, Eigen::half, float, double,
           int32, int64);
 
-
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
 REGISTER5(UnaryOp, GPU, ""Round"", functor::round, Eigen::half, float, double,
           int32, int64);
 #endif
+#endif
+
 }  // namespace tensorflow
",0,train
bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel

PiperOrigin-RevId: 378725030
Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",gpu_op_round.cc,"@@ -0,0 +1,27 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
+#include ""tensorflow/core/kernels/mlir_generated/base_gpu_op.h""
+
+namespace tensorflow {
+
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_HALF);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_FLOAT);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_DOUBLE);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_INT32);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Round, DT_INT64);
+
+}  // namespace tensorflow
",0,train
bba4c4a5524522ac24df5bfe8a4c4843a084a990,tensorflow/tensorflow,"[MLIR][KernelGen] Add MLIR-generated `tf.Round` kernel

PiperOrigin-RevId: 378725030
Change-Id: I531306aa301fe1b6087fd8e53dcdb240c7a21429",gpu_unary_ops_test.cc,"@@ -737,10 +737,40 @@ GENERATE_DEFAULT_TEST(Rint, DT_FLOAT, DT_FLOAT, std::rint,
 
 GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES(
     Rint, DT_DOUBLE, DT_DOUBLE,
-    test::InputAsVector<double>({-1.7, -1.5, -0.2, 0.2, 0.5000001, 1.5, 1.7,
-                                 2.0}),
+    test::InputAsVector<double>({-1.7, -1.5, -0.2, -0.0, 0.0, 0.2, 0.5000001,
+                                 1.5, 1.7, 2.0}),
     std::rint, test::OpsTestConfig().ExpectStrictlyEqual())
 
+/// Test `tf.Round`.
+
+/// `tf.Round` is the same as `std::rint` and different from `std::round`. It
+/// rounds to the nearest even integer, not towards zero.
+
+template <typename T>
+T baseline_round(T x) {
+  T y = std::rint(x);
+  return y == T(0) ? T(0) : y;
+}
+
+GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES(
+    Round, DT_DOUBLE, DT_DOUBLE,
+    test::InputAsVector<double>({-1.7, -1.5, -0.2, -0.0, 0.0, 0.2, 0.5000001,
+                                 1.5, 1.7, 2.0}),
+    baseline_round, test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TEST(Round, DT_FLOAT, DT_FLOAT, baseline_round,
+                      test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TEST_2(Round, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT,
+                        baseline_round,
+                        test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TEST(Round, DT_INT32, DT_INT32, baseline_round,
+                      test::OpsTestConfig().ExpectStrictlyEqual())
+
+GENERATE_DEFAULT_TEST(Round, DT_INT64, DT_INT64, baseline_round,
+                      test::OpsTestConfig().ExpectStrictlyEqual())
+
 /// Test `tf.Rsqrt`.
 
 /// Reference implementation.
",0,train
f9d0d3205f7d0522dcd6f6d7e2d32896275fc78c,tensorflow/tensorflow,"Remove erroneous comment from resnet_v2.py

PiperOrigin-RevId: 155933792",resnet_v2.py,"@@ -25,8 +25,6 @@ introduced by:
 
 The key difference of the full preactivation 'v2' variant compared to the
 'v1' variant in [1] is the use of batch normalization before every weight layer.
-Another difference is that 'v2' ResNets do not include an activation function in
-the main pathway. Also see [2; Fig. 4e].
 
 Typical use:
 
",0,train
c444870b2d6f85c3d6936322e74984e0f889acfd,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled sign kernels for i8, i16 on GPU

PiperOrigin-RevId: 404633033
Change-Id: I8bbfbc2266b2ba3d5cee9dfa1463cca696481a35",gpu_op_sign.cc,"@@ -26,4 +26,10 @@ GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT64);
 GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX64);
 GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_COMPLEX128);
 
+// These kernels are JIT-compiled.
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT8);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Sign, DT_INT16);
+#endif
+
 }  // namespace tensorflow
",0,train
c444870b2d6f85c3d6936322e74984e0f889acfd,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled sign kernels for i8, i16 on GPU

PiperOrigin-RevId: 404633033
Change-Id: I8bbfbc2266b2ba3d5cee9dfa1463cca696481a35",gpu_unary_ops_test.cc,"@@ -990,25 +990,29 @@ std::complex<double> baseline_sign(std::complex<double> x) {
 
 GENERATE_DEFAULT_TEST(Sign, DT_FLOAT, DT_FLOAT, baseline_sign,
                       test::OpsTestConfig().ExpectStrictlyEqual())
-
 GENERATE_DEFAULT_TEST(Sign, DT_DOUBLE, DT_DOUBLE, baseline_sign,
                       test::OpsTestConfig().ExpectStrictlyEqual())
-
 // TODO(b/162577610): We should actually use ExpectStrictlyEqual()
 // here. This requires returning 0.0 for input -0.0.
 GENERATE_DEFAULT_TEST_2(Sign, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT,
                         baseline_sign, test::OpsTestConfig())
-
 GENERATE_DEFAULT_TEST(Sign, DT_INT64, DT_INT64, baseline_sign,
                       test::OpsTestConfig().ExpectStrictlyEqual())
-
 GENERATE_DEFAULT_TEST_2(Sign, DT_COMPLEX64, DT_COMPLEX128, DT_COMPLEX64,
                         DT_COMPLEX128, baseline_sign,
                         test::OpsTestConfig().ExpectStrictlyEqual())
-
 GENERATE_DEFAULT_TEST(Sign, DT_COMPLEX128, DT_COMPLEX128, baseline_sign,
                       test::OpsTestConfig().ExpectStrictlyEqual())
 
+// These kernels are JIT-compiled.
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
+    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_DEFAULT_TEST(Sign, DT_INT8, DT_INT8, baseline_sign,
+                      test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TEST(Sign, DT_INT16, DT_INT16, baseline_sign,
+                      test::OpsTestConfig().ExpectStrictlyEqual())
+#endif
+
 /// Test `tf.Sin`.
 
 GENERATE_DEFAULT_TEST(Sin, DT_FLOAT, DT_FLOAT, std::sin, test::OpsTestConfig())
",0,train
373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char.

The character literal overload is more efficient.

PiperOrigin-RevId: 348473483
Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",gpu_backend_lib.cc,"@@ -838,11 +838,11 @@ StatusOr<std::vector<uint8>> CompileToHsaco(
   // Delete the first two lines, since they usually vary even when the rest of
   // the code is the same (but verify that they are what we expect).
   if (str.size() >= 13 && str.substr(0, 13) == ""; ModuleID = "") {
-    auto pos = str.find(""\n"");
+    auto pos = str.find('\n');
     if (pos != std::string::npos) str = str.substr(pos + 1);
   }
   if (str.size() >= 18 && str.substr(0, 18) == ""source_filename = "") {
-    auto pos = str.find(""\n"");
+    auto pos = str.find('\n');
     if (pos != std::string::npos) str = str.substr(pos + 1);
   }
   str += hlo_module_config.compilation_cache_key();
",0,test
373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char.

The character literal overload is more efficient.

PiperOrigin-RevId: 348473483
Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",hlo_instruction.cc,"@@ -2538,7 +2538,7 @@ string PrintName(const string& name, bool print_ids) {
   if (print_ids) {
     return name;
   } else {
-    auto dot_position = name.find_first_of(""."");
+    auto dot_position = name.find_first_of('.');
     return name.substr(0, dot_position);
   }
 }
",0,test
373f458fb66fdb709d7af828fe6200e3137942e6,tensorflow/tensorflow,"Optimize calls to std::string::find() and friends for a single char.

The character literal overload is more efficient.

PiperOrigin-RevId: 348473483
Change-Id: Ia76efa5ee243f7a92b35f1fb81d4af864fca8372",dot_operation_test.cc,"@@ -1210,7 +1210,7 @@ XLA_TEST_P(EinsumTest, SimpleEinsumTest) {
           .ValueOrDie(),
       &builder);
   auto config = std::get<2>(GetParam());
-  if (config.find("","") == config.npos) {
+  if (config.find(',') == config.npos) {
     Einsum(x, config);
   } else {
     Einsum(x, y, config);
",0,test
5cf69dc5d8caa129903a7812a916f619b4e03114,tensorflow/tensorflow,refine,hlo_ops.cc,"@@ -4468,17 +4468,14 @@ OpFoldResult SelectOp::fold(ArrayRef<Attribute> operands) {
 // false_value, true_value)
 static LogicalResult selectCanonicalization(SelectOp selectOp,
                                             PatternRewriter& rewriter) {
-  if (auto notOp = selectOp.pred().getDefiningOp<NotOp>()) {
-    if (1 ==
-        notOp.operand().getType().cast<ShapedType>().getElementTypeBitWidth()) {
-      std::array<Value, 3> newOperands = {notOp.operand(),
-                                          selectOp.getOperands()[2],
-                                          selectOp.getOperands()[1]};
-      selectOp.getOperation()->setOperands(newOperands);
-      return success();
-    }
+  auto notOp = selectOp.pred().getDefiningOp<NotOp>();
+  if (!notOp) {
+    return failure();
   }
-  return failure();
+  std::array<Value, 3> newOperands = {notOp.operand(), selectOp.on_false(),
+                                      selectOp.on_true()};
+  selectOp.getOperation()->setOperands(newOperands);
+  return success();
 }
 
 void SelectOp::getCanonicalizationPatterns(RewritePatternSet& results,
",0,train
a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with them.

Note: in the unlikely event this causes your relus to die, you can lower the learning rate.
Change: 131131862",target_column.py,"@@ -175,9 +175,51 @@ class _TargetColumn(object):
   def problem_type(self):
     return self._problem_type
 
+  def _weighted_loss(self, loss, weight_tensor):
+    """"""Returns cumulative weighted loss.""""""
+    unweighted_loss = array_ops.reshape(loss, shape=(-1,))
+    weighted_loss = math_ops.mul(unweighted_loss,
+                                 array_ops.reshape(
+                                     weight_tensor, shape=(-1,)))
+    return weighted_loss
+
+  def training_loss(self, logits, target, features):
+    """"""Returns training loss tensor for this head.
+
+    Training loss is different from the loss reported on the tensorboard as we
+    should respect the example weights when computing the gradient.
+
+      L = sum_{i} w_{i} * l_{i} / B
+
+    where B is the number of examples in the batch, and l_{i} and w_{i} are the
+    individual loss and example weight.
+
+    Args:
+      logits: logits, a float tensor.
+      target: either a tensor for labels or in multihead case, a dict of string
+        to target tensor.
+      features: features dict.
+
+    Returns:
+      Loss tensor.
+    """"""
+    target = target[self.name] if isinstance(target, dict) else target
+    loss_unweighted = self._loss_fn(logits, target)
+
+    weight_tensor = self.get_weight_tensor(features)
+    if weight_tensor is None:
+      return math_ops.reduce_mean(loss_unweighted, name=""loss"")
+    else:
+      loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
+      return math_ops.reduce_mean(loss_weighted, name=""loss"")
+
   def loss(self, logits, target, features):
     """"""Returns loss tensor for this head.
 
+    The loss returned is the weighted average.
+
+      L = sum_{i} w_{i} * l_{i} / sum_{i} w_{i}
+
     Args:
       logits: logits, a float tensor.
       target: either a tensor for labels or in multihead case, a dict of string
@@ -194,10 +236,7 @@ class _TargetColumn(object):
     if weight_tensor is None:
       return math_ops.reduce_mean(loss_unweighted, name=""loss"")
     else:
-      loss_unweighted = array_ops.reshape(loss_unweighted, shape=(-1,))
-      loss_weighted = math_ops.mul(
-          loss_unweighted,
-          array_ops.reshape(weight_tensor, shape=(-1,)))
+      loss_weighted = self._weighted_loss(loss_unweighted, weight_tensor)
       return math_ops.div(
           math_ops.reduce_sum(loss_weighted),
           math_ops.to_float(math_ops.reduce_sum(weight_tensor)),
",0,train
a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with them.

Note: in the unlikely event this causes your relus to die, you can lower the learning rate.
Change: 131131862",target_column_test.py,"@@ -27,23 +27,29 @@ class RegressionTargetColumnTest(tf.test.TestCase):
   def testRegression(self):
     target_column = tf.contrib.layers.regression_target()
     with tf.Graph().as_default(), tf.Session() as sess:
-      logits = tf.constant([[1.], [1.], [3.]])
+      prediction = tf.constant([[1.], [1.], [3.]])
       targets = tf.constant([[0.], [1.], [1.]])
-      self.assertAlmostEqual(5. / 3,
-                             sess.run(target_column.loss(logits, targets, {})))
+      self.assertAlmostEqual(
+          5. / 3, sess.run(target_column.loss(prediction, targets, {})))
 
   def testRegressionWithWeights(self):
     target_column = tf.contrib.layers.regression_target(
         weight_column_name=""label_weight"")
     with tf.Graph().as_default(), tf.Session() as sess:
-      features = {""label_weight"": tf.constant([[1.], [0.], [0.]])}
-      logits = tf.constant([[1.], [1.], [3.]])
+      features = {""label_weight"": tf.constant([[2.], [5.], [0.]])}
+      prediction = tf.constant([[1.], [1.], [3.]])
       targets = tf.constant([[0.], [1.], [1.]])
       self.assertAlmostEqual(
-          1., sess.run(target_column.loss(logits, targets, features)))
+          2. / 7,
+          sess.run(target_column.loss(prediction, targets, features)),
+          places=3)
+      self.assertAlmostEqual(
+          2. / 3,
+          sess.run(target_column.training_loss(prediction, targets, features)),
+          places=3)
 
 
-class MulltiClassTargetColumnTest(tf.test.TestCase):
+class MultiClassTargetColumnTest(tf.test.TestCase):
 
   def testBinaryClassification(self):
     target_column = tf.contrib.layers.multi_class_target(n_classes=2)
@@ -126,9 +132,9 @@ class MulltiClassTargetColumnTest(tf.test.TestCase):
 
   def testBinarySVMDefaultWeights(self):
     target_column = tf.contrib.layers.binary_svm_target()
-    logits = tf.constant([[-0.5], [1.2]])
+    predictions = tf.constant([[-0.5], [1.2]])
     targets = tf.constant([0, 1])
-    loss = target_column.loss(logits, targets, {})
+    loss = target_column.loss(predictions, targets, {})
     # Prediction for first example is in the right side of the hyperplane (i.e.,
     # < 0) but it is within the [-1,1] margin. There is a 0.5 loss incurred by
     # this example. The 2nd prediction is outside the margin so it incurs no
@@ -139,15 +145,17 @@ class MulltiClassTargetColumnTest(tf.test.TestCase):
   def testBinarySVMWithWeights(self):
     target_column = tf.contrib.layers.binary_svm_target(
         weight_column_name=""weights"")
-    logits = tf.constant([[-0.7], [0.2]])
+    predictions = tf.constant([[-0.7], [0.2]])
     targets = tf.constant([0, 1])
     features = {""weights"": tf.constant([2.0, 10.0])}
-    loss = target_column.loss(logits, targets, features)
+    loss = target_column.loss(predictions, targets, features)
+    training_loss = target_column.training_loss(predictions, targets, features)
     # Prediction for both examples are in the right side of the hyperplane but
     # within the margin. The (weighted) loss incurred is 2*0.3=0.6 and 10*0.8=8
     # respectively. The overall (normalized) loss is therefore 8.6/12.
     with tf.Session() as sess:
-      self.assertAlmostEqual(8.6 / 12, sess.run(loss))
+      self.assertAlmostEqual(8.6 / 12, sess.run(loss), places=3)
+      self.assertAlmostEqual(8.6 / 2, sess.run(training_loss), places=3)
 
 
 if __name__ == ""__main__"":
",0,train
a2dc1ae9cd5991e248dc8885ed8879bb6ab15096,tensorflow/tensorflow,"Fix the training losses when examples have weights associated with them.

Note: in the unlikely event this causes your relus to die, you can lower the learning rate.
Change: 131131862",dnn_linear_combined.py,"@@ -174,16 +174,20 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
     else:
       centered_bias_step = []
     with ops.control_dependencies(centered_bias_step):
-      loss = self._target_column.loss(logits, targets, features)
-    logging_ops.scalar_summary(""loss"", loss)
+      training_loss = self._target_column.training_loss(logits, targets,
+                                                        features)
+      weighted_average_loss = self._target_column.loss(logits, targets,
+                                                       features)
 
-    linear_train_step = self._linear_model.get_train_step(loss)
-    dnn_train_step = (self._dnn_model.get_train_step(loss)
-                      if self._dnn_model else [])
+    logging_ops.scalar_summary(""loss"", weighted_average_loss)
+
+    linear_train_step = self._linear_model.get_train_step(training_loss)
+    dnn_train_step = (self._dnn_model.get_train_step(training_loss) if
+                      self._dnn_model else [])
 
     with ops.control_dependencies(linear_train_step + dnn_train_step):
       with ops.get_default_graph().colocate_with(global_step):
-        return state_ops.assign_add(global_step, 1).op, loss
+        return state_ops.assign_add(global_step, 1).op, weighted_average_loss
 
   def _get_eval_ops(self, features, targets, metrics=None):
     """"""See base class.""""""
@@ -242,10 +246,11 @@ class _DNNLinearCombinedBaseEstimator(estimator.BaseEstimator):
     logits = array_ops.reshape(
         array_ops.tile(centered_bias[0], [batch_size]),
         [batch_size, self._target_column.num_label_columns])
-    loss = self._target_column.loss(logits, targets, features)
+    training_loss = self._target_column.training_loss(logits, targets, features)
     # Learn central bias by an optimizer. 0.1 is a convervative lr for a single
     # variable.
-    return training.AdagradOptimizer(0.1).minimize(loss, var_list=centered_bias)
+    return training.AdagradOptimizer(0.1).minimize(
+        training_loss, var_list=centered_bias)
 
   def _logits(self, features, is_training=False):
     linear_feature_columns = self._get_linear_feature_columns()
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",cuda_gpu_executor.cc,"@@ -1139,14 +1139,6 @@ DeviceDescription* GpuExecutor::PopulateDeviceDescription() const {
 
 }  // namespace gpu
 
-void initialize_cuda_gpu_executor() {
-  *internal::MakeCUDAExecutorImplementation() = [](const PluginConfig& config) {
-    return new gpu::GpuExecutor{config};
-  };
-}
-
 }  // namespace stream_executor
 
-REGISTER_MODULE_INITIALIZER(cuda_gpu_executor, {
-  stream_executor::initialize_cuda_gpu_executor();
-});
+REGISTER_MODULE_INITIALIZER(cuda_gpu_executor, {});
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",rocm_gpu_executor.cc,"@@ -961,14 +961,6 @@ DeviceDescription* GpuExecutor::PopulateDeviceDescription() const {
 
 }  // namespace gpu
 
-void initialize_rocm_gpu_executor() {
-  *internal::MakeROCMExecutorImplementation() = [](const PluginConfig& config) {
-    return new gpu::GpuExecutor{config};
-  };
-}
-
 }  // namespace stream_executor
 
-REGISTER_MODULE_INITIALIZER(rocm_gpu_executor, {
-  stream_executor::initialize_rocm_gpu_executor();
-});
+REGISTER_MODULE_INITIALIZER(rocm_gpu_executor, {});
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",stream_executor_internal.cc,"@@ -18,31 +18,6 @@ limitations under the License.
 namespace stream_executor {
 namespace internal {
 
-// -- CUDA
-
-StreamExecutorFactory* MakeCUDAExecutorImplementation() {
-  static StreamExecutorFactory instance;
-  return &instance;
-}
-
-// -- ROCm
-
-StreamExecutorFactory* MakeROCMExecutorImplementation() {
-  static StreamExecutorFactory instance;
-  return &instance;
-}
-
-// -- OpenCL
-
-StreamExecutorFactory* MakeOpenCLExecutorImplementation() {
-  static StreamExecutorFactory instance;
-  return &instance;
-}
-
-// -- Host
-
-StreamExecutorFactory MakeHostExecutorImplementation;
-
 // The default implementation just calls the other HostCallback method.
 // It should make all existing code that uses a void() callback still work.
 bool StreamExecutorInterface::HostCallback(Stream* stream,
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",stream_executor_internal.h,"@@ -383,21 +383,6 @@ class StreamExecutorInterface {
   SE_DISALLOW_COPY_AND_ASSIGN(StreamExecutorInterface);
 };
 
-using StreamExecutorFactory =
-    std::function<StreamExecutorInterface *(const PluginConfig &)>;
-using EventFactory = std::function<EventInterface *(StreamExecutor *)>;
-using StreamFactory = std::function<StreamInterface *(StreamExecutor *)>;
-using TimerFactory = std::function<TimerInterface *(StreamExecutor *)>;
-using KernelFactory = std::function<KernelInterface*()>;
-
-StreamExecutorFactory *MakeCUDAExecutorImplementation();
-
-StreamExecutorFactory *MakeROCMExecutorImplementation();
-
-StreamExecutorFactory *MakeOpenCLExecutorImplementation();
-
-extern StreamExecutorFactory MakeHostExecutorImplementation;
-
 
 }  // namespace internal
 }  // namespace stream_executor
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",stream_executor_pimpl.cc,"@@ -60,37 +60,6 @@ void BlockOnThreadExecutor(port::ThreadPool *executor) {
   n.WaitForNotification();
 }
 
-internal::StreamExecutorInterface *StreamExecutorImplementationFromPlatformKind(
-    PlatformKind platform_kind, const PluginConfig &plugin_config) {
-  // Note: we use this factory-assignment-in-switch pattern instead of just
-  // invoking the callable in case linkage is messed up -- instead of invoking a
-  // nullptr std::function (due to failed registration) we give a nice
-  // LOG(FATAL) message.
-  internal::StreamExecutorFactory factory;
-  switch (platform_kind) {
-    case PlatformKind::kCuda:
-      factory = *internal::MakeCUDAExecutorImplementation();
-      break;
-    case PlatformKind::kROCm:
-      factory = *internal::MakeROCMExecutorImplementation();
-      break;
-    case PlatformKind::kOpenCL:
-      factory = *internal::MakeOpenCLExecutorImplementation();
-      break;
-    case PlatformKind::kHost:
-      factory = internal::MakeHostExecutorImplementation;
-      break;
-    default:
-      factory = nullptr;
-  }
-  if (factory == nullptr) {
-    LOG(FATAL)
-        << ""cannot create StreamExecutor implementation for platform kind: ""
-        << PlatformKindString(platform_kind);
-  }
-  return factory(plugin_config);
-}
-
 std::atomic_int_fast64_t correlation_id_generator(0);
 
 }  // namespace
@@ -154,20 +123,6 @@ MakeScopedTracer(StreamExecutor *stream_exec, BeginCallT begin_call,
 
 /* static */ mutex StreamExecutor::static_mu_{LINKER_INITIALIZED};
 
-StreamExecutor::StreamExecutor(PlatformKind platform_kind,
-                               const PluginConfig &plugin_config)
-    : platform_(nullptr),
-      implementation_(StreamExecutorImplementationFromPlatformKind(
-          platform_kind, plugin_config)),
-      platform_kind_(platform_kind),
-      device_ordinal_(-1),
-      background_threads_(new port::ThreadPool(
-          port::Env::Default(), ""stream_executor"", kNumBackgroundThreads)),
-      live_stream_count_(0),
-      tracing_enabled_(false) {
-  CheckPlatformKindIsValid(platform_kind);
-}
-
 // Get per-device memory limit in bytes. Returns 0 if
 // TF_PER_DEVICE_MEMORY_LIMIT_MB environment variable is not set.
 static int64 GetMemoryLimitBytes() {
",0,train
88e0e42fdd472200c91732ab5669f91c48b1fdd7,tensorflow/tensorflow,"Remove unused StreamExecutorFactory

PiperOrigin-RevId: 244956772",stream_executor_pimpl.h,"@@ -70,9 +70,6 @@ class ScopedTracer;
 // StreamExecutor interface should not be invoked from a signal handler.
 class StreamExecutor {
  public:
-  explicit StreamExecutor(PlatformKind kind,
-                          const PluginConfig &plugin_config = PluginConfig());
-
   StreamExecutor(
       const Platform *platform,
       std::unique_ptr<internal::StreamExecutorInterface> implementation);
",0,train
9edecf8e2391d73e506878da92951a902da0719b,tensorflow/tensorflow,"Fix a typo in set_virtual_device_configuration

PiperOrigin-RevId: 275292912
Change-Id: Ia710b9bb14d466710fdc6bd9a60c10d849f9e19d",context.py,"@@ -1245,7 +1245,7 @@ class Context(object):
               ""Setting memory limit is required for GPU virtual devices"")
     else:
       raise ValueError(""Virtual devices are not supported for %s"" %
-                       dev.device_type())
+                       dev.device_type)
 
     if self._virtual_device_map.get(dev) == virtual_devices:
       return
",0,test
f901da42e3a7781add9023965eb76162cdbfe29b,tensorflow/tensorflow,"Lowering for tfl.sparse_to_dense to tosa

Sparse to dense can be implemented using a series of reshapes, constants,
numerical operations, and a final scatter. This should work to decompose into
a TOSA compatible form.",legalize_tfl.cc,"@@ -152,6 +152,7 @@ DECL_CONVERT_OP(Const);
 DECL_CONVERT_OP(QConst);
 DECL_CONVERT_OP(Gather);
 DECL_CONVERT_OP(GatherNd);
+DECL_CONVERT_OP(SparseToDense);
 DECL_CONVERT_OP(OneHot);
 DECL_CONVERT_OP(ArgMax);
 DECL_CONVERT_OP(FakeQuant);
@@ -3016,6 +3017,87 @@ LogicalResult ConvertTFLGatherNdOp::matchAndRewrite(
   return success();
 }
 
+LogicalResult ConvertTFLSparseToDenseOp::matchAndRewrite(
+    Operation* op, PatternRewriter& rewriter) const {
+  auto tfl_sparse_to_dense_op = cast<TFL::SparseToDenseOp>(op);
+  auto indices = tfl_sparse_to_dense_op.sparse_indices();
+  auto values = tfl_sparse_to_dense_op.sparse_values();
+  auto output_shape_value = tfl_sparse_to_dense_op.output_shape();
+  auto default_value = tfl_sparse_to_dense_op.default_value();
+  auto indices_ty = indices.getType().cast<ShapedType>();
+  auto indices_ety = indices_ty.getElementType();
+  auto values_ty = values.getType().cast<ShapedType>();
+  auto result_ty =
+      tfl_sparse_to_dense_op.getResult().getType().cast<ShapedType>();
+  auto result_ety = result_ty.getElementType();
+  auto loc = op->getLoc();
+
+  if (!result_ty.hasStaticShape()) return failure();
+  auto result_rank = result_ty.getRank();
+
+  // We want to generate the default tensor we need to scatter. Note that the
+  // result_ty needs to be a statically shaped tensor.
+  ElementsAttr default_value_attr;
+  if (!matchPattern(default_value, m_Constant(&default_value_attr)))
+    return failure();
+
+  if (!default_value_attr.isSplat()) return failure();
+
+  ShapedType scatter_ty =
+      RankedTensorType::get({1, result_ty.getNumElements(), 1}, result_ety);
+
+  Value default_const = rewriter.create<tosa::ConstOp>(
+      loc, scatter_ty,
+      DenseElementsAttr::get(scatter_ty,
+                             default_value_attr.getSplatValue<APInt>().sext(
+                                 result_ety.getIntOrFloatBitWidth())));
+
+  // Compute the row-major strides used to flatten each multi-dimensional index.
+  llvm::SmallVector<int32_t> multiply_constant_ints;
+  multiply_constant_ints.resize(result_rank, 1);
+  for (int i = result_rank - 1; i > 0; i--) {
+    multiply_constant_ints[i - 1] =
+        result_ty.getDimSize(i) * multiply_constant_ints[i];
+  }
+
+  indices_ety = rewriter.getI32Type();
+  indices_ty = RankedTensorType::get(indices_ty.getShape(), indices_ety);
+  indices = CreateOpAndInfer<tosa::CastOp>(rewriter, loc, indices_ty, indices);
+
+  auto multiply_constant_type =
+      RankedTensorType::get({result_rank}, indices_ety);
+  auto multiply_constant_attr = DenseElementsAttr::get(
+      multiply_constant_type, llvm::makeArrayRef(multiply_constant_ints));
+  Value multiply_constant = CreateOpAndInfer<tosa::ConstOp>(
+      rewriter, loc, multiply_constant_type, multiply_constant_attr);
+
+  Value multiply_op = CreateOpAndInfer<tosa::MulOp>(
+      rewriter, loc, indices_ty, indices, multiply_constant, 0);
+
+  Value reduce_op = CreateOpAndInfer<tosa::ReduceSumOp>(
+      rewriter, loc, UnrankedTensorType::get(indices_ety), multiply_op,
+      rewriter.getI64IntegerAttr(1));
+
+  auto values_reshape_op = CreateOpAndInfer<tosa::ReshapeOp>(
+      rewriter, loc, UnrankedTensorType::get(result_ety), values,
+      rewriter.getI64ArrayAttr(
+          ArrayRef<int64_t>{1, values_ty.getDimSize(0), 1}));
+
+  auto index_reshape_op = CreateOpAndInfer<tosa::ReshapeOp>(
+      rewriter, loc, UnrankedTensorType::get(indices_ety), reduce_op,
+      rewriter.getI64ArrayAttr(ArrayRef<int64_t>{1, indices_ty.getDimSize(0)}));
+
+  auto scatter = CreateOpAndInfer<tosa::ScatterOp>(
+      rewriter, loc, UnrankedTensorType::get(result_ety), default_const,
+      index_reshape_op, values_reshape_op);
+
+  CreateReplaceOpAndInfer<tosa::ReshapeOp>(
+      rewriter, op, result_ty, scatter,
+      rewriter.getI64ArrayAttr(result_ty.getShape()));
+
+  return success();
+}
+
 LogicalResult ConvertTFLOneHotOp::matchAndRewrite(
     Operation* op, PatternRewriter& rewriter) const {
   auto tfl_one_hot_op = cast<TFL::OneHotOp>(op);
@@ -3179,6 +3261,7 @@ void LegalizeTFL::runOnFunction() {
   DEF_PATTERN_INSERT(Constant);
   DEF_PATTERN_INSERT(TFLGather);
   DEF_PATTERN_INSERT(TFLGatherNd);
+  DEF_PATTERN_INSERT(TFLSparseToDense);
   DEF_PATTERN_INSERT(TFLArgMax);
   DEF_PATTERN_INSERT(TFLFakeQuant);
   DEF_PATTERN_INSERT(TFLOneHot);
",0,train
c56bdc3302fba4b89a30f64a4ec0a18f64ccc2e4,tensorflow/tensorflow,"Changing docs to match the code.
Change: 111445385",port.h,"@@ -24,12 +24,12 @@ limitations under the License.
 
 // Choose which platform we are on.
 #if defined(ANDROID) || defined(__ANDROID__)
-#define PLATFORM_POSIX_ANDROID
+#define PLATFORM_GOOGLE_ANDROID
 #elif defined(__APPLE__)
 #define PLATFORM_POSIX
 #else
 // If no platform specified, use:
-#define PLATFORM_POSIX
+#define PLATFORM_GOOGLE
 #endif
 
 #endif
",0,train
c56bdc3302fba4b89a30f64a4ec0a18f64ccc2e4,tensorflow/tensorflow,"Changing docs to match the code.
Change: 111445385",ops.py,"@@ -785,16 +785,16 @@ class SparseTensor(object):
   """"""Represents a sparse tensor.
 
   Tensorflow represents a sparse tensor as three separate dense tensors:
-  `indices`, `values`, and `dense_shape`.  In Python, the three tensors are
+  `indices`, `values`, and `shape`.  In Python, the three tensors are
   collected into a `SparseTensor` class for ease of use.  If you have separate
-  `indices`, `values`, and `dense_shape` tensors, wrap them in a `SparseTensor`
-  object before passing to the Ops below.
+  `indices`, `values`, and `shape` tensors, wrap them in a `SparseTensor`
+  object before passing to the ops below.
 
-  Concretely, the sparse tensor `SparseTensor(values, indices, dense_shape)` is
+  Concretely, the sparse tensor `SparseTensor(values, indices, shape)` is
 
   * `indices`: A 2-D int64 tensor of shape `[N, ndims]`.
   * `values`: A 1-D tensor of any type and shape `[N]`.
-  * `dense_shape`: A 1-D int64 tensor of shape `[ndims]`.
+  * `shape`: A 1-D int64 tensor of shape `[ndims]`.
 
   where `N` and `ndims` are the number of values, and number of dimensions in
   the `SparseTensor` respectively.
@@ -802,15 +802,15 @@ class SparseTensor(object):
   The corresponding dense tensor satisfies
 
   ```python
-  dense.shape = dense_shape
+  dense.shape = shape
   dense[tuple(indices[i])] = values[i]
   ```
 
   By convention, `indices` should be sorted in row-major order (or equivalently
   lexicographic order on the tuples `indices[i]`).  This is not enforced when
   `SparseTensor` objects are constructed, but most ops assume correct ordering.
-  If the ordering is wrong, it can be fixed by calling `sparse_reorder` on the
-  misordered `SparseTensor`.
+  If the ordering of sparse tensor `st` is wrong, a fixed version can be
+  obtained by calling `tf.sparse_reorder(st)`.
 
   Example: The sparse tensor
 
",0,train
6b493f72c82593cb1a642af2d091e93b15b56ddc,tensorflow/tensorflow,"Change contrib estimator to save relative paths in checkpoint.
Change: 155016674",estimator.py,"@@ -966,7 +966,8 @@ class BaseEstimator(
             saver.Saver(
                 sharded=True,
                 max_to_keep=self._config.keep_checkpoint_max,
-                defer_build=True))
+                defer_build=True,
+                save_relative_paths=True))
 
       chief_hooks = []
       if (self._config.save_checkpoints_secs or
",0,train
6b493f72c82593cb1a642af2d091e93b15b56ddc,tensorflow/tensorflow,"Change contrib estimator to save relative paths in checkpoint.
Change: 155016674",estimator_test.py,"@@ -28,6 +28,8 @@ import numpy as np
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from google.protobuf import text_format
+
 from tensorflow.contrib import learn
 from tensorflow.contrib import lookup
 from tensorflow.contrib.framework.python.ops import variables
@@ -50,6 +52,7 @@ from tensorflow.python.client import session as session_lib
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.lib.io import file_io
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
@@ -61,6 +64,7 @@ from tensorflow.python.platform import test
 from tensorflow.python.saved_model import loader
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import basic_session_run_hooks
+from tensorflow.python.training import checkpoint_state_pb2
 from tensorflow.python.training import input as input_lib
 from tensorflow.python.training import monitored_session
 from tensorflow.python.training import saver as saver_lib
@@ -674,6 +678,38 @@ class EstimatorTest(test.TestCase):
         metrics={'MSE': metric_ops.streaming_mean_squared_error})
     self.assertLess(scores3['MSE'], scores['MSE'])
 
+  def test_checkpoint_contains_relative_paths(self):
+    tmpdir = tempfile.mkdtemp()
+    est = estimator.Estimator(
+        model_dir=tmpdir,
+        model_fn=linear_model_fn_with_model_fn_ops)
+    est.fit(input_fn=boston_input_fn, steps=5)
+
+    checkpoint_file_content = file_io.read_file_to_string(
+        os.path.join(tmpdir, 'checkpoint'))
+    ckpt = checkpoint_state_pb2.CheckpointState()
+    text_format.Merge(checkpoint_file_content, ckpt)
+    self.assertEqual(ckpt.model_checkpoint_path, 'model.ckpt-5')
+    self.assertAllEqual(
+        ['model.ckpt-1', 'model.ckpt-5'], ckpt.all_model_checkpoint_paths)
+
+  def test_train_save_copy_reload(self):
+    tmpdir = tempfile.mkdtemp()
+    model_dir1 = os.path.join(tmpdir, 'model_dir1')
+    est1 = estimator.Estimator(
+        model_dir=model_dir1,
+        model_fn=linear_model_fn_with_model_fn_ops)
+    est1.fit(input_fn=boston_input_fn, steps=5)
+
+    model_dir2 = os.path.join(tmpdir, 'model_dir2')
+    os.renames(model_dir1, model_dir2)
+    est2 = estimator.Estimator(
+        model_dir=model_dir2,
+        model_fn=linear_model_fn_with_model_fn_ops)
+    self.assertEqual(5, est2.get_variable_value('global_step'))
+    est2.fit(input_fn=boston_input_fn, steps=5)
+    self.assertEqual(10, est2.get_variable_value('global_step'))
+
   def testEstimatorParams(self):
     boston = base.load_boston()
     est = estimator.SKCompat(
",0,train
8ce1e9268e6f3f9a46504b9f2112b21fd1799b18,tensorflow/tensorflow,"Fix typo in StreamingFilesDataset

PiperOrigin-RevId: 244678021",datasets.py,"@@ -130,8 +130,8 @@ def StreamingFilesDataset(files,
   if sloppy is None:
     sloppy = True
 
-  if file_reader_job == 'cordinator':
-    file_reader_device = '/job:%s/task:0' % file_reader_job
+  if file_reader_job == 'coordinator':
+    file_reader_device = '/job:coordinator/task:0'
   else:
     file_reader_device = '/job:%s' % file_reader_job
 
",0,test
a16391de4e9d3c799e916959b47934472daa6fba,tensorflow/tensorflow,"Added selection of best storage type for Metal api.

PiperOrigin-RevId: 410322725
Change-Id: I5848cdab1a35a867ed508258067c9510bdd5a657",metal_spatial_tensor.cc,"@@ -510,6 +510,17 @@ absl::Status CreateSharedImage2DBufferTensor(id<MTLBuffer> buffer,
   return absl::OkStatus();
 }
 
+TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info) {
+  const bool a7_or_a8 =
+      gpu_info.IsApple() && (gpu_info.apple_info.IsA7GenerationGpu() ||
+                             gpu_info.apple_info.IsA8GenerationGpu());
+  if (a7_or_a8) {
+    return TensorStorageType::TEXTURE_2D;
+  } else {
+    return TensorStorageType::BUFFER;
+  }
+}
+
 }  // namespace metal
 }  // namespace gpu
 }  // namespace tflite
",0,train
a16391de4e9d3c799e916959b47934472daa6fba,tensorflow/tensorflow,"Added selection of best storage type for Metal api.

PiperOrigin-RevId: 410322725
Change-Id: I5848cdab1a35a867ed508258067c9510bdd5a657",metal_spatial_tensor.h,"@@ -134,6 +134,8 @@ absl::Status CreateSharedImage2DBufferTensor(id<MTLBuffer> buffer, const BHWDC&
                                              const TensorDescriptor& descriptor,
                                              int row_bytes_alignment, MetalSpatialTensor* result);
 
+TensorStorageType GetFastestStorageType(const GpuInfo& gpu_info);
+
 template <DataType T>
 absl::Status MetalSpatialTensor::WriteData(id<MTLDevice> device,
                                            const tflite::gpu::Tensor<BHWC, T>& src) {
",0,train
6c08402e3a7d3e440d6913cb683f26d28514ad8d,tensorflow/tensorflow,"[tf.data] Properly export `tf.contrib.data.group_by_reducer()`

PiperOrigin-RevId: 201386380",__init__.py,"@@ -33,6 +33,7 @@ See the @{$datasets$Importing Data} Programmer's Guide for an overview.
 @@choose_from_datasets
 @@dense_to_sparse_batch
 @@enumerate_dataset
+@@group_by_reducer
 @@group_by_window
 @@ignore_errors
 @@make_batched_features_dataset
@@ -71,6 +72,7 @@ from tensorflow.contrib.data.python.ops.enumerate_ops import enumerate_dataset
 from tensorflow.contrib.data.python.ops.error_ops import ignore_errors
 from tensorflow.contrib.data.python.ops.get_single_element import get_single_element
 from tensorflow.contrib.data.python.ops.grouping import bucket_by_sequence_length
+from tensorflow.contrib.data.python.ops.grouping import group_by_reducer
 from tensorflow.contrib.data.python.ops.grouping import group_by_window
 from tensorflow.contrib.data.python.ops.interleave_ops import choose_from_datasets
 from tensorflow.contrib.data.python.ops.interleave_ops import parallel_interleave
",0,train
09e0b6cea4c357049c0cc7dbd415f99b1eae568d,tensorflow/tensorflow,"Adds a max_rematerialized_block_size field.

PiperOrigin-RevId: 301205071
Change-Id: I9403816bbdc1e079a634b6ada730ecf7983eba6c",hlo_rematerialization.cc,"@@ -1648,6 +1648,8 @@ StatusOr<bool> HloRematerialization::RematerializeComputation(
       } else {
         // Found a valid block. Reset to start looking for single instructions
         // again.
+        max_rematerialized_block_size_ =
+            std::max(max_rematerialized_block_size_, max_block_size);
         changed = true;
         min_block_size = 1;
         max_block_size = 1;
",0,train
09e0b6cea4c357049c0cc7dbd415f99b1eae568d,tensorflow/tensorflow,"Adds a max_rematerialized_block_size field.

PiperOrigin-RevId: 301205071
Change-Id: I9403816bbdc1e079a634b6ada730ecf7983eba6c",hlo_rematerialization.h,"@@ -180,6 +180,10 @@ class HloRematerialization : public HloModulePass {
   // dead. Hence, no net instructions were added.
   int64 net_instructions_added_ = 0;
 
+  // Size of the largest block that has been rematerialized. This is actually an
+  // upper bound (within a factor of 2) on the block size.
+  int max_rematerialized_block_size_ = 0;
+
   RematerializationMode mode_;
 };
 
",0,train
383023c892ce9e89b6ff993f71c6ae65e838ab0d,tensorflow/tensorflow,"[XLA] Use a stricter ErrorSpec for some tests

PiperOrigin-RevId: 279972069
Change-Id: I3116e9812999398ebd37f6bf78e509a0549b5e66",array_elementwise_ops_test.cc,"@@ -43,6 +43,7 @@ namespace {
 class ArrayElementwiseOpTest : public ClientLibraryTestBase {
  public:
   ErrorSpec error_spec_{0.0001, 0.0001};
+  ErrorSpec strict_error_spec_{0x1p-48, 0x1p-48};
 };
 
 class ArrayElementwiseOpTestParamCount
@@ -71,7 +72,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, NegConstantF64) {
   auto a = ConstantR1<double>(&builder, {-2.5, 3.14, 2.25, -10.0, 6.0});
   Neg(a);
 
-  ComputeAndCompare(&builder, {}, error_spec_);
+  ComputeAndCompare(&builder, {}, strict_error_spec_);
 }
 
 XLA_TEST_F(ArrayElementwiseOpTest, NegConstantS32) {
@@ -458,7 +459,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, SubTwoConstantF64s) {
   auto b = ConstantR1<double>(&builder, {100.0, 3.13, 2.75, 10.5, -999.0});
   Sub(a, b);
 
-  ComputeAndCompare(&builder, {}, error_spec_);
+  ComputeAndCompare(&builder, {}, strict_error_spec_);
 }
 
 XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF32s) {
@@ -490,7 +491,7 @@ XLA_TEST_F(ArrayElementwiseOpTest, DivTwoConstantF64s) {
                  2.1, 3.1, 9.9, -4.5, -11.0, -21.5, M_PI});
   Div(a, b);
 
-  ComputeAndCompare(&builder, {}, error_spec_);
+  ComputeAndCompare(&builder, {}, strict_error_spec_);
 }
 
 class IntegerDivideOpTest : public ArrayElementwiseOpTest {
",0,test
d966b6a9600e64ebbde9c982e85c8ef2fc7e36f5,tensorflow/tensorflow,"[tf.data service] Raise an error when using dynamic sharding with Dataset.snapshot.

PiperOrigin-RevId: 387858516
Change-Id: Ic96e70442251a417438220f2e762224f84f12f21",snapshot_dataset_op.cc,"@@ -141,6 +141,12 @@ class SnapshotDatasetV2Op::Dataset : public DatasetBase {
         Iterator::Params{this, absl::StrCat(prefix, ""::Snapshot"")});
   }
 
+  Status MakeSplitProviders(std::vector<std::unique_ptr<SplitProvider>>*
+                                split_providers) const override {
+    return errors::Unimplemented(
+        ""Splitting is not implemented for snapshot datasets."");
+  }
+
   const DataTypeVector& output_dtypes() const override {
     return input_->output_dtypes();
   }
@@ -985,6 +991,12 @@ class SnapshotDatasetOp : public UnaryDatasetOpKernel {
           Iterator::Params{this, absl::StrCat(prefix, ""::Snapshot"")});
     }
 
+    Status MakeSplitProviders(std::vector<std::unique_ptr<SplitProvider>>*
+                                  split_providers) const override {
+      return errors::Unimplemented(
+          ""Splitting is not implemented for snapshot datasets."");
+    }
+
     const DataTypeVector& output_dtypes() const override {
       return input_->output_dtypes();
     }
",0,test
d966b6a9600e64ebbde9c982e85c8ef2fc7e36f5,tensorflow/tensorflow,"[tf.data service] Raise an error when using dynamic sharding with Dataset.snapshot.

PiperOrigin-RevId: 387858516
Change-Id: Ic96e70442251a417438220f2e762224f84f12f21",dynamic_sharding_test.py,"@@ -274,6 +274,23 @@ class DynamicShardingTest(data_service_test_base.TestBase,
     self.assertDatasetProduces(
         ds, list(range(200)), assert_items_equal=assert_items_equal)
 
+  @combinations.generate(
+      combinations.times(test_base.default_test_combinations(),
+                         combinations.combine(already_written=[True, False])))
+  def testSnapshot(self, already_written):
+    num_workers = 3
+    cluster = data_service_test_base.TestCluster(num_workers=num_workers)
+    ds = dataset_ops.Dataset.range(100)
+    ds = ds.snapshot(self.get_temp_dir())
+    if already_written:
+      # Materialize the snapshot.
+      self.getDatasetOutput(ds)
+
+    ds = self._make_dynamic_sharding_dataset(ds, cluster)
+    error_regex = ""Splitting is not implemented for snapshot datasets""
+    with self.assertRaisesRegex(errors.UnimplementedError, error_regex):
+      self.getDatasetOutput(ds)
+
   @combinations.generate(test_base.default_test_combinations())
   def testDistributedDataset(self):
     cluster_1 = data_service_test_base.TestCluster(num_workers=1)
",0,test
6e17966cc2ac75737eee912863d7a4599eaaad3e,tensorflow/tensorflow,"[tf.data] Follow up to cl/270460372 which extends the multi-device function check with an op kernel registration check.

PiperOrigin-RevId: 270469526",captured_function.cc,"@@ -23,6 +23,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/cancellation.h""
 #include ""tensorflow/core/framework/function.h""
 #include ""tensorflow/core/framework/function_handle_cache.h""
+#include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/stats_aggregator.h""
 #include ""tensorflow/core/kernels/data/dataset_utils.h""
 #include ""tensorflow/core/kernels/data/stats_utils.h""
@@ -856,6 +857,7 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx,
       LookupFunction(*metadata_->lib_def(), metadata_->func().name(), &fdef));
 
   Device* current_device = ctx->flr()->device();
+  DeviceType current_device_type(current_device->device_type());
   DeviceNameUtils::ParsedName current_device_name;
   if (!DeviceNameUtils::ParseFullName(current_device->name(),
                                       &current_device_name)) {
@@ -864,8 +866,8 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx,
   }
 
   // Check if any of the captured inputs are placed on a device not compatible
-  // with the current device. For non-captured inputs, we assume the are placed
-  // on the same device as the iterator.
+  // with the current device. For non-captured inputs, we assume they are placed
+  // on the current device.
   for (const auto& input : captured_inputs_) {
     DataType dtype = input.dtype();
     if (dtype == DT_RESOURCE) {
@@ -884,12 +886,18 @@ Status CapturedFunction::IsMultiDevice(IteratorContext* ctx,
     }
   }
 
-  // Check if any of the ops are placed on a device not compatible with the
-  // current device.
+  // Check if all ops could be placed on the current device.
   for (const auto& name : metadata_->lib_def()->ListFunctionNames()) {
     const FunctionDef* fdef;
     TF_RETURN_IF_ERROR(LookupFunction(*metadata_->lib_def(), name, &fdef));
     for (const auto& node : fdef->node_def()) {
+      // Check if the op has a kernel available for the current device.
+      if (!KernelDefAvailable(current_device_type, node)) {
+        *is_multi_device = true;
+        return Status::OK();
+      }
+      // If the op has a requested device, check if the requested device is
+      // compatible with the current device.
       if (!node.device().empty()) {
         DeviceNameUtils::ParsedName node_device_name;
         if (!DeviceNameUtils::ParseFullName(node.device(), &node_device_name)) {
",0,train
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",add.cc,"@@ -126,16 +126,19 @@ void EvalAddQuantized(TfLiteContext* context, TfLiteNode* node,
 
   int32 input1_multiplier;
   int input1_shift;
-  QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier,
-                                   &input1_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier,
+                                      &input1_multiplier, &input1_shift);
+  input1_shift *= -1;
   int32 input2_multiplier;
   int input2_shift;
-  QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier,
-                                   &input2_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier,
+                                      &input2_multiplier, &input2_shift);
+  input2_shift *= -1;
   int32 output_multiplier;
   int output_shift;
-  QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier,
-                                   &output_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_output_multiplier,
+                                      &output_multiplier, &output_shift);
+  output_shift *= -1;
 
   int32 output_activation_min, output_activation_max;
   CalculateActivationRangeUint8(params->activation, output,
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",conv.cc,"@@ -257,8 +257,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
         context, input, filter, bias, output, &real_multiplier));
     TF_LITE_ENSURE(context, real_multiplier < 1.0);
-    QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier,
-                                     &data->output_shift);
+    QuantizeMultiplierSmallerThanOneExp(
+        real_multiplier, &data->output_multiplier, &data->output_shift);
+    data->output_shift *= -1;
     CalculateActivationRangeUint8(params->activation, output,
                                   &data->output_activation_min,
                                   &data->output_activation_max);
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",fully_connected.cc,"@@ -118,8 +118,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     TF_LITE_ENSURE_STATUS(GetQuantizedConvolutionMultipler(
         context, input, filter, bias, output, &real_multiplier));
     TF_LITE_ENSURE(context, real_multiplier < 1.0);
-    QuantizeMultiplierSmallerThanOne(real_multiplier, &data->output_multiplier,
-                                     &data->output_shift);
+    QuantizeMultiplierSmallerThanOneExp(
+        real_multiplier, &data->output_multiplier, &data->output_shift);
+    data->output_shift *= -1;
     CalculateActivationRangeUint8(params->activation, output,
                                   &data->output_activation_min,
                                   &data->output_activation_max);
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",logsoftmax_quantized_test.cc,"@@ -116,10 +116,11 @@ void RunOneLogSoftmaxTest(const uint8* input_data, const Dims<4>& dims_common,
   int32 reverse_scaling_divisor;
   int reverse_scaling_right_shift;
   static const int kScaledDiffIntegerBits = 5;
-  tflite::PreprocessLogSoftmaxScaling(
+  tflite::PreprocessLogSoftmaxScalingExp(
       beta, input_scale, kScaledDiffIntegerBits, &input_beta_multiplier,
       &input_beta_left_shift, &reverse_scaling_divisor,
       &reverse_scaling_right_shift);
+  reverse_scaling_right_shift *= -1;
   // diff_min has a negative value, and is used to limit the maximum magnitude
   // of the diffs, which are <= 0.
   const int diff_min = -tflite::CalculateInputRadius(kScaledDiffIntegerBits,
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",optimized_ops.h,"@@ -1082,10 +1082,10 @@ struct GemmlowpOutputPipeline {
       gemmlowp::OutputStageQuantizeDownInt32ToUint8ScaleByFixedPoint,
       gemmlowp::OutputStageClamp, gemmlowp::OutputStageSaturatingCastToUint8>
       Pipeline;
-  static Pipeline Make(const int32* bias_data, int output_rows,
-                       int32 output_offset, int32 output_multiplier,
-                       int output_shift, int32 output_activation_min,
-                       int32 output_activation_max) {
+  static Pipeline MakeExp(const int32* bias_data, int output_rows,
+                          int32 output_offset, int32 output_multiplier,
+                          int output_left_shift, int32 output_activation_min,
+                          int32 output_activation_max) {
     ColVectorMap bias_vector(bias_data, output_rows);
     gemmlowp::OutputStageBiasAddition<ColVectorMap> bias_addition_stage;
     bias_addition_stage.bias_vector = bias_vector;
@@ -1093,7 +1093,7 @@ struct GemmlowpOutputPipeline {
         quantize_down_stage;
     quantize_down_stage.result_offset_after_shift = output_offset;
     quantize_down_stage.result_fixedpoint_multiplier = output_multiplier;
-    quantize_down_stage.result_shift = output_shift;
+    quantize_down_stage.result_shift = -output_left_shift;
     gemmlowp::OutputStageClamp clamp_stage;
     clamp_stage.min = output_activation_min;
     clamp_stage.max = output_activation_max;
@@ -1146,8 +1146,8 @@ inline void FullyConnected(const uint8* input_data, const Dims<4>& input_dims,
       input_data, filter_cols, batches, filter_cols);
   gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
       output_data, output_rows, batches, output_rows);
-  const auto& output_pipeline = GemmlowpOutputPipeline::Make(
-      bias_data, output_rows, output_offset, output_multiplier, output_shift,
+  const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
+      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
       output_activation_min, output_activation_max);
   gemmlowp::GemmWithOutputPipeline<uint8, uint8,
                                    gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
@@ -2084,8 +2084,8 @@ inline void Conv(const uint8* input_data, const Dims<4>& input_dims,
       gemm_input_data, gemm_input_rows, gemm_input_cols);
   gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
       output_data, output_rows, output_cols);
-  const auto& output_pipeline = GemmlowpOutputPipeline::Make(
-      bias_data, output_rows, output_offset, output_multiplier, output_shift,
+  const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
+      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
       output_activation_min, output_activation_max);
   gemmlowp::GemmWithOutputPipeline<uint8, uint8,
                                    gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
@@ -2242,8 +2242,8 @@ void ConvAsGemm(const uint8* input_data, const Dims<4>& input_dims,
       input_data, filter_cols, output_cols, filter_cols);
   gemmlowp::MatrixMap<uint8, gemmlowp::MapOrder::ColMajor> output_matrix(
       output_data, output_rows, output_cols, output_rows);
-  const auto& output_pipeline = GemmlowpOutputPipeline::Make(
-      bias_data, output_rows, output_offset, output_multiplier, output_shift,
+  const auto& output_pipeline = GemmlowpOutputPipeline::MakeExp(
+      bias_data, output_rows, output_offset, output_multiplier, -output_shift,
       output_activation_min, output_activation_max);
   gemmlowp::GemmWithOutputPipeline<uint8, uint8,
                                    gemmlowp::L8R8WithLhsNonzeroBitDepthParams>(
@@ -2387,8 +2387,9 @@ void L2Normalization(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
-inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt,
-                                          int* output_shift) {
+inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
+                                             int32* output_inv_sqrt,
+                                             int* output_shift) {
   *output_shift = 11;
   while (input >= (1 << 29)) {
     input /= 4;
@@ -2430,6 +2431,7 @@ inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt,
     *output_inv_sqrt <<= -*output_shift;
     *output_shift = 0;
   }
+  *output_shift *= kReverseShift;
 }
 
 inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
@@ -2448,13 +2450,13 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
     }
     int32 inv_l2norm_multiplier;
     int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
-                                  &inv_l2norm_shift);
+    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, &inv_l2norm_multiplier,
+                                     &inv_l2norm_shift);
 
     for (int c = 0; c < depth; c++) {
       int32 diff = *input_data - input_zero_point;
       int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, kReverseShift * inv_l2norm_shift);
+          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
       int32 unclamped_output_val = 128 + rescaled_diff;
       int32 output_val = std::min(255, std::max(0, unclamped_output_val));
       *output_data = static_cast<uint8>(output_val);
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",quantization_util.cc,"@@ -48,15 +48,15 @@ void QuantizeMultiplierGreaterThanOne(double double_multiplier,
   TFLITE_CHECK_GE(*left_shift, 0);
 }
 
-void QuantizeMultiplierSmallerThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* right_shift) {
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
+                                         int32_t* quantized_multiplier,
+                                         int* left_shift) {
   TFLITE_CHECK_LT(double_multiplier, 1.);
   TFLITE_CHECK_GT(double_multiplier, 0.);
   int shift;
   QuantizeMultiplier(double_multiplier, quantized_multiplier, &shift);
   TFLITE_CHECK_LE(shift, 0);
-  *right_shift = -shift;
+  *left_shift = shift;
 }
 
 void PreprocessSoftmaxScaling(double beta, double input_scale,
@@ -78,20 +78,21 @@ void PreprocessSoftmaxScaling(double beta, double input_scale,
                                    quantized_multiplier, left_shift);
 }
 
-void PreprocessLogSoftmaxScaling(double beta, double input_scale,
-                                 int input_integer_bits,
-                                 int32_t* quantized_multiplier, int* left_shift,
-                                 int32_t* reverse_scaling_divisor,
-                                 int* reverse_scaling_right_shift) {
+void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
+                                    int input_integer_bits,
+                                    int32_t* quantized_multiplier,
+                                    int* left_shift,
+                                    int32_t* reverse_scaling_divisor,
+                                    int* reverse_scaling_left_shift) {
   PreprocessSoftmaxScaling(beta, input_scale, input_integer_bits,
                            quantized_multiplier, left_shift);
 
   // Also calculate what amounts to the inverse scaling factor for the input.
   const double real_reverse_scaling_divisor =
       (1 << (31 - *left_shift)) / static_cast<double>(*quantized_multiplier);
-  tflite::QuantizeMultiplierSmallerThanOne(real_reverse_scaling_divisor,
-                                           reverse_scaling_divisor,
-                                           reverse_scaling_right_shift);
+  tflite::QuantizeMultiplierSmallerThanOneExp(real_reverse_scaling_divisor,
+                                              reverse_scaling_divisor,
+                                              reverse_scaling_left_shift);
 }
 
 int CalculateInputRadius(int input_integer_bits, int input_left_shift) {
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",quantization_util.h,"@@ -167,9 +167,9 @@ IntOut SafeCast(FloatIn x) {
 // this is intended as a RIGHT-shift.
 //
 // Restricted to the case where the multiplier < 1 (and non-negative).
-void QuantizeMultiplierSmallerThanOne(double double_multiplier,
-                                      int32_t* quantized_multiplier,
-                                      int* right_shift);
+void QuantizeMultiplierSmallerThanOneExp(double double_multiplier,
+                                         int32_t* quantized_multiplier,
+                                         int* left_shift);
 
 // Decompose a double multiplier into a Q0.31 int32 representation of its
 // significand, and shift representation of its exponent.
@@ -197,11 +197,12 @@ void PreprocessSoftmaxScaling(double beta, double input_scale,
                               int input_integer_bits,
                               int32_t* quantized_multiplier, int* left_shift);
 // Like PreprocessSoftmaxScaling, but inverse scaling factors also calculated.
-void PreprocessLogSoftmaxScaling(double beta, double input_scale,
-                                 int input_integer_bits,
-                                 int32_t* quantized_multiplier, int* left_shift,
-                                 int32_t* reverse_scaling_divisor,
-                                 int* reverse_scaling_right_shift);
+void PreprocessLogSoftmaxScalingExp(double beta, double input_scale,
+                                    int input_integer_bits,
+                                    int32_t* quantized_multiplier,
+                                    int* left_shift,
+                                    int32_t* reverse_scaling_divisor,
+                                    int* reverse_scaling_left_shift);
 // Calculate the largest input that will result in a within-bounds intermediate
 // result within MultiplyByQuantizedMultiplierGreaterThanOne.  In other words,
 // it must not overflow before we reduce the value by multiplication by the
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",quantization_util_test.cc,"@@ -196,21 +196,21 @@ TEST(QuantizationUtilTest, ChooseQuantizationParamsInvalidRange) {
   EXPECT_DEATH(ChooseQuantizationParams<uint8>(10.0, -30.0), """");
 }
 
-TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOne) {
+TEST(QuantizationUtilTest, QuantizeMultiplierSmallerThanOneExp) {
   auto quantize = [](double d) {
     int32_t q;
     int s;
-    QuantizeMultiplierSmallerThanOne(d, &q, &s);
+    QuantizeMultiplierSmallerThanOneExp(d, &q, &s);
     return std::pair<int32_t, int>{q, s};
   };
 
   EXPECT_DEATH(quantize(-0.1), """");
   EXPECT_DEATH(quantize(0.0), """");
-  EXPECT_THAT(quantize(0.25), Pair(1073741824, 1));
+  EXPECT_THAT(quantize(0.25), Pair(1073741824, -1));
 
   // Around 0.5 we can see the change in exponent and how we try hard to
   // avoid hitting max int32.
-  EXPECT_THAT(quantize(0.50 - 5e-9), Pair(2147483627, 1));
+  EXPECT_THAT(quantize(0.50 - 5e-9), Pair(2147483627, -1));
   EXPECT_THAT(quantize(0.50 - 1e-10), Pair(1073741824, 0));
   EXPECT_THAT(quantize(0.50), Pair(1073741824, 0));
 
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",reference_ops.h,"@@ -968,8 +968,9 @@ void L2Normalization(const float* input_data, const Dims<4>& input_dims,
   }
 }
 
-inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt,
-                                          int* output_shift) {
+inline void GetInvSqrtQuantizedMultiplierExp(int32 input,
+                                             int32* output_inv_sqrt,
+                                             int* output_shift) {
   *output_shift = 11;
   while (input >= (1 << 29)) {
     input /= 4;
@@ -1011,6 +1012,7 @@ inline void GetInvSqrtQuantizedMultiplier(int32 input, int32* output_inv_sqrt,
     *output_inv_sqrt <<= -*output_shift;
     *output_shift = 0;
   }
+  *output_shift *= kReverseShift;
 }
 
 inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
@@ -1027,14 +1029,14 @@ inline void L2Normalization(const uint8* input_data, const Dims<4>& input_dims,
     }
     int32 inv_l2norm_multiplier;
     int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplier(square_l2_norm, &inv_l2norm_multiplier,
-                                  &inv_l2norm_shift);
+    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, &inv_l2norm_multiplier,
+                                     &inv_l2norm_shift);
 
     for (int c = 0; c < depth; c++) {
       int32 diff =
           input_data[Offset(input_dims, c, i, 0, 0)] - input_zero_point;
       int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, kReverseShift * inv_l2norm_shift);
+          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
       int32 unclamped_output_val = 128 + rescaled_diff;
       int32 output_val = std::min(255, std::max(0, unclamped_output_val));
       output_data[Offset(output_dims, c, i, 0, 0)] =
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",mul.cc,"@@ -120,8 +120,9 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
   double real_multiplier =
       input1->params.scale * input2->params.scale / output->params.scale;
-  QuantizeMultiplierSmallerThanOne(real_multiplier, &output_multiplier,
-                                   &output_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
+                                      &output_shift);
+  output_shift *= -1;
 
   int32 output_activation_min, output_activation_max;
   CalculateActivationRangeUint8(params->activation, output,
",0,test
688a09dc6b70a81cae12a7e263515964311f8d86,tensorflow/tensorflow,"Standardize shifts in (more) multiplication util functions.

PiperOrigin-RevId: 200271078",sub.cc,"@@ -126,16 +126,19 @@ void EvalQuantized(TfLiteContext* context, TfLiteNode* node,
 
   int32 input1_multiplier;
   int input1_shift;
-  QuantizeMultiplierSmallerThanOne(real_input1_multiplier, &input1_multiplier,
-                                   &input1_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_input1_multiplier,
+                                      &input1_multiplier, &input1_shift);
+  input1_shift *= -1;
   int32 input2_multiplier;
   int input2_shift;
-  QuantizeMultiplierSmallerThanOne(real_input2_multiplier, &input2_multiplier,
-                                   &input2_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_input2_multiplier,
+                                      &input2_multiplier, &input2_shift);
+  input2_shift *= -1;
   int32 output_multiplier;
   int output_shift;
-  QuantizeMultiplierSmallerThanOne(real_output_multiplier, &output_multiplier,
-                                   &output_shift);
+  QuantizeMultiplierSmallerThanOneExp(real_output_multiplier,
+                                      &output_multiplier, &output_shift);
+  output_shift *= -1;
 
   int32 output_activation_min, output_activation_max;
   CalculateActivationRangeUint8(params->activation, output,
",0,test
b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6,tensorflow/tensorflow,"Mark Supervisor deprecated. Please use MonitoredTrainingSession instead.

Fixes #6263.

PiperOrigin-RevId: 177230053",monitored_session.py,"@@ -52,7 +52,6 @@ _PREEMPTION_ERRORS = (errors.AbortedError, errors.UnavailableError)
 USE_DEFAULT = object()
 
 
-# TODO(touts): Share that with the Supervisor.
 class Scaffold(object):
   """"""Structure to create or gather pieces commonly needed to train a model.
 
",0,train
b8969d12f9260a7b1981b8d22788aa1f8c8cbbb6,tensorflow/tensorflow,"Mark Supervisor deprecated. Please use MonitoredTrainingSession instead.

Fixes #6263.

PiperOrigin-RevId: 177230053",supervisor.py,"@@ -36,11 +36,15 @@ from tensorflow.python.training import coordinator
 from tensorflow.python.training import saver as saver_mod
 from tensorflow.python.training import session_manager as session_manager_mod
 from tensorflow.python.training import training_util
+from tensorflow.python.util import deprecation
 
 
 class Supervisor(object):
   """"""A training helper that checkpoints models and computes summaries.
 
+  This class is deprecated. Please use
+  ${tf.train.MonitoredTrainingSession} instead.
+
   The Supervisor is a small wrapper around a `Coordinator`, a `Saver`,
   and a `SessionManager` that takes care of common needs of TensorFlow
   training programs.
@@ -198,6 +202,8 @@ class Supervisor(object):
   # the default behavior should be used.
   USE_DEFAULT = 0
 
+  @deprecation.deprecated(None,
+                          ""Please switch to tf.train.MonitoredTrainingSession"")
   def __init__(self,
                graph=None,
                ready_op=USE_DEFAULT,
",0,train
305d30ce6130d0d621746a59dc3116c5824ab523,tensorflow/tensorflow,"Fix MSAN failure in TF-XLA lowering

This is an issue with an ArrayRef with content going out of scope at the end of the expression.

PiperOrigin-RevId: 399055398
Change-Id: Ieb7c4c1ce3cf77b5000553fb13b254961f187bd3",legalize_tf.cc,"@@ -466,10 +466,10 @@ Value BatchDot(Location loc, Value lhs, bool transpose_lhs, Value rhs,
                ArrayAttr precision_config, OpBuilder *builder) {
   auto batch_dimensions =
       llvm::to_vector<4>(llvm::seq<int64_t>(0, num_batch_dims));
-  auto lhs_contracting_dimensions =
-      llvm::makeArrayRef({transpose_lhs ? num_batch_dims : num_batch_dims + 1});
-  auto rhs_contracting_dimensions =
-      llvm::makeArrayRef({transpose_rhs ? num_batch_dims + 1 : num_batch_dims});
+  auto lhs_contracting_dimensions = llvm::to_vector<1>(llvm::makeArrayRef(
+      {transpose_lhs ? num_batch_dims : num_batch_dims + 1}));
+  auto rhs_contracting_dimensions = llvm::to_vector<1>(llvm::makeArrayRef(
+      {transpose_rhs ? num_batch_dims + 1 : num_batch_dims}));
   auto dimension_numbers = DotDimensionNumbersAttr::get(
       builder->getContext(),
       /*lhs_batching_dimensions=*/batch_dimensions,
",0,test
edfc5938ba99cbe81ac50796f6ff647a374daf82,tensorflow/tensorflow,"Don't match to backward input convolution in unsupported case.

For grouped convolutions, we assume that in the backward input convolution
case, the input and output feature dimensions of the kernel are adjacent.
If that is not the case, don't treat it as backward input convolution.

PiperOrigin-RevId: 339029980
Change-Id: If0b4f8a64cd3ca73e9648358d8a579ce262b27c9",gpu_conv_rewriter.cc,"@@ -536,11 +536,12 @@ MatchBackwardInput(HloInstruction* conv) {
   // 'kernel_output_feature_dimension' by 'feature_group_count'.
   int64 input_feature_dimension = dnums.kernel_input_feature_dimension();
   int64 output_feature_dimension = dnums.kernel_output_feature_dimension();
+  // The following code assumes that input_feature_dimension and
+  // output_feature_dimension are adjacent.
+  if (std::abs(input_feature_dimension - output_feature_dimension) != 1) {
+    return no_match_result;
+  }
 
-  // In the backward convolution case, the spatial dimensions become the
-  // feature dimensions, and we are guaranteed that the spatial dimensions are
-  // adjacent.
-  CHECK_EQ(std::abs(input_feature_dimension - output_feature_dimension), 1LL);
   int64 input_features = rhs->shape().dimensions(input_feature_dimension);
   int64 output_features = rhs->shape().dimensions(output_feature_dimension);
 
",0,train
edfc5938ba99cbe81ac50796f6ff647a374daf82,tensorflow/tensorflow,"Don't match to backward input convolution in unsupported case.

For grouped convolutions, we assume that in the backward input convolution
case, the input and output feature dimensions of the kernel are adjacent.
If that is not the case, don't treat it as backward input convolution.

PiperOrigin-RevId: 339029980
Change-Id: If0b4f8a64cd3ca73e9648358d8a579ce262b27c9",grouped_convolution_test.cc,"@@ -13,6 +13,10 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <string>
+#include <vector>
+
+#include ""absl/algorithm/container.h""
 #include ""absl/types/optional.h""
 #include ""tensorflow/compiler/xla/client/xla_computation.h""
 #include ""tensorflow/compiler/xla/execution_options_util.h""
@@ -23,6 +27,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/tests/client_library_test_base.h""
 #include ""tensorflow/compiler/xla/tests/hlo_test_base.h""
 #include ""tensorflow/compiler/xla/tests/test_macros.h""
+#include ""tensorflow/compiler/xla/tests/test_utils.h""
 
 namespace xla {
 namespace {
@@ -248,5 +253,28 @@ INSTANTIATE_TEST_CASE_P(
                        ::testing::Bool()),
     GroupedConvolution2DTestDataToString);
 
+using GroupedConvolutionTest = HloTestBase;
+
+XLA_TEST_F(GroupedConvolutionTest, BackwardInputConvolution) {
+  auto module = ParseAndReturnVerifiedModule(R""(
+  HloModule convolution_module
+
+ENTRY convolution {
+  p1 = f32[2,1,1,1]{3,2,1,0} parameter(0)
+  p2 = f32[2,4,4,1]{3,2,1,0} parameter(1)
+  reverse = f32[2,4,4,1]{3,2,1,0} reverse(p2), dimensions={1,2}
+  ROOT convolution = f32[2,4,4,1]{3,2,1,0} convolution(p1, reverse), window={size=4x4 pad=3_3x3_3}, dim_labels=fb01_o01i->f01b, feature_group_count=2
+}
+)"")
+                    .ValueOrDie();
+  TF_ASSERT_OK_AND_ASSIGN(auto fake_arguments, MakeFakeArguments(module.get()));
+  std::vector<Literal*> fake_argument_ptrs;
+  absl::c_transform(
+      fake_arguments, std::back_inserter(fake_argument_ptrs),
+      [](const Literal& literal) { return &const_cast<Literal&>(literal); });
+  EXPECT_TRUE(RunAndCompare(std::move(module), fake_argument_ptrs,
+                            ErrorSpec{0.01, 0.01}));
+}
+
 }  // namespace
 }  // namespace xla
",0,train
0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test:
1. context_id shouldn't be read during update.
2. EagerExecutor::state_ should be set before creating EagerExecutor::thread_

PiperOrigin-RevId: 262968876",context.cc,"@@ -210,10 +210,16 @@ bool EagerContext::MirrorTensors() const {
 void EagerContext::CloseRemoteContexts() {
   // Close all remote contexts.
   eager::CloseContextRequest request;
-  request.set_context_id(context_id_);
+  uint64 context_id;
+  {
+    mutex_lock l(remote_state_mu_);
+    if (!is_master_) return;
+    context_id = context_id_;
+    context_id_ = kInvalidContextId;
+  }
+  request.set_context_id(context_id);
   // Setting context_id to a new value can avoid us issuing DestroyTensorHandle
   // request to closed remote workers.
-  context_id_ = kInvalidContextId;
   std::vector<eager::CloseContextResponse> responses(remote_contexts_.size());
   BlockingCounter counter(static_cast<int>(remote_contexts_.size()));
 
@@ -223,10 +229,11 @@ void EagerContext::CloseRemoteContexts() {
     Status s = remote_eager_workers_->GetClient(worker, &client);
 
     client->CloseContextAsync(
-        &request, &responses[i], [this, &worker, &counter](const Status& s) {
+        &request, &responses[i],
+        [&worker, &counter, context_id](const Status& s) {
           if (!s.ok()) {
             LOG(ERROR) << ""Unable to close remote context with ID ""
-                       << context_id_ << "" for worker: "" << worker << "" due to ""
+                       << context_id << "" for worker: "" << worker << "" due to ""
                        << s.error_message();
           }
           counter.DecrementCount();
@@ -252,11 +259,12 @@ void EagerContext::WaitForAndCloseRemoteContexts() {
   }
   keep_alive_thread_.reset();
 
-  mutex_lock l(remote_state_mu_);
-  if (!remote_contexts_.empty() && is_master_) {
+  if (!remote_contexts_.empty()) {
     CloseRemoteContexts();
   }
 
+  mutex_lock l(remote_state_mu_);
+
   default_executor_.ShutDown().IgnoreError();
   std::unordered_map<std::thread::id, EagerExecutor*> executors_copy;
   {
@@ -301,7 +309,7 @@ EagerContext::~EagerContext() {
     keep_alive_thread_cv_.notify_all();
   }
   keep_alive_thread_.reset();
-  if (!remote_contexts_.empty() && is_master_) {
+  if (!remote_contexts_.empty()) {
     CloseRemoteContexts();
   }
 #endif  // !IS_MOBILE_PLATFORM
@@ -392,7 +400,7 @@ Status EagerContext::MaybeRegisterFunctionRemotely(const FunctionDef& fdef) {
   BlockingCounter blocking_counter(static_cast<int>(remote_contexts_.size()));
 
   eager::RegisterFunctionRequest request;
-  request.set_context_id(context_id_);
+  request.set_context_id(GetContextId());
   *request.mutable_function_def() = fdef;
   std::vector<eager::RegisterFunctionResponse> responses(
       remote_contexts_.size());
@@ -618,7 +626,10 @@ Status EagerContext::GetClient(const DeviceNameUtils::ParsedName& device_name,
   return Status::OK();
 }
 
-uint64 EagerContext::GetContextId() { return context_id_; }
+uint64 EagerContext::GetContextId() {
+  tf_shared_lock l(remote_state_mu_);
+  return context_id_;
+}
 
 Status EagerContext::StoreCollectiveOpsServer(
     std::unique_ptr<ServerInterface> server, DeviceMgr* device_mgr,
@@ -672,14 +683,15 @@ Status EagerContext::InitializeRemoteMaster(
         ""Failed to initialize remote for master context due to invalid "",
         ""context id"");
   }
-  mutex_lock l(remote_state_mu_);
-  is_master_ = true;
 
   if (!remote_contexts_.empty()) {
     CloseRemoteContexts();
   }
-  remote_contexts_ = remote_contexts;
+
+  mutex_lock l(remote_state_mu_);
+  is_master_ = true;
   context_id_ = context_id;
+  remote_contexts_ = remote_contexts;
 
   use_send_tensor_rpc_ =
       ReadBoolFromEnvVar(""TF_EAGER_REMOTE_USE_SEND_TENSOR_RPC"", false);
",0,test
0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test:
1. context_id shouldn't be read during update.
2. EagerExecutor::state_ should be set before creating EagerExecutor::thread_

PiperOrigin-RevId: 262968876",context.h,"@@ -439,7 +439,7 @@ class EagerContext : public core::RefCounted {
 
   mutex remote_state_mu_;
 
-  uint64 context_id_;
+  uint64 context_id_ GUARDED_BY(remote_state_mu_);
   std::vector<string> remote_contexts_;
 
   int keep_alive_secs_ GUARDED_BY(remote_state_mu_);
",0,test
0b69e6ed798b40b64aecea24a97aa2f198120688,tensorflow/tensorflow,"Fix two race conditions found in eager/c_api_test:
1. context_id shouldn't be read during update.
2. EagerExecutor::state_ should be set before creating EagerExecutor::thread_

PiperOrigin-RevId: 262968876",eager_executor.h,"@@ -160,10 +160,6 @@ class EagerExecutor {
   std::multimap<EagerNode*, condition_variable*> node_done_notifications_
       GUARDED_BY(node_queue_mutex_);
 
-  // Thread object that calls the `Run` method in async mode.This thread runs
-  // till thread_done_ is set to true. It is `nullptr` in sync mode.
-  const std::unique_ptr<Thread> thread_;
-
   // thread_exited_notification_ is notified by the `thread_` right before it
   // exits.
   Notification thread_exited_notification_;
@@ -171,6 +167,10 @@ class EagerExecutor {
   // Indicates that `thread_` should stop as soon as it is done executing the
   // current EagerNode.
   ExecutorState state_ GUARDED_BY(node_queue_mutex_) = ExecutorState::kActive;
+
+  // Thread object that calls the `Run` method in async mode. This thread runs
+  // until state_ is set to kShuttingDown. It is `nullptr` in sync mode.
+  const std::unique_ptr<Thread> thread_;
 };
 
 }  // namespace tensorflow
",0,test
b131b30fb59aa41bc826588b53571f6f98dcabc7,tensorflow/tensorflow,"PR #46097: [INTEL MKL] Change order for remapper

Imported from GitHub PR https://github.com/tensorflow/tensorflow/pull/46097

This PR is to test if there will be any regression if the order of remapper in grappler meta_optimizer is moved before arithmetic_optimizer.
Copybara import of the project:

--
613041d6e6b28f331aecd01aa6f69c8f0953fdac by mdfaijul <md.faijul.amin@intel.com>:

Change order for remapper.

PiperOrigin-RevId: 352011080
Change-Id: I5041e87bfeeb41120517ce454d1baa247c32d4fb",meta_optimizer.cc,"@@ -269,9 +269,6 @@ Status MetaOptimizer::InitializeOptimizers(
   if (cfg_.pin_to_host_optimization() == RewriterConfig::ON) {
     optimizers->push_back(MakeUnique<PinToHostOptimizer>());
   }
-  if (cfg_.remapping() != RewriterConfig::OFF) {
-    optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
-  }
   if (cfg_.arithmetic_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(
         MakeUnique<ArithmeticOptimizer>(cfg_.arithmetic_optimization()));
@@ -281,6 +278,9 @@ Status MetaOptimizer::InitializeOptimizers(
         /*optimization level*/ cfg_.layout_optimizer(),
         /*CPU layout conversion*/ cfg_.cpu_layout_conversion()));
   }
+  if (cfg_.remapping() != RewriterConfig::OFF) {
+    optimizers->push_back(MakeUnique<Remapper>(cfg_.remapping()));
+  }
   if (cfg_.loop_optimization() != RewriterConfig::OFF) {
     optimizers->push_back(
         MakeUnique<LoopOptimizer>(cfg_.loop_optimization(), cpu_device_));
",0,test
c0757ec6ed6e55b7fb50b5049276e6c140981b5d,tensorflow/tensorflow,"Add image decoding ops to flex delegate

These include:
- DecodeBmp
- DecodeGif
- DecodeJpeg
- DecodePng

PiperOrigin-RevId: 329224370
Change-Id: I696bd5bea4ab2cc570408b202d66058c7ca35a83",allowlisted_flex_ops.cc,"@@ -112,13 +112,7 @@ const std::set<std::string>& GetFlexAllowlist() {
           ""DataFormatVecPermute"",
           ""DebugGradientIdentity"",
           ""DebugGradientRefIdentity"",
-          ""DecodeAndCropJpeg"",
           ""DecodeBase64"",
-          ""DecodeBmp"",
-          ""DecodeGif"",
-          ""DecodeImage"",
-          ""DecodeJpeg"",
-          ""DecodePng"",
           ""DecodeRaw"",
           ""DecodeWav"",
           ""DeepCopy"",
@@ -139,9 +133,6 @@ const std::set<std::string>& GetFlexAllowlist() {
           ""EluGrad"",
           ""Empty"",
           ""EncodeBase64"",
-          ""EncodeJpeg"",
-          ""EncodeJpegVariableQuality"",
-          ""EncodePng"",
           ""EncodeWav"",
           ""EnsureShape"",
           ""Enter"",
",0,train
7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,beta_test.py,"@@ -322,6 +322,10 @@ class BetaTest(tf.test.TestCase):
             kl_val = sess.run(kl)
             self.assertEqual(kl.get_shape(), shape)
             self.assertAllClose(kl_val, kl_expected)
+        
+        # Make sure KL(d1||d1) is 0
+        kl_same = sess.run(tf.contrib.distributions.kl(d1, d1))
+        self.assertAllClose(kl_same, np.zeros_like(kl_expected))
 
 
 if __name__ == ""__main__"":
",0,train
7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,categorical_test.py,"@@ -226,16 +226,17 @@ class CategoricalTest(tf.test.TestCase):
 
           kl = tf.contrib.distributions.kl(a, b)
           kl_val = sess.run(kl)
+          # Make sure KL(a||a) is 0
+          kl_same = sess.run(tf.contrib.distributions.kl(a, a))
 
           prob_a = np_softmax(a_logits)
           prob_b = np_softmax(b_logits)
           kl_expected = np.sum(
-            prob_a * (np.log(prob_a) - np.log(prob_b)),
-            axis=-1,
-          )
+              prob_a * (np.log(prob_a) - np.log(prob_b)), axis=-1)
 
           self.assertEqual(kl.get_shape(), (batch_size,))
           self.assertAllClose(kl_val, kl_expected)
+          self.assertAllClose(kl_same, np.zeros_like(kl_expected))
 
 
 if __name__ == ""__main__"":
",0,train
7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,beta.py,"@@ -308,15 +308,16 @@ def _kl_beta_beta(d1, d2, name=None):
     Batchwise KL(d1 || d2)
   """"""
   inputs = [d1.a, d1.b, d1.a_b_sum, d2.a_b_sum]
-  with ops.name_scope(
-    name, ""kl_beta_beta"", inputs):
-      log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
-                 - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum)
-                 - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
-      digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a)
-                + (d1.b - d2.b)*math_ops.digamma(d1.b)
-                + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum))
-      return log_betas + digammas
+  with ops.name_scope(name, ""kl_beta_beta"", inputs):
+    # ln(B(a', b') / B(a, b))
+    log_betas = (math_ops.lgamma(d2.a) + math_ops.lgamma(d2.b)
+                - math_ops.lgamma(d2.a_b_sum) + math_ops.lgamma(d1.a_b_sum)
+                - math_ops.lgamma(d1.a) - math_ops.lgamma(d1.b))
+    # (a - a')*psi(a) + (b - b')*psi(b) + (a' - a + b' - b)*psi(a + b)
+    digammas = ((d1.a - d2.a)*math_ops.digamma(d1.a)
+              + (d1.b - d2.b)*math_ops.digamma(d1.b)
+              + (d2.a_b_sum - d1.a_b_sum)*math_ops.digamma(d1.a_b_sum))
+    return log_betas + digammas
 
 
 # Register KL divergences.
",0,train
7a0fa5db007318965d820127e318a3acbc42e701,tensorflow/tensorflow,Cleaned up code; added test for kl(p||p) = 0,categorical.py,"@@ -183,8 +183,7 @@ def _kl_categorical_categorical(a, b, name=None):
   """"""
   with ops.name_scope(
     name, ""kl_categorical_categorical"", [a.logits, b.logits]):
+    # sum(p*ln(p/q))
     return math_ops.reduce_sum(
         nn_ops.softmax(a.logits)*(nn_ops.log_softmax(a.logits)
-                                  - nn_ops.log_softmax(b.logits)),
-        reduction_indices=[-1],
-    )
+            - nn_ops.log_softmax(b.logits)), reduction_indices=[-1])
",0,train
03d13a4ddfde5ab2ac0edee72658d1f40fcfe3c9,tensorflow/tensorflow,"Delete seemingly unused and deprecated code.

PiperOrigin-RevId: 375467299
Change-Id: I2df58f7b6872697cf7534c3fc9a35aefde13fd93",status_bar.py,"@@ -1,24 +0,0 @@
-# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-
-""""""A no-op implementation of status bar functions.""""""
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-
-def SetupStatusBarInsideGoogle(unused_link_text, unused_port):
-  pass
",0,test
92ce8a70a5d6aacd84d4e4db250786fb86c2ac86,tensorflow/tensorflow,remove temp variable,grpc_worker_cache.cc,"@@ -43,7 +43,7 @@ class GrpcWorkerCache : public WorkerCachePartial {
           void* tag;
           bool ok;
           while (completion_queue_.Next(&tag, &ok)) {
-            GrpcClientCQTag* callback_tag = static_cast<GrpcClientCQTag*>(tag);
+            auto callback_tag = static_cast<GrpcClientCQTag*>(tag);
             callback_tag->OnCompleted(ok);
           }
         });
@@ -67,11 +67,10 @@ class GrpcWorkerCache : public WorkerCachePartial {
     if (target == local_target_) {
       return local_worker_;
     } else {
-      SharedGrpcChannelPtr channel = channel_cache_->FindWorkerChannel(target);
+      auto channel = channel_cache_->FindWorkerChannel(target);
       if (!channel) return nullptr;
-      WorkerInterface* ret = NewGrpcRemoteWorker(&live_rpc_counter_, channel,
-                                                 &completion_queue_, &logger_);
-      return ret;
+      return NewGrpcRemoteWorker(&live_rpc_counter_, channel,
+                                 &completion_queue_, &logger_);
     }
   }
 
",0,train
8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode.

There is a race condition in TensorFlow 1.x when assigning a cancellation_manager to recv_op, because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error.

PiperOrigin-RevId: 262415758",kernel_and_device.cc,"@@ -259,6 +259,7 @@ Status KernelAndDeviceOp::Run(ScopedStepContainer* step_container,
   }
 
   OpKernelContext::Params params;
+  params.is_eager = true;
   params.device = device_;
   params.frame_iter = FrameAndIter(0, 0);
   params.inputs = &inputs;
",0,train
8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode.

There is a race condition in TensorFlow 1.x when assigning a cancellation_manager to recv_op, because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error.

PiperOrigin-RevId: 262415758",op_kernel.h,"@@ -621,6 +621,9 @@ class OpKernelContext {
     // The step being executed.
     int64 step_id = 0;
 
+    // True if the op is created by eager runtime.
+    bool is_eager = false;
+
     // The op kernel being computed.
     OpKernel* op_kernel = nullptr;
 
@@ -738,6 +741,8 @@ class OpKernelContext {
 
   int64 step_id() const { return params_->step_id; }
 
+  bool is_eager() const { return params_->is_eager; }
+
   const OpKernel& op_kernel() const { return *params_->op_kernel; }
 
   // Input/output signature.
",0,train
8984bd30b49837893b95e44357264f5b4ee95118,tensorflow/tensorflow,"Only use cancellation_manager to cancel recv_op_ in eager mode.

There is a race condition in TensorFlow 1.x when assigning a cancellation_manager to recv_op, because both cancellation_manager.Cancel() and rendezvous::Abort() will be called if a session gets an error.

PiperOrigin-RevId: 262415758",sendrecv_ops.cc,"@@ -169,7 +169,12 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
   Rendezvous::Args args;
   args.device_context = ctx->op_device_context();
   args.alloc_attrs = ctx->output_alloc_attr(0);
-  args.cancellation_manager = ctx->cancellation_manager();
+  if (ctx->is_eager()) {
+    // NOTE(fishx): Only set cancellation_manager in eager mode. Because in
+    // TensorFlow 1.x, session (or graph_mgr) will abort the underlying
+    // rendezvous if it encounters any error.
+    args.cancellation_manager = ctx->cancellation_manager();
+  }
 
   FrameAndIter frame_iter = GetFrameAndIter(ctx, hostmem_sendrecv_);
   if (frame_iter == FrameAndIter(0, 0)) {
",0,train
cb45cab0a2813c5b5d5f28bfd870897e521ca924,tensorflow/tensorflow,"Restore constness of kStatTypeStrMap

PiperOrigin-RevId: 285455879
Change-Id: Ie4be8b1e001c3cda59057bb63cb865d22f5cd228",xplane_schema.cc,"@@ -22,7 +22,7 @@ const absl::string_view kHostThreads = ""Host Threads"";
 
 const int kNumStatTypes = static_cast<int>(StatType::kHloModule) + 1;
 
-static absl::string_view kStatTypeStrMap[kNumStatTypes] = {
+static const absl::string_view kStatTypeStrMap[kNumStatTypes] = {
     ""unknown"",         ""id"",
     ""parent_step_id"",  ""function_step_id"",
     ""device_ordinal"",  ""chip_ordinal"",
",0,train
0567f169e6910587e7dcca547aa5baeaaffbc03d,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 191020351",values.py,"@@ -243,7 +243,7 @@ def _tensor_conversion(var, dtype=None, name=None, as_ref=False):
 
 
 ops.register_tensor_conversion_function(DistributedVariable, _tensor_conversion)
-# TODO(josh11b): ops.register_dense_tensor_like_type(DistributedVariable)?
+ops.register_dense_tensor_like_type(DistributedVariable)
 
 
 class _MirroredSaveable(saver.BaseSaverBuilder.ResourceVariableSaveable):
",0,test
5e3932ce780cb2eae1549bf93481a1d6d181f00b,tensorflow/tensorflow,"Disable a flaky test test_group_conv

PiperOrigin-RevId: 314837253
Change-Id: I4deceead7a8a5b82a45b30025819a961dbdb5bb9",convolutional_test.py,"@@ -433,7 +433,7 @@ class GroupedConvTest(keras_parameterized.TestCase):
       ('Conv2D', keras.layers.Conv2D, (32, 12, 12, 32)),
       ('Conv3D', keras.layers.Conv3D, (32, 12, 12, 12, 32)),
   )
-  def test_group_conv(self, layer_cls, input_shape):
+  def disable_test_group_conv(self, layer_cls, input_shape):
     if test.is_gpu_available(cuda_only=True):
       with test_util.use_gpu():
         inputs = random_ops.random_uniform(shape=input_shape)
",0,train
47118ab5d64a0c8f93913294445424e2acb6c905,tensorflow/tensorflow,"Update keras dense layers to use embedding_lookup_sparse

We are switching to this instead of sparse_tensor_dense_matmul, which has performance
issue for large sparse tensor as it computes a dense gradient.

PiperOrigin-RevId: 361029309
Change-Id: I213c46b6e482817801e6b0d1e306c757da8672ce",core_test.py,"@@ -26,6 +26,7 @@ from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
@@ -33,6 +34,7 @@ from tensorflow.python.keras.layers import core
 from tensorflow.python.keras.mixed_precision import policy
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import sparse_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.platform import test
@@ -503,6 +505,36 @@ class CoreLayersTest(keras_parameterized.TestCase):
     testing_utils.layer_test(
         keras.layers.Dense, kwargs={'units': 3}, input_shape=(3, 4, 5, 2))
 
+  def test_dense_output(self):
+    dense_inputs = ops.convert_to_tensor_v2_with_dispatch(
+        np.random.uniform(size=(10, 10)).astype('f'))
+    # Create some sparse data where multiple rows and columns are missing.
+    sparse_inputs = sparse_tensor.SparseTensor(
+        indices=np.random.randint(low=0, high=10, size=(5, 2)),
+        values=np.random.uniform(size=(5,)).astype('f'),
+        dense_shape=[10, 10])
+    sparse_inputs = sparse_ops.sparse_reorder(sparse_inputs)
+
+    layer = keras.layers.Dense(
+        5,
+        kernel_initializer=keras.initializers.RandomUniform(),
+        bias_initializer=keras.initializers.RandomUniform(),
+        dtype='float32')
+    dense_outputs = layer(dense_inputs)
+    sparse_outputs = layer(sparse_inputs)
+
+    expected_dense = math_ops.add(
+        math_ops.matmul(dense_inputs, keras.backend.get_value(layer.kernel)),
+        keras.backend.get_value(layer.bias))
+    expected_sparse = math_ops.add(
+        math_ops.matmul(
+            sparse_ops.sparse_tensor_to_dense(sparse_inputs),
+            keras.backend.get_value(layer.kernel)),
+        keras.backend.get_value(layer.bias))
+
+    self.assertAllClose(dense_outputs, expected_dense)
+    self.assertAllClose(sparse_outputs, expected_sparse)
+
   def test_dense_dtype(self):
     inputs = ops.convert_to_tensor_v2_with_dispatch(
         np.random.randint(low=0, high=7, size=(2, 2)))
",0,train
47118ab5d64a0c8f93913294445424e2acb6c905,tensorflow/tensorflow,"Update keras dense layers to use embedding_lookup_sparse

We are switching to this instead of sparse_tensor_dense_matmul, which has performance
issue for large sparse tensor as it computes a dense gradient.

PiperOrigin-RevId: 361029309
Change-Id: I213c46b6e482817801e6b0d1e306c757da8672ce",core.py,"@@ -19,6 +19,7 @@ from __future__ import print_function
 
 from tensorflow.python.eager import context
 from tensorflow.python.framework import sparse_tensor
+from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import gen_math_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
@@ -47,8 +48,28 @@ def dense(inputs, kernel, bias=None, activation=None, dtype=None):
 
   rank = inputs.shape.rank
   if rank == 2 or rank is None:
+    # We use embedding_lookup_sparse as a more efficient matmul operation for
+    # large sparse input tensors. The op will result in a sparse gradient, as
+    # opposed to sparse_ops.sparse_tensor_dense_matmul which results in dense
+    # gradients. This can lead to significant speedups, see b/171762937.
     if isinstance(inputs, sparse_tensor.SparseTensor):
-      outputs = sparse_ops.sparse_tensor_dense_matmul(inputs, kernel)
+      # We need to fill empty rows, as the op assumes at least one id per row.
+      inputs, _ = sparse_ops.sparse_fill_empty_rows(inputs, 0)
+      # We need to do some munging of our input to use the embedding lookup as a
+      # matrix multiply. We split our input matrix into separate ids and weights
+      # tensors. The values of the ids tensor should be the column indices of
+      # our input matrix and the values of the weights tensor can continue to be
+      # the actual matrix weights. The column arrangement of ids and weights
+      # will be summed over and does not matter. See the documentation for
+      # sparse_ops.sparse_tensor_dense_matmul for a more detailed explanation of the
+      # inputs to both ops.
+      ids = sparse_tensor.SparseTensor(
+          indices=inputs.indices,
+          values=inputs.indices[:, 1],
+          dense_shape=inputs.dense_shape)
+      weights = inputs
+      outputs = embedding_ops.embedding_lookup_sparse_v2(
+          kernel, ids, weights, combiner=""sum"")
     else:
       outputs = gen_math_ops.MatMul(a=inputs, b=kernel)
   # Broadcast kernel to inputs.
",0,train
8f31716e68bb16ebc6a265b470297de695761882,tensorflow/tensorflow,"Added embedding learning rate multiplier support for DNN Classifier.
Change: 140412953",dnn.py,"@@ -23,6 +23,7 @@ from tensorflow.contrib import layers
 from tensorflow.contrib.framework import deprecated
 from tensorflow.contrib.framework import deprecated_arg_values
 from tensorflow.contrib.framework.python.ops import variables as contrib_variables
+from tensorflow.contrib.layers.python.layers import feature_column as feature_column_lib
 from tensorflow.contrib.layers.python.layers import optimizers
 from tensorflow.contrib.learn.python.learn import evaluable
 from tensorflow.contrib.learn.python.learn import monitors as monitor_lib
@@ -34,6 +35,7 @@ from tensorflow.contrib.learn.python.learn.estimators import model_fn
 from tensorflow.contrib.learn.python.learn.estimators import prediction_key
 from tensorflow.contrib.learn.python.learn.utils import export
 from tensorflow.python import summary
+from tensorflow.python.framework import ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import variable_scope
@@ -64,6 +66,31 @@ def _add_hidden_layer_summary(value, tag):
   summary.histogram(""%s_activation"" % tag, value)
 
 
+def _get_embedding_variable(column, collection_key, input_layer_scope):
+  return ops.get_collection(collection_key,
+                            input_layer_scope + ""/"" + column.name)
+
+
+def _extract_embedding_lr_multipliers(embedding_lr_multipliers, collection_key,
+                                      input_layer_scope):
+  """"""Convert embedding lr multipliers to variable based gradient multiplier.""""""
+  if not embedding_lr_multipliers:
+    return None
+  gradient_multipliers = {}
+  for column, lr_mult in embedding_lr_multipliers.items():
+    if not isinstance(column, feature_column_lib._EmbeddingColumn):  # pylint: disable=protected-access
+      raise ValueError(
+          ""learning rate multipler can be defined for embedding columns. ""
+          ""It is defined for {}"".format(column))
+    embedding = _get_embedding_variable(
+        column, collection_key, input_layer_scope)
+    if not embedding:
+      raise ValueError(""Couldn't find a variable for column {}"".format(column))
+    for v in embedding:
+      gradient_multipliers[v] = lr_mult
+  return gradient_multipliers
+
+
 def _dnn_model_fn(features, labels, mode, params):
   """"""Deep Neural Net model_fn.
 
@@ -89,6 +116,9 @@ def _dnn_model_fn(features, labels, mode, params):
       * gradient_clip_norm: A float > 0. If provided, gradients are
           clipped to their global norm with this clipping ratio.
       * num_ps_replicas: The number of parameter server replicas.
+      * embedding_lr_multipliers: Optional. A dictionary from
+        `EmbeddingColumn` to a `float` multiplier. Multiplier will be used to
+        multiply with learning rate for the embedding variables.
 
   Returns:
     predictions: A dict of `Tensor` objects.
@@ -103,6 +133,7 @@ def _dnn_model_fn(features, labels, mode, params):
   dropout = params.get(""dropout"")
   gradient_clip_norm = params.get(""gradient_clip_norm"")
   num_ps_replicas = params.get(""num_ps_replicas"", 0)
+  embedding_lr_multipliers = params.get(""embedding_lr_multipliers"", {})
 
   features = _get_feature_dict(features)
   parent_scope = ""dnn""
@@ -111,8 +142,9 @@ def _dnn_model_fn(features, labels, mode, params):
       partitioned_variables.min_max_variable_partitioner(
           max_partitions=num_ps_replicas,
           min_slice_size=64 << 20))
+  input_layer_scope = parent_scope + ""/input_from_feature_columns""
   with variable_scope.variable_scope(
-      parent_scope + ""/input_from_feature_columns"",
+      input_layer_scope,
       values=features.values(),
       partitioner=input_layer_partitioner) as scope:
     net = layers.input_from_feature_columns(
@@ -160,6 +192,8 @@ def _dnn_model_fn(features, labels, mode, params):
         global_step=contrib_variables.get_global_step(),
         learning_rate=_LEARNING_RATE,
         optimizer=_get_optimizer(optimizer),
+        gradient_multipliers=_extract_embedding_lr_multipliers(
+            embedding_lr_multipliers, parent_scope, input_layer_scope),
         clip_gradients=gradient_clip_norm,
         name=parent_scope,
         # Empty summaries to prevent optimizers from logging the training_loss.
@@ -234,7 +268,8 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
                gradient_clip_norm=None,
                enable_centered_bias=False,
                config=None,
-               feature_engineering_fn=None):
+               feature_engineering_fn=None,
+               embedding_lr_multipliers=None):
     """"""Initializes a DNNClassifier instance.
 
     Args:
@@ -271,6 +306,9 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
                         labels which are the output of `input_fn` and
                         returns features and labels which will be fed
                         into the model.
+      embedding_lr_multipliers: Optional. A dictionary from `EmbeddingColumn` to
+          a `float` multiplier. Multiplier will be used to multiply with
+          learning rate for the embedding variables.
 
     Returns:
       A `DNNClassifier` estimator.
@@ -287,17 +325,27 @@ class DNNClassifier(evaluable.Evaluable, trainable.Trainable):
         model_dir=model_dir,
         config=config,
         params={
-            ""head"": head_lib._multi_class_head(  # pylint: disable=protected-access
-                n_classes,
-                weight_column_name=weight_column_name,
-                enable_centered_bias=enable_centered_bias),
-            ""hidden_units"": hidden_units,
-            ""feature_columns"": feature_columns,
-            ""optimizer"": optimizer,
-            ""activation_fn"": activation_fn,
-            ""dropout"": dropout,
-            ""gradient_clip_norm"": gradient_clip_norm,
-            ""num_ps_replicas"": config.num_ps_replicas if config else 0,
+            ""head"":
+                head_lib._multi_class_head(  # pylint: disable=protected-access
+                    n_classes,
+                    weight_column_name=weight_column_name,
+                    enable_centered_bias=enable_centered_bias),
+            ""hidden_units"":
+                hidden_units,
+            ""feature_columns"":
+                feature_columns,
+            ""optimizer"":
+                optimizer,
+            ""activation_fn"":
+                activation_fn,
+            ""dropout"":
+                dropout,
+            ""gradient_clip_norm"":
+                gradient_clip_norm,
+            ""num_ps_replicas"":
+                config.num_ps_replicas if config else 0,
+            ""embedding_lr_multipliers"":
+                embedding_lr_multipliers,
         },
         feature_engineering_fn=feature_engineering_fn)
 
",0,train
8f31716e68bb16ebc6a265b470297de695761882,tensorflow/tensorflow,"Added embedding learning rate multiplier support for DNN Classifier.
Change: 140412953",dnn_test.py,"@@ -27,12 +27,88 @@ import numpy as np
 import tensorflow as tf
 
 from tensorflow.contrib.learn.python.learn.estimators import _sklearn
+from tensorflow.contrib.learn.python.learn.estimators import dnn
 from tensorflow.contrib.learn.python.learn.estimators import estimator_test_utils
+from tensorflow.contrib.learn.python.learn.estimators import head as head_lib
 from tensorflow.contrib.learn.python.learn.estimators import test_data
 from tensorflow.contrib.learn.python.learn.metric_spec import MetricSpec
 from tensorflow.python.ops import math_ops
 
 
+class EmbeddingMultiplierTest(tf.test.TestCase):
+  """"""dnn_model_fn tests.""""""
+
+  def testRaisesNonEmbeddingColumn(self):
+    one_hot_language = tf.contrib.layers.one_hot_column(
+        tf.contrib.layers.sparse_column_with_hash_bucket('language', 10))
+
+    params = {
+        'feature_columns': [one_hot_language],
+        'head': head_lib._multi_class_head(2),
+        'hidden_units': [1],
+        # Set lr mult to 0. to keep embeddings constant.
+        'embedding_lr_multipliers': {
+            one_hot_language: 0.0
+        },
+    }
+    features = {
+        'language':
+            tf.SparseTensor(
+                values=['en', 'fr', 'zh'],
+                indices=[[0, 0], [1, 0], [2, 0]],
+                shape=[3, 1]),
+    }
+    labels = tf.constant([[0], [0], [0]], dtype=tf.int32)
+    with self.assertRaisesRegexp(
+        ValueError, 'can be defined for embedding columns'):
+      dnn._dnn_model_fn(features, labels,
+                        tf.contrib.learn.ModeKeys.TRAIN, params)
+
+  def testMultipliesGradient(self):
+    embedding_language = tf.contrib.layers.embedding_column(
+        tf.contrib.layers.sparse_column_with_hash_bucket('language', 10),
+        dimension=1, initializer=tf.constant_initializer(0.1))
+    embedding_wire = tf.contrib.layers.embedding_column(
+        tf.contrib.layers.sparse_column_with_hash_bucket('wire', 10),
+        dimension=1, initializer=tf.constant_initializer(0.1))
+
+    params = {
+        'feature_columns': [embedding_language, embedding_wire],
+        'head': head_lib._multi_class_head(2),
+        'hidden_units': [1],
+        # Set lr mult to 0. to keep embeddings constant.
+        'embedding_lr_multipliers': {
+            embedding_language: 0.0
+        },
+    }
+    features = {
+        'language':
+            tf.SparseTensor(
+                values=['en', 'fr', 'zh'],
+                indices=[[0, 0], [1, 0], [2, 0]],
+                shape=[3, 1]),
+        'wire':
+            tf.SparseTensor(
+                values=['omar', 'stringer', 'marlo'],
+                indices=[[0, 0], [1, 0], [2, 0]],
+                shape=[3, 1]),
+    }
+    labels = tf.constant([[0], [0], [0]], dtype=tf.int32)
+    model_ops = dnn._dnn_model_fn(features, labels,
+                                  tf.contrib.learn.ModeKeys.TRAIN, params)
+    with tf.train.MonitoredSession() as sess:
+      language_var = dnn._get_embedding_variable(
+          embedding_language, 'dnn', 'dnn/input_from_feature_columns')
+      wire_var = dnn._get_embedding_variable(
+          embedding_wire, 'dnn', 'dnn/input_from_feature_columns')
+      for _ in range(2):
+        _, language_value, wire_value = sess.run(
+            [model_ops.train_op, language_var, wire_var])
+      initial_value = np.full_like(language_value, 0.1)
+      self.assertTrue(np.all(np.isclose(language_value, initial_value)))
+      self.assertFalse(np.all(np.isclose(wire_value, initial_value)))
+
+
 class DNNClassifierTest(tf.test.TestCase):
 
   def _assertInRange(self, expected_min, expected_max, actual):
@@ -118,10 +194,10 @@ class DNNClassifierTest(tf.test.TestCase):
     classifier = tf.contrib.learn.DNNClassifier(
         n_classes=2,
         feature_columns=feature_columns,
-        hidden_units=[3, 3],
+        hidden_units=[10, 10],
         config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
-    classifier.fit(input_fn=_input_fn, steps=5)
+    classifier.fit(input_fn=_input_fn, steps=50)
 
     scores = classifier.evaluate(input_fn=_input_fn, steps=1)
     self._assertInRange(0.0, 1.0, scores['accuracy'])
@@ -222,7 +298,7 @@ class DNNClassifierTest(tf.test.TestCase):
         n_classes=3,
         feature_columns=feature_columns,
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     classifier.fit(x=train_x, y=train_y, steps=200)
     scores = classifier.evaluate(x=train_x, y=train_y, steps=1)
@@ -310,7 +386,7 @@ class DNNClassifierTest(tf.test.TestCase):
         weight_column_name='w',
         feature_columns=[tf.contrib.layers.real_valued_column('x')],
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     classifier.fit(input_fn=_input_fn_train, steps=5)
     scores = classifier.evaluate(input_fn=_input_fn_eval, steps=1)
@@ -339,8 +415,8 @@ class DNNClassifierTest(tf.test.TestCase):
     classifier = tf.contrib.learn.DNNClassifier(
         n_classes=3,
         feature_columns=feature_columns,
-        hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        hidden_units=[10, 10],
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     classifier.fit(input_fn=_input_fn, steps=100)
 
@@ -524,7 +600,7 @@ class DNNClassifierTest(tf.test.TestCase):
     }
     with tf.test.mock.patch.dict('os.environ',
                                  {'TF_CONFIG': json.dumps(tf_config)}):
-      config = tf.contrib.learn.RunConfig(tf_random_seed=5)
+      config = tf.contrib.learn.RunConfig(tf_random_seed=1)
       # Because we did not start a distributed cluster, we need to pass an
       # empty ClusterSpec, otherwise the device_setter will look for
       # distributed jobs, such as ""/job:ps"" which are not present.
@@ -707,7 +783,7 @@ class DNNRegressorTest(tf.test.TestCase):
     regressor = tf.contrib.learn.DNNRegressor(
         feature_columns=[tf.contrib.layers.real_valued_column('x')],
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn_train, steps=5)
     scores = regressor.evaluate(input_fn=_input_fn_train, steps=1)
@@ -772,7 +848,7 @@ class DNNRegressorTest(tf.test.TestCase):
         weight_column_name='w',
         feature_columns=[tf.contrib.layers.real_valued_column('x')],
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn_train, steps=5)
     scores = regressor.evaluate(input_fn=_input_fn_eval, steps=1)
@@ -803,7 +879,7 @@ class DNNRegressorTest(tf.test.TestCase):
     regressor = tf.contrib.learn.DNNRegressor(
         feature_columns=feature_columns,
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn, steps=200)
 
@@ -837,7 +913,7 @@ class DNNRegressorTest(tf.test.TestCase):
     regressor = tf.contrib.learn.DNNRegressor(
         feature_columns=feature_columns,
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn, steps=200)
 
@@ -918,7 +994,7 @@ class DNNRegressorTest(tf.test.TestCase):
         model_dir=model_dir,
         feature_columns=feature_columns,
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn, steps=5)
     predict_input_fn = functools.partial(_input_fn, num_epochs=1)
@@ -929,7 +1005,7 @@ class DNNRegressorTest(tf.test.TestCase):
         model_dir=model_dir,
         feature_columns=feature_columns,
         hidden_units=[3, 3],
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
     predictions2 = list(regressor2.predict(input_fn=predict_input_fn))
     self.assertAllClose(predictions, predictions2)
 
@@ -1004,7 +1080,7 @@ class DNNRegressorTest(tf.test.TestCase):
         feature_columns=feature_columns,
         hidden_units=[3, 3],
         enable_centered_bias=True,
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn, steps=5)
     self.assertIn('centered_bias_weight', regressor.get_variable_names())
@@ -1037,7 +1113,7 @@ class DNNRegressorTest(tf.test.TestCase):
         feature_columns=feature_columns,
         hidden_units=[3, 3],
         enable_centered_bias=False,
-        config=tf.contrib.learn.RunConfig(tf_random_seed=3))
+        config=tf.contrib.learn.RunConfig(tf_random_seed=1))
 
     regressor.fit(input_fn=_input_fn, steps=5)
     self.assertNotIn('centered_bias_weight', regressor.get_variable_names())
",0,train
f54e6f114d5180ce36a9e57ab0b4d8485ee81544,tensorflow/tensorflow,"Fix bug in function conversion that caused errors in the case of nested function calls.

PiperOrigin-RevId: 275230384
Change-Id: I5ab3d2e869d6b645502fbf288b245c8df45879db",call_trees.py,"@@ -62,7 +62,8 @@ class _ArgTemplateBuilder(object):
 
   def _consume_args(self):
     if self._arg_accumulator:
-      self._argspec.append(gast.Tuple(elts=self._arg_accumulator, ctx=None))
+      self._argspec.append(
+          gast.Tuple(elts=self._arg_accumulator, ctx=gast.Load()))
       self._arg_accumulator = []
 
   def add_arg(self, a):
@@ -84,7 +85,7 @@ class _ArgTemplateBuilder(object):
       for i in range(1, len(self._argspec)):
         result = gast.BinOp(result, gast.Add(), self._argspec[i])
       return result
-    return gast.Tuple([], None)
+    return gast.Tuple([], gast.Load())
 
 
 class CallTreeTransformer(converter.Base):
",0,test
f54e6f114d5180ce36a9e57ab0b4d8485ee81544,tensorflow/tensorflow,"Fix bug in function conversion that caused errors in the case of nested function calls.

PiperOrigin-RevId: 275230384
Change-Id: I5ab3d2e869d6b645502fbf288b245c8df45879db",call_trees_test.py,"@@ -29,7 +29,7 @@ from tensorflow.python.platform import test
 
 class CallTreesTest(converter_testing.TestCase):
 
-  def test_normal_function(self):
+  def test_function_no_args(self):
 
     def test_fn(f):
       return f() + 20
@@ -80,6 +80,24 @@ class CallTreesTest(converter_testing.TestCase):
           ((20,), None),
       ])
 
+  def test_function_with_single_arg(self):
+
+    def test_fn(f, a):
+      return f(a) + 20
+
+    with self.converted(test_fn, (function_scopes, call_trees), {}) as result:
+      self.assertEqual(result.test_fn(lambda a: a, 1), 21)
+      self.assertListEqual(self.dynamic_calls, [((1,), None)])
+
+  def test_function_with_args_only(self):
+
+    def test_fn(f, a, b):
+      return f(a, b) + 300
+
+    with self.converted(test_fn, (function_scopes, call_trees), {}) as result:
+      self.assertEqual(result.test_fn(lambda a, b: a + b, 1, 20), 321)
+      self.assertListEqual(self.dynamic_calls, [((1, 20), None)])
+
   def test_function_with_kwarg(self):
 
     def test_fn(f, a, b):
@@ -159,6 +177,20 @@ class CallTreesTest(converter_testing.TestCase):
   #           'e': 5
   #       })])
 
+  def test_function_with_call_in_lambda_argument(self):
+
+    def f(l, a):
+      return l(a) + 4000
+
+    def g(a, *args):
+      return a + sum(args)
+
+    def test_fn(f, g, a, *args):
+      return f(lambda x: g(x, *args), a)
+
+    with self.converted(test_fn, (function_scopes, call_trees), {}) as result:
+      self.assertEqual(result.test_fn(f, g, 1, *(20, 300)), 4321)
+
   def test_debugger_set_trace(self):
 
     tracking_list = []
",0,test
50736c76221ca0d28356f475442b8543e7505250,tensorflow/tensorflow,"Allow 64bit ids for TPU embedding workloads.

PiperOrigin-RevId: 426991414
Change-Id: I2833279c932ccee5e71713993882c9233c980e60",tpu_embedding_v2.py,"@@ -912,7 +912,7 @@ class TPUEmbedding(tracking.AutoTrackable):
           ""Weight will always be 1 in this case."".format(path))
     # For tensors, there are no indices and no weights.
     indices.append(int_zeros)
-    values.append(math_ops.cast(array_ops.reshape(tensor, [-1]), dtypes.int32))
+    values.append(math_ops.cast(array_ops.reshape(tensor, [-1]), dtypes.int64))
     weights.append(float_zeros)
 
   def _add_data_for_sparse_tensor(self, tensor, weight, indices, values,
@@ -925,7 +925,7 @@ class TPUEmbedding(tracking.AutoTrackable):
         sample_indices = array_ops.pad(
             sample_indices, paddings=[[0, 0], [0, 1]])
     indices.append(sample_indices)
-    values.append(math_ops.cast(tensor.values, dtypes.int32))
+    values.append(math_ops.cast(tensor.values, dtypes.int64))
     # If we have weights they must be a SparseTensor.
     if weight is not None:
       if not isinstance(weight, sparse_tensor.SparseTensor):
@@ -940,7 +940,7 @@ class TPUEmbedding(tracking.AutoTrackable):
                                   weights, int_zeros, float_zeros, path,
                                   feature):
     row_splits.append(math_ops.cast(tensor.row_splits, dtypes.int32))
-    values.append(math_ops.cast(tensor.values, dtypes.int32))
+    values.append(math_ops.cast(tensor.values, dtypes.int64))
     # If we have weights they must be a RaggedTensor.
     if weight is not None:
       if not isinstance(weight, ragged_tensor.RaggedTensor):
",0,train
bbb3ae0790f042d2bc5f6cce434c75c698d4a978,tensorflow/tensorflow,"Automated rollback of commit 394db95965e1d745f08b4eeb550878ddc175af15

PiperOrigin-RevId: 209082119",quantize.py,"@@ -198,7 +198,7 @@ def _FindLayersToQuantize(graph):
             |
     [post_conv_correction]
             |
-     biasadd|folded_bias
+     [biasadd|folded_bias]
             |
          [bypass]
             |
@@ -320,6 +320,7 @@ def _FindLayersToQuantize(graph):
               folded_bias_add_pattern,
               batch_norm_identity,
               bypass_pattern,
+              layer_pattern,
           ])
       ])
 
",0,train
bbb3ae0790f042d2bc5f6cce434c75c698d4a978,tensorflow/tensorflow,"Automated rollback of commit 394db95965e1d745f08b4eeb550878ddc175af15

PiperOrigin-RevId: 209082119",quantize_test.py,"@@ -194,6 +194,33 @@ class QuantizeTest(test_util.TensorFlowTestCase):
 
         self.assertNotIn('test/relu6', [c.name for c in consumers])
 
+  def testLayerActivationQuantized(self):
+    self._RunTestOverParameters(self._TestLayerActivationQuantized)
+
+  def _TestLayerActivationQuantized(self, is_training):
+    graph = ops.Graph()
+    with graph.as_default():
+      batch_size, height, width, depth = 5, 128, 128, 3
+      input1 = array_ops.zeros((batch_size, height, width, depth))
+      _ = conv2d(
+          input1,
+          32, [5, 5],
+          stride=2,
+          padding='SAME',
+          weights_initializer=self._WeightInit(0.09),
+          activation_fn=nn_ops.relu6,
+          biases_initializer=None,
+          scope='test')
+      # Ensure that both weights and output of activations are quantized
+      # when we have a conv->relu6 with no bias add
+      quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)
+      activation_op = graph.get_operation_by_name('test/Relu6')
+      conv_op = graph.get_operation_by_name('test/Conv2D')
+      self.assertTrue('test/weights_quant/FakeQuantWithMinMaxVars:0' in
+                      [tensor_in.name for tensor_in in conv_op.inputs])
+      self.assertTrue('FakeQuantWithMinMaxVars' in
+                      [op.type for op in activation_op.outputs[0].consumers()])
+
   def testFinalLayerQuantized(self):
     self._RunTestOverParameters(self._TestFinalLayerQuantized)
 
",0,train
9c6164dbcbaa4441c177653ac6ca51133c6363be,tensorflow/tensorflow,Optimize dependencies in expected graph in tests to emulate dependency optimizer + avoid constant duplicates in tests as they get optimized,base_test.py,"@@ -129,14 +129,14 @@ class SimpleMultiEnginesTest2(trt_test.TfTrtIntegrationTestBase):
     """"""Create a graph containing two segments.""""""
     n = inp
     for i in range(2):
-      c = constant_op.constant(1.0, name=""c%d"" % i)
+      c = constant_op.constant(float(i), name=""c%d"" % i)
       n = math_ops.add(n, c, name=""add%d"" % i)
       n = math_ops.mul(n, n, name=""mul%d"" % i)
     n = self.trt_incompatible_op(n, name=""incompatible"")
-    c = constant_op.constant(1.0, name=""c2"")
+    c = constant_op.constant(2.0, name=""c2"")
     n = math_ops.add(n, c, name=""add2"")
     n = math_ops.mul(n, n, name=""mul2"")
-    c = constant_op.constant(1.0, name=""c3"")
+    c = constant_op.constant(3.0, name=""c3"")
     n = math_ops.add(n, c, name=""add3"")
     n = math_ops.mul(n, n, name=""mul3"")
     return array_ops.squeeze(n, name=""output_0"")
@@ -169,21 +169,18 @@ class ConstInputTest(trt_test.TfTrtIntegrationTestBase):
     """"""Create a graph containing multiple segment.""""""
     n = inp
     c = constant_op.constant(1.0, name=""c"")
-    # Adds control dependency from the constant op to a trt incompatible op,
-    # and adds control dependency from the trt incompatible op to all other
+    # Adds data dependency from the constant op to a trt incompatible op,
+    # and adds data dependency from the trt incompatible op to the other
     # ops, to make sure the constant op cannot be contracted with any trt
     # segment that depends on it.
-    with ops.control_dependencies([c]):
-      d = self.trt_incompatible_op(n, name=""incompatible"")
-    with ops.control_dependencies([d]):
-      n = math_ops.add(n, c, name=""add"")
-      n = math_ops.mul(n, n, name=""mul"")
-      n = math_ops.add(n, n, name=""add1"")
+    n = self.trt_incompatible_binary_op(n, c, name=""incompatible"")
+    n = math_ops.add(n, c, name=""add"")
+    n = math_ops.mul(n, n, name=""mul"")
+    n = math_ops.add(n, n, name=""add1"")
     n = self.trt_incompatible_op(n, name=""incompatible1"")
-    with ops.control_dependencies([d]):
-      n = math_ops.add(n, c, name=""add2"")
-      n = math_ops.mul(n, n, name=""mul1"")
-      n = math_ops.add(n, n, name=""add3"")
+    n = math_ops.add(n, c, name=""add2"")
+    n = math_ops.mul(n, n, name=""mul1"")
+    n = math_ops.add(n, n, name=""add3"")
     return array_ops.squeeze(n, name=""output_0"")
 
   def GetParams(self):
@@ -255,25 +252,21 @@ class ConstDataInputMultipleEnginesTest(trt_test.TfTrtIntegrationTestBase):
 class ControlDependencyTest(trt_test.TfTrtIntegrationTestBase):
 
   def GraphFn(self, inp):
-    """"""Create a graph containing multiple segment.""""""
+    """"""Create a graph containing multiple segments.""""""
     c1 = constant_op.constant(1.0, name=""c1"")
-    c2 = constant_op.constant(1.0, name=""c2"")
-    d1 = constant_op.constant(1.0, name=""d1"")
-    d2 = self.trt_incompatible_op(inp, name=""d2"")
-    with ops.control_dependencies([d1, d2]):
+    c2 = constant_op.constant(2.0, name=""c2"")
+    d1 = self.trt_incompatible_op(inp, name=""d1"")
+    with ops.control_dependencies([d1]):
       add = math_ops.add(inp, c1, name=""add"")
-    with ops.control_dependencies([d1, d2]):
-      mul = math_ops.mul(add, add, name=""mul"")
-    with ops.control_dependencies([d1, d2]):
-      add1 = math_ops.add(mul, mul, name=""add1"")
+    mul = math_ops.mul(add, add, name=""mul"")
+    add1 = math_ops.add(mul, mul, name=""add1"")
     edge = self.trt_incompatible_op(add1, name=""incompatible"")
-    with ops.control_dependencies([d1, d2, add, mul]):
+    with ops.control_dependencies([d1, add1]):
       add2 = math_ops.add(edge, c2, name=""add2"")
-    with ops.control_dependencies([d1, d2, add1, mul]):
-      mul1 = math_ops.mul(add2, add2, name=""mul1"")
-    with ops.control_dependencies([d1, d2, add, add1]):
-      add3 = math_ops.add(mul1, mul1, name=""add3"")
-    return array_ops.squeeze(add3, name=""output_0"")
+    mul1 = math_ops.mul(add2, add2, name=""mul1"")
+    add3 = math_ops.add(mul1, mul1, name=""add3"")
+    inc = self.trt_incompatible_binary_op(d1, add3, name=""incompatible1"")
+    return array_ops.squeeze(inc, name=""output_0"")
 
   def GetParams(self):
     shapes = [[2, 32, 32, 3]]
",0,test
9c6164dbcbaa4441c177653ac6ca51133c6363be,tensorflow/tensorflow,Optimize dependencies in expected graph in tests to emulate dependency optimizer + avoid constant duplicates in tests as they get optimized,tf_trt_integration_test_base.py,"@@ -15,6 +15,7 @@
 """"""Utilities to test TF-TensorRT integration.""""""
 
 import collections
+import copy
 import errno
 import gc
 import itertools
@@ -117,6 +118,10 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
   def trt_incompatible_op(self):
     return math_ops.erfc
 
+  @property
+  def trt_incompatible_binary_op(self):
+    return math_ops.igamma
+
   @property
   def precision_modes(self):
     return [""FP32"", ""FP16"", ""INT8""]
@@ -625,6 +630,59 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
         if k not in removed_const_nodes
     }
 
+    def dependency_optimization(input_map):
+      """"""Transitive reduction of the control dependencies.""""""
+      # 1. Topological sort.
+      working_edges = copy.deepcopy(input_map)
+      # Populate a set with all the nodes without incoming edges.
+      working_set = {
+        name for name in working_edges if len(working_edges[name]) == 0}
+      sorted_nodes = []
+      while working_set:
+        # Take a node from the set and add it to the sorted list.
+        node0 = working_set.pop()
+        sorted_nodes.append(node0)
+        # Remove outgoing edges and add nodes to the set if they have no
+        # incoming edge remaining.
+        for node1 in list(working_edges.keys()):
+          for edge_name in (node0, ""^"" + node0):
+            if edge_name in working_edges[node1]:
+              working_edges[node1].remove(edge_name)
+              if not working_edges[node1]:
+                working_set.add(node1)
+      if sum(len(edges) for edges in working_edges.values()):
+        raise ValueError(""Input map doesn't represent a DAG!"")
+
+      # 2. Transitive reduction.
+      for i in range(len(sorted_nodes) - 1):
+        dep_name = ""^"" + sorted_nodes[i]
+        # Identify nodes which have a control edge from the current one.
+        targets = [
+          j for j in range(i + 1, len(sorted_nodes))
+          if dep_name in input_map[sorted_nodes[j]]]
+        if not targets:
+          continue
+        # Compute max path lengths until the last target node.
+        path_lengths = {sorted_nodes[i]: 0}
+        for j in range(i + 1, targets[-1] + 1):
+          j_name = sorted_nodes[j]
+          length = None
+          for edge_name in input_map[j_name]:
+            _, name = _InputName(edge_name)
+            if name in path_lengths and \
+              (length is None or path_lengths[name] >= length):
+              length = path_lengths[name] + 1
+          if length is not None:
+            path_lengths[j_name] = length
+        # Remove the control dependency of targets if there is a path of
+        # length strictly greater than 1 from the current node.
+        for j in targets:
+          j_name = sorted_nodes[j]
+          if path_lengths[j_name] > 1:
+            input_map[j_name].remove(dep_name)
+
+    dependency_optimization(expected_input_map)
+
     # Compute the actual mapping from each node to its input nodes. If a cast
     # op doesn't exist in the original graph, we replace the use of the cast op
     # with the input of the op. This allows the verification to handle the case
",0,test
f29ef287148d2912e941c70891866d57de2fae04,tensorflow/tensorflow,"Fix crash in tf.gather gradient when indices rank was unknown.

A crash will still occur if the rank is unknown and batch_dims < 0, since this case is hard to fix, but a sensible error message was added.

This fixes a crash in sparse_softmax_cross_entropy_with_logits when determinism is enabled, as that function uses gather with batch_dims >= 0 when determinism is enabled.

PiperOrigin-RevId: 398762499
Change-Id: I149567e53a5fcfbc69ab4bba35a73ca0ae81e967",gather_op_test.py,"@@ -31,7 +31,9 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
@@ -478,6 +480,32 @@ class GatherTest(test.TestCase, parameterized.TestCase):
     result = array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims)
     self.assertAllEqual(expected, result)
 
+    # Test gradients
+    f64_params = math_ops.cast(params, dtypes.float64)
+    def gather(params):
+      return array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims)
+    theoretical, numerical = gradient_checker_v2.compute_gradient(
+        gather, [f64_params])
+    self.assertAllClose(theoretical, numerical)
+
+    # Test gradients when input shapes are unknown
+    @def_function.function(input_signature=[
+        tensor_spec.TensorSpec(shape=None, dtype=dtypes.float64),
+        tensor_spec.TensorSpec(shape=None, dtype=dtypes.int32)
+    ])
+    def gather_unknown_shapes(params, indices):
+      return array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims)
+    if batch_dims is None or batch_dims >= 0:
+      theoretical, numerical = gradient_checker_v2.compute_gradient(
+          lambda p: gather_unknown_shapes(p, indices), [f64_params])
+      self.assertAllClose(theoretical, numerical)
+    else:
+      with self.assertRaisesRegex(
+          ValueError,
+          ""Currently, it is unsupported to take the gradient of tf.gather""):
+        gradient_checker_v2.compute_gradient(
+            lambda p: gather_unknown_shapes(p, indices), [f64_params])
+
     # Test the gradients shape.
     with backprop.GradientTape() as tape:
       zeros = array_ops.zeros_like(params, dtype=dtypes.float32)
",0,test
f29ef287148d2912e941c70891866d57de2fae04,tensorflow/tensorflow,"Fix crash in tf.gather gradient when indices rank was unknown.

A crash will still occur if the rank is unknown and batch_dims < 0, since this case is hard to fix, but a sensible error message was added.

This fixes a crash in sparse_softmax_cross_entropy_with_logits when determinism is enabled, as that function uses gather with batch_dims >= 0 when determinism is enabled.

PiperOrigin-RevId: 398762499
Change-Id: I149567e53a5fcfbc69ab4bba35a73ca0ae81e967",array_grad.py,"@@ -586,7 +586,6 @@ def _GatherGrad(op, grad):
 def _GetBatchIndices(params_shape, indices, batch_dims):
   """"""Addds the batch offsets to the given indices and returns the results.""""""
   batch_indices = indices
-  indices_ndims = indices.shape.ndims
   indices_dtype = indices.dtype.base_dtype
   casted_params_shape = math_ops.cast(params_shape, indices_dtype)
   accum_dim_value = array_ops.ones((), dtype=indices_dtype)
@@ -597,8 +596,10 @@ def _GetBatchIndices(params_shape, indices, batch_dims):
     step = array_ops.ones((), dtype=indices_dtype)
     dim_indices = math_ops.range(start, dim_value, step)
     dim_indices *= accum_dim_value
-    dim_shape = array_ops.stack(
-        [1] * (dim - 1) + [dim_value] + [1] * (indices_ndims - dim), axis=0)
+    dim_shape = array_ops.concat([
+        array_ops.tile([1], [dim - 1]), [dim_value],
+        array_ops.tile([1], [array_ops.rank(indices) - dim])
+    ], axis=0)
     batch_indices += array_ops.reshape(dim_indices, dim_shape)
 
   return batch_indices
@@ -655,6 +656,13 @@ def _GatherV2Grad(op, grad):
   batch_dims = int(op.get_attr(""batch_dims""))
 
   if batch_dims < 0:
+    if indices.shape.ndims is None:
+      raise ValueError(
+          f""Currently, it is unsupported to take the gradient of tf.gather ""
+          f""when batch_dims < 0 and the rank of the indices is unknown. Please ""
+          f""pass a positive batch_dims or use tf.ensure_shape to update the ""
+          f""shape of indices when calling tf.gather. Got ""
+          f""batch_dims={batch_dims} and indices={indices}"")
     batch_dims += indices.shape.ndims
 
   # For axis 0 gathers, build an appropriately shaped IndexedSlices.
",0,test
85011622c0df59cbd03bd7d4e035d6c3521832dd,tensorflow/tensorflow,"Added more fine-grained shape inference for TensorArray such that partly unknown shapes are supported.
Change: 143288671",tensor_array_ops_test.py,"@@ -960,6 +960,46 @@ class TensorArrayTest(test.TestCase):
       with self.assertRaises(ValueError):
         w0.write(0, c2)
 
+  def testPartlyUnknownShape(self):
+    with self.test_session():
+      ta = tensor_array_ops.TensorArray(
+          dtype=dtypes.float32, tensor_array_name=""foo"", size=6)
+
+      c0 = array_ops.placeholder(dtypes.float32, [None, None, None, 3])
+      w0 = ta.write(0, c0)
+      r0 = w0.read(0)
+      self.assertAllEqual([None, None, None, 3], r0.get_shape().as_list())
+
+      c1 = array_ops.placeholder(dtypes.float32, [None, None, None, 3])
+      w1 = w0.write(1, c1)
+      r1 = w1.read(0)
+      self.assertAllEqual([None, None, None, 3], r1.get_shape().as_list())
+
+      # Writing less specific shape (doesn't change type.)
+      c2 = array_ops.placeholder(dtypes.float32, [None, None, None, None])
+      w2 = w1.write(2, c2)
+      r2 = w2.read(0)
+      self.assertAllEqual([None, None, None, 3], r2.get_shape().as_list())
+
+      # Writing more specific shape in one dimension and less specific in
+      # another.
+      c3 = array_ops.placeholder(dtypes.float32, [None, None, 2, None])
+      w3 = w2.write(3, c3)
+      r3 = w3.read(0)
+      self.assertAllEqual([None, None, 2, 3], r3.get_shape().as_list())
+
+      # Writing partly defined shape using TensorArray.scatter.
+      c4 = array_ops.placeholder(dtypes.float32, [2, None, 4, 2, 3])
+      w4 = w3.scatter([4, 5], c4)
+      r4 = w4.read(0)
+      self.assertAllEqual([None, 4, 2, 3], r4.get_shape().as_list())
+
+      # Writing fully defined shape using TensorArray.split.
+      c5 = array_ops.placeholder(dtypes.float32, [10, 4, 2, 3])
+      w5 = w4.split(c5, constant_op.constant([5, 5]))
+      r5 = w5.read(0)
+      self.assertAllEqual([5, 4, 2, 3], r5.get_shape().as_list())
+
   def _testUnpackShape(self):
     with self.test_session():
       ta = tensor_array_ops.TensorArray(
",0,train
85011622c0df59cbd03bd7d4e035d6c3521832dd,tensorflow/tensorflow,"Added more fine-grained shape inference for TensorArray such that partly unknown shapes are supported.
Change: 143288671",tensor_array_ops.py,"@@ -196,6 +196,26 @@ class TensorArray(object):
     """"""The reference to the TensorArray.""""""
     return self._handle
 
+  def _merge_element_shape(self, shape):
+    """"""Changes the element shape of the array given a shape to merge with.
+
+    Args:
+      shape: A `TensorShape` object to merge with.
+
+    Raises:
+      ValueError: if the provided shape is incompatible with the current
+          element shape of the `TensorArray`.
+    """"""
+
+    if self._element_shape:
+      if not shape.is_compatible_with(self._element_shape[0]):
+        raise ValueError(
+            ""Inconsistent shapes: saw %s but expected %s ""
+            ""(and infer_shape=True)"" % (shape, self._element_shape[0]))
+      self._element_shape[0] = self._element_shape[0].merge_with(shape)
+    else:
+      self._element_shape.append(shape)
+
   def grad(self, source, flow=None, name=None):
     # tensor_array_grad requires a flow input when forward
     # TensorArrays are dynamically sized.  This forces the creation
@@ -267,14 +287,7 @@ class TensorArray(object):
       ta._infer_shape = self._infer_shape
       ta._element_shape = self._element_shape
       if ta._infer_shape:
-        val_shape = value.get_shape()
-        if ta._element_shape:
-          if not val_shape == ta._element_shape[0]:
-            raise ValueError(""Inconsistent shapes: saw %s but expected %s ""
-                             ""(and infer_shape=True)"" %
-                             (val_shape, ta._element_shape[0]))
-        else:
-          ta._element_shape.append(val_shape)
+        ta._merge_element_shape(value.get_shape())
       return ta
 
   def stack(self, name=None):
@@ -423,13 +436,7 @@ class TensorArray(object):
         element_shape = tensor_shape.unknown_shape()
         if val_shape.dims is not None:
           element_shape = tensor_shape.TensorShape(val_shape.dims[1:])
-        if ta._element_shape:
-          if not element_shape == ta._element_shape[0]:
-            raise ValueError(""Inconsistent shapes: saw %s but expected %s ""
-                             ""(and infer_shape=True)"" %
-                             (element_shape, ta._element_shape[0]))
-        else:
-          ta._element_shape.append(element_shape)
+        ta._merge_element_shape(element_shape)
       return ta
 
   def split(self, value, lengths, name=None):
@@ -471,13 +478,7 @@ class TensorArray(object):
           if clengths is not None and clengths.max() == clengths.min():
             element_shape = tensor_shape.TensorShape([clengths[0]] +
                                                      val_shape.dims[1:])
-        if ta._element_shape:
-          if not element_shape == ta._element_shape[0]:
-            raise ValueError(""Inconsistent shapes: saw %s but expected %s ""
-                             ""(and infer_shape=True)"" %
-                             (element_shape, ta._element_shape[0]))
-        else:
-          ta._element_shape.append(element_shape)
+        ta._merge_element_shape(element_shape)
       return ta
 
   def size(self, name=None):
",0,train
ac40a68cd5fac2227fb6c4086b2eb01a7dc726c4,tensorflow/tensorflow,"Fix iterable bug

Co-authored-by: melissagrueter <melissagrueter@fb.com>
Co-authored-by: irenedea <irenedea@fb.com>",op_specs.cc,"@@ -91,11 +91,6 @@ class TypeResolver {
 
 Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) {
   *iterable_out = false;
-  if (!arg_def.number_attr().empty()) {
-    // when number_attr is set, argument has to be a list of tensors
-    *iterable_out = true;
-    visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int()));
-  }
   Type type = Type::Wildcard();
   if (arg_def.type() != DataType::DT_INVALID) {
     type = Type::ForDataType(arg_def.type());
@@ -122,6 +117,11 @@ Type TypeResolver::TypeOf(const OpDef_ArgDef& arg_def, bool* iterable_out) {
     LOG(FATAL) << ""Cannot resolve data type of argument \"""" << arg_def.name()
                << ""\"" in operation \"""" << op_def_.name() << ""\"""";
   }
+  if (!arg_def.number_attr().empty()) {
+    // when number_attr is set, argument has to be a list of tensors
+    *iterable_out = true;
+    visited_attrs_.insert(std::make_pair(arg_def.number_attr(), Type::Int()));
+  }
   return type;
 }
 
",0,train
f13b76a52886e39d8e97c9256c383eb3b79748d8,tensorflow/tensorflow,"Replace the fake updates with no updates when not training. This is possible now that the tf.cond bug has been fixed, and is needed to remove rare data races.

PiperOrigin-RevId: 173601427",normalization.py,"@@ -436,27 +436,30 @@ class BatchNormalization(base.Layer):
     if dmax is not None:
       d = math_ops.maximum(d, -dmax)
       d = math_ops.minimum(d, dmax)
-    # When not training, use r=1, d=0, and decay=1 meaning no updates.
+    # When not training, use r=1, d=0.
     r = utils.smart_cond(training, lambda: r, lambda: array_ops.ones_like(r))
     d = utils.smart_cond(training, lambda: d, lambda: array_ops.zeros_like(d))
-    decay = utils.smart_cond(training, lambda: self.renorm_momentum, lambda: 1.)
 
     def _update_renorm_variable(var, weight, value):
       """"""Updates a moving average and weight, returns the unbiased value.""""""
-      # Update the variables without zero debiasing. The debiasing will be
-      # accomplished by dividing the exponential moving average by the weight.
-      # For example, after a single update, the moving average would be
-      # (1-decay) * value. and the weight will be 1-decay, with their ratio
-      # giving value.
-      # Make sure the weight is not updated until before r and d computation.
       value = array_ops.identity(value)
-      with ops.control_dependencies([value]):
-        weight_value = array_ops.constant(1., dtype=weight.dtype)
-      new_var = moving_averages.assign_moving_average(
-          var, value, decay, zero_debias=False)
-      new_weight = moving_averages.assign_moving_average(
-          weight, weight_value, decay, zero_debias=False)
-      return new_var / new_weight
+      def _do_update():
+        # Update the variables without zero debiasing. The debiasing will be
+        # accomplished by dividing the exponential moving average by the weight.
+        # For example, after a single update, the moving average would be
+        # (1-decay) * value. and the weight will be 1-decay, with their ratio
+        # giving the value.
+        # Make sure the weight is not updated until before r and d computation.
+        with ops.control_dependencies([value]):
+          weight_value = array_ops.constant(1., dtype=weight.dtype)
+        new_var = moving_averages.assign_moving_average(
+            var, value, self.renorm_momentum, zero_debias=False)
+        new_weight = moving_averages.assign_moving_average(
+            weight, weight_value, self.renorm_momentum, zero_debias=False)
+        return new_var / new_weight
+      def _fake_update():
+        return array_ops.identity(var)
+      return utils.smart_cond(training, _do_update, _fake_update)
 
     with ops.colocate_with(self.moving_mean):
       new_mean = _update_renorm_variable(self.renorm_mean,
@@ -562,8 +565,6 @@ class BatchNormalization(base.Layer):
       else:
         new_mean, new_variance = mean, variance
 
-      # Update moving averages when training, and prevent updates otherwise.
-      decay = utils.smart_cond(training, lambda: self.momentum, lambda: 1.)
       if self.virtual_batch_size is not None:
         # This isn't strictly correct since in ghost batch norm, you are
         # supposed to sequentially update the moving_mean and moving_variance
@@ -575,10 +576,18 @@ class BatchNormalization(base.Layer):
         new_variance = math_ops.reduce_mean(new_variance,
                                             axis=1, keep_dims=True)
 
-      mean_update = moving_averages.assign_moving_average(
-          self.moving_mean, new_mean, decay, zero_debias=False)
-      variance_update = moving_averages.assign_moving_average(
-          self.moving_variance, new_variance, decay, zero_debias=False)
+      def _do_update(var, value):
+        return moving_averages.assign_moving_average(
+            var, value, self.momentum, zero_debias=False)
+
+      mean_update = utils.smart_cond(
+          training,
+          lambda: _do_update(self.moving_mean, new_mean),
+          lambda: self.moving_mean)
+      variance_update = utils.smart_cond(
+          training,
+          lambda: _do_update(self.moving_variance, new_variance),
+          lambda: self.moving_variance)
       if context.in_graph_mode():
         self.add_update(mean_update, inputs=inputs)
         self.add_update(variance_update, inputs=inputs)
",0,train
02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining.

PiperOrigin-RevId: 405689268
Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",tf_tfl_passes.cc,"@@ -81,6 +81,7 @@ void AddConvertHloToTfPass(std::string entry_function_name,
   // DCE for private symbols.
   pass_manager->addPass(mlir::createSymbolDCEPass());
 
+  pass_manager->addPass(mlir::TF::CreateStripNoinlineAttributePass());
   // Add inline pass.
   pass_manager->addPass(mlir::createInlinerPass());
 
",0,train
02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining.

PiperOrigin-RevId: 405689268
Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",passes.h,"@@ -183,6 +183,10 @@ LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function);
 // known element shapes of push ops.
 std::unique_ptr<OperationPass<ModuleOp>> CreateStackOpsDecompositionPass();
 
+// Creates a pass to strip the ""tf._noinline"" attribute from the functions in
+// the module.
+std::unique_ptr<OperationPass<ModuleOp>> CreateStripNoinlineAttributePass();
+
 // Converts tensor list operations into operations on buffers and sizes. Needs
 // static shapes and known max element count.
 std::unique_ptr<OperationPass<ModuleOp>> CreateTensorListOpsDecompositionPass();
",0,train
02f5601acdca3b468e568de2833ec893cbba2ef8,tensorflow/tensorflow,"Create a TF dialect pass to strip _noinline attributes and ensure that these are stripped when running the TF->TFL converter before inlining.

PiperOrigin-RevId: 405689268
Change-Id: I3e4ec98c921b4f3b635adcafe088bc6649233861",strip_noinline_attribute.cc,"@@ -0,0 +1,43 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h""
+#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes_detail.h""
+
+namespace mlir {
+namespace TF {
+
+namespace {
+
+// This transformation pass strips any ""_noinline"" attributes from the module.
+struct StripNoinlineAttributePass
+    : public StripNoinlineAttributePassBase<StripNoinlineAttributePass> {
+ public:
+  // void runOnOperation() override;
+  void runOnOperation() override {
+    // Strip the ""tf._noinline"" attribute from top-level functions.
+    for (auto func_op : getOperation().getOps<FuncOp>())
+      func_op->removeAttr(""tf._noinline"");
+  }
+};
+
+}  // namespace
+
+std::unique_ptr<OperationPass<ModuleOp>> CreateStripNoinlineAttributePass() {
+  return std::make_unique<StripNoinlineAttributePass>();
+}
+
+}  // namespace TF
+}  // namespace mlir
",0,train
9f4e69bb1e8da97d369d1c2a9999845a41bb589b,tensorflow/tensorflow,"ConvolutionTransposedThin converted to new style.

PiperOrigin-RevId: 318083958
Change-Id: I8de4b4b250ceff00e1d16ef917cb2f8698d33e28",convolution_transposed_thin.cc,"@@ -28,21 +28,17 @@ namespace gpu {
 namespace cl {
 namespace {
 
-std::string GenerateConvolutionTransposedCode(
-    const OperationDef& op_def, int src_depth, int dst_channels,
-    const int2& kernel_size, const CLDevice& device,
-    const std::vector<ElementwiseOperation*>& linked_operations) {
-  TensorCodeGenerator src_tensor(
-      ""src_data"",
-      WHSBPoint{""src_size.x"", ""src_size.y"", ""src_size.z"", ""src_size.w""},
-      op_def.src_tensors[0]);
-  TensorCodeGenerator dst_tensor(
-      ""dst_data"",
-      WHSBPoint{""dst_size.x"", ""dst_size.y"", ""dst_size.z"", ""dst_size.w""},
-      op_def.dst_tensors[0]);
+std::string GenerateConvolutionTransposedCode(const OperationDef& op_def,
+                                              int src_depth, int dst_channels,
+                                              const int2& kernel_size,
+                                              Arguments* args) {
+  args->AddObjectRef(
+      ""src_tensor"", AccessType::READ,
+      absl::make_unique<TensorDescriptor>(op_def.src_tensors[0]));
+  args->AddObjectRef(
+      ""dst_tensor"", AccessType::WRITE,
+      absl::make_unique<TensorDescriptor>(op_def.dst_tensors[0]));
 
-  const std::string batch_id = op_def.IsBatchSupported() ? ""B"" : """";
-  std::string c = GetCommonDefines(op_def.precision);
   const std::string channel_x = dst_channels == 1 ? """" : "".x"";
   const std::vector<std::string> postfix = {channel_x, "".y"", "".z"", "".w""};
   const std::vector<std::string> channel = {"".x"", "".y"", "".z"", "".w""};
@@ -62,36 +58,33 @@ std::string GenerateConvolutionTransposedCode(
       break;
   }
 
+  std::string c = GetCommonDefines(op_def.precision);
   c += ""__kernel void main_function(\n"";
-  c += src_tensor.GetDeclaration(AccessType::READ) + "",\n"";
-  c += ""    __constant FLT4* filters"";
-  c += GetArgsDeclaration(linked_operations);
-  c += dst_tensor.GetDeclaration(AccessType::WRITE) + "",\n"";
-  c += ""    int4 src_size,             \n"";
-  c += ""    int4 dst_size,             \n"";
-  c += ""    FLT4 bias_value            \n"";
-  c += "") {\n"";
+  c += ""$0) {\n"";
   if (op_def.IsBatchSupported()) {
     c += ""  int linear_id = get_global_id(0);\n"";
-    c += ""  int X = linear_id / dst_size.w;\n"";
-    c += ""  int B = linear_id % dst_size.w;\n"";
+    c += ""  int X = linear_id / args.dst_tensor.Batch();\n"";
+    c += ""  int B = linear_id % args.dst_tensor.Batch();\n"";
+    c += ""  args.dst_tensor.SetBatchRef(B);\n"";
+    c += ""  args.src_tensor.SetBatchRef(B);\n"";
   } else {
     c += ""  int X = get_global_id(0);\n"";
   }
   c += ""  int Y = get_global_id(1);\n"";
-  c += ""  if (X >= src_size.x || Y >= src_size.y) return;\n"";
+  c += ""  if (X >= args.src_tensor.Width() || Y >= args.src_tensor.Height()) ""
+       ""return;\n"";
   c += ""  "" + accum_type + "" r["" + std::to_string(kernel_size.y) + ""]["" +
        std::to_string(kernel_size.x) + ""];\n"";
   c += ""  {\n"";
-  c += ""  FLT4 src = "" + src_tensor.ReadWHSB(""X"", ""Y"", ""0"", batch_id) + "";\n"";
+  c += ""  FLT4 src = args.src_tensor.Read(X, Y, 0);\n"";
   int index = 0;
   for (int y = 0; y < kernel_size.y; ++y) {
     for (int x = 0; x < kernel_size.x; ++x) {
       std::string r_s =
           ""  r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]"";
       for (int d = 0; d < dst_channels; ++d) {
-        c += r_s + postfix[d] + "" = dot(src, filters["" + std::to_string(index) +
-             ""]);\n"";
+        c += r_s + postfix[d] + "" = dot(src, args.weights.Read("" +
+             std::to_string(index) + ""));\n"";
         index++;
       }
     }
@@ -100,15 +93,15 @@ std::string GenerateConvolutionTransposedCode(
   for (int i = 1; i < src_depth; ++i) {
     c += ""  if (X > "" + std::to_string(-i) +
          "") {  // always true, to reduce registers usage\n"";
-    c += ""  FLT4 src = "" +
-         src_tensor.ReadWHSB(""X"", ""Y"", std::to_string(i), batch_id) + "";\n"";
+    c +=
+        ""  FLT4 src = args.src_tensor.Read(X, Y, "" + std::to_string(i) + "");\n"";
     for (int y = 0; y < kernel_size.y; ++y) {
       for (int x = 0; x < kernel_size.x; ++x) {
         std::string r_s =
             ""  r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]"";
         for (int d = 0; d < dst_channels; ++d) {
-          c += r_s + postfix[d] + "" += dot(src, filters["" +
-               std::to_string(index) + ""]);\n"";
+          c += r_s + postfix[d] + "" += dot(src, args.weights.Read("" +
+               std::to_string(index) + ""));\n"";
           index++;
         }
       }
@@ -121,21 +114,16 @@ std::string GenerateConvolutionTransposedCode(
     for (int x = 0; x < kernel_size.x; ++x) {
       const std::string x_coord = ""X + "" + std::to_string(x);
       const std::string y_coord = ""Y + "" + std::to_string(y);
-      c += ""  if ("" + x_coord + "" < dst_size.x && "" + y_coord +
-           "" < dst_size.y) {\n"";
-      c += ""    FLT4 result = bias_value;\n"";
+      c += ""  if ("" + x_coord + "" < args.dst_tensor.Width() && "" + y_coord +
+           "" < args.dst_tensor.Height()) {\n"";
+      c += ""    FLT4 result = args.weights.Read("" + std::to_string(index) +
+           "");\n"";
       for (int d = 0; d < dst_channels; ++d) {
         c += ""    result"" + channel[d] + "" += r["" + std::to_string(y) + ""]["" +
              std::to_string(x) + ""]"" + postfix[d] + "";\n"";
       }
-      const std::string x_3dcoord = op_def.IsBatchSupported()
-                                        ? ""("" + x_coord + "") * dst_size.w + B""
-                                        : x_coord;
-      const LinkingContext context{""result"", x_3dcoord, y_coord, ""0""};
-      c += PostProcess(linked_operations, context);
-      c += ""    "" +
-           dst_tensor.WriteWHSB(""result"", x_coord, y_coord, ""0"", batch_id) +
-           ""\n"";
+      c += ""    args.dst_tensor.Write(result, "" + x_coord + "", "" + y_coord +
+           "", 0);\n"";
       c += ""  }\n"";
     }
   }
@@ -150,19 +138,11 @@ ConvolutionTransposedThin::ConvolutionTransposedThin(
     : GPUOperation(definition),
       kernel_size_(attr.weights.shape.w, attr.weights.shape.h),
       src_channels_(attr.weights.shape.i),
-      dst_channels_(attr.weights.shape.o) {
-  float4 bias_value(0.0f);
-  for (int i = 0; i < attr.weights.shape.o; ++i) {
-    bias_value[i] = attr.bias.data[i];
-  }
-  bias_value_ = FLT4(definition_.precision, bias_value);
-}
+      dst_channels_(attr.weights.shape.o) {}
 
 ConvolutionTransposedThin::ConvolutionTransposedThin(
     ConvolutionTransposedThin&& operation)
     : GPUOperation(std::move(operation)),
-      weights_buf_(std::move(operation.weights_buf_)),
-      bias_value_(std::move(operation.bias_value_)),
       kernel_size_(operation.kernel_size_),
       src_channels_(operation.src_channels_),
       dst_channels_(operation.dst_channels_),
@@ -172,8 +152,6 @@ ConvolutionTransposedThin::ConvolutionTransposedThin(
 ConvolutionTransposedThin& ConvolutionTransposedThin::operator=(
     ConvolutionTransposedThin&& operation) {
   if (this != &operation) {
-    weights_buf_ = std::move(operation.weights_buf_);
-    bias_value_ = std::move(operation.bias_value_);
     std::swap(kernel_size_, operation.kernel_size_);
     std::swap(src_channels_, operation.src_channels_);
     std::swap(dst_channels_, operation.dst_channels_);
@@ -186,9 +164,15 @@ ConvolutionTransposedThin& ConvolutionTransposedThin::operator=(
 
 absl::Status ConvolutionTransposedThin::Compile(
     const CreationContext& creation_context) {
-  const auto code = GenerateConvolutionTransposedCode(
+  std::string code = GenerateConvolutionTransposedCode(
       definition_, DivideRoundUp(src_channels_, 4), dst_channels_, kernel_size_,
-      *creation_context.device, linked_operations_);
+      &args_);
+  std::string element_wise_code;
+  RETURN_IF_ERROR(
+      MergeOperations(linked_operations_, &args_, &element_wise_code));
+  RETURN_IF_ERROR(args_.TransformToCLCode(creation_context.device->GetInfo(),
+                                          {{""dst_tensor"", element_wise_code}},
+                                          &code));
 
   std::vector<CompilerOptions> options;
   if (definition_.precision == CalculationsPrecision::F16 &&
@@ -202,15 +186,10 @@ absl::Status ConvolutionTransposedThin::Compile(
 }
 
 absl::Status ConvolutionTransposedThin::BindArguments() {
-  kernel_.ResetBindingCounter();
-  RETURN_IF_ERROR(kernel_.SetMemoryAuto(src_[0]->GetMemoryPtr()));
-  RETURN_IF_ERROR(kernel_.SetMemoryAuto(weights_buf_.GetMemoryPtr()));
-  RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
-  RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
-  RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[0]->GetWHSB()));
-  RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWHSB()));
-  RETURN_IF_ERROR(kernel_.SetBytesAuto(bias_value_));
-  return absl::OkStatus();
+  RETURN_IF_ERROR(args_.SetObjectRef(""src_tensor"", src_[0]));
+  RETURN_IF_ERROR(args_.SetObjectRef(""dst_tensor"", dst_[0]));
+  RETURN_IF_ERROR(SetArguments(linked_operations_, &args_));
+  return args_.Bind(kernel_.kernel());
 }
 
 int3 ConvolutionTransposedThin::GetGridSize() const {
@@ -248,7 +227,7 @@ absl::Status CreateConvolutionTransposedThin(
   }
   *result = ConvolutionTransposedThin(definition, attr);
   RETURN_IF_ERROR(
-      result->UploadWeights(attr.weights, creation_context.context));
+      result->UploadData(attr.weights, attr.bias, creation_context.context));
   return absl::OkStatus();
 }
 
",0,train
9f4e69bb1e8da97d369d1c2a9999845a41bb589b,tensorflow/tensorflow,"ConvolutionTransposedThin converted to new style.

PiperOrigin-RevId: 318083958
Change-Id: I8de4b4b250ceff00e1d16ef917cb2f8698d33e28",convolution_transposed_thin.h,"@@ -58,8 +58,9 @@ class ConvolutionTransposedThin : public GPUOperation {
   ConvolutionTransposedThin(const OperationDef& definition,
                             const ConvolutionTransposedAttributes& attr);
   template <DataType T>
-  absl::Status UploadWeights(const tflite::gpu::Tensor<OHWI, T>& weights,
-                             CLContext* context);
+  absl::Status UploadData(const tflite::gpu::Tensor<OHWI, T>& weights,
+                          const tflite::gpu::Tensor<Linear, T>& biases,
+                          CLContext* context);
 
   template <DataType S, typename T>
   void RearrangeWeightsData(const tflite::gpu::Tensor<OHWI, S>& weights,
@@ -68,9 +69,6 @@ class ConvolutionTransposedThin : public GPUOperation {
   absl::Status BindArguments();
   int3 GetGridSize() const;
 
-  Buffer weights_buf_;
-  FLT4 bias_value_;
-
   int2 kernel_size_;
   int src_channels_;
   int dst_channels_;
@@ -80,25 +78,50 @@ class ConvolutionTransposedThin : public GPUOperation {
 };
 
 template <DataType T>
-absl::Status ConvolutionTransposedThin::UploadWeights(
-    const tflite::gpu::Tensor<OHWI, T>& weights, CLContext* context) {
+absl::Status ConvolutionTransposedThin::UploadData(
+    const tflite::gpu::Tensor<OHWI, T>& weights,
+    const tflite::gpu::Tensor<Linear, T>& biases, CLContext* context) {
   const int src_depth = DivideRoundUp(src_channels_, 4);
-  const int elements_count =
-      kernel_size_.x * kernel_size_.y * src_depth * 4 * dst_channels_;
+  const int flt4_count =
+      kernel_size_.x * kernel_size_.y * src_depth * dst_channels_;
+
+  const bool f32_weights = definition_.precision == CalculationsPrecision::F32;
 
-  const int float4_size =
-      definition_.precision == CalculationsPrecision::F32 ? 16 : 8;
-  if (definition_.GetDataType() == DataType::FLOAT32) {
-    std::vector<float4> gpu_data(elements_count);
+  BufferDescriptor desc;
+  desc.element_type = f32_weights ? DataType::FLOAT32 : DataType::FLOAT16;
+  desc.element_size = 4;
+  desc.memory_type = MemoryType::CONSTANT;
+
+  Buffer weights_buffer;
+  if (f32_weights) {
+    std::vector<float4> gpu_data(flt4_count);
     RearrangeWeightsData(weights, absl::MakeSpan(gpu_data));
-    return CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(),
-                                context, &weights_buf_);
+    float4 bias_value(0.0f);
+    for (int i = 0; i < weights.shape.o; ++i) {
+      bias_value[i] = biases.data[i];
+    }
+    gpu_data.push_back(bias_value);
+    RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(float4) * gpu_data.size(),
+                                         gpu_data.data(), context,
+                                         &weights_buffer));
   } else {
-    std::vector<half4> gpu_data(elements_count);
+    std::vector<half4> gpu_data(flt4_count);
     RearrangeWeightsData(weights, absl::MakeSpan(gpu_data));
-    return CreateReadOnlyBuffer(float4_size * elements_count, gpu_data.data(),
-                                context, &weights_buf_);
+    half4 bias_value(0.0f);
+    for (int i = 0; i < weights.shape.o; ++i) {
+      bias_value[i] = biases.data[i];
+    }
+    gpu_data.push_back(bias_value);
+    RETURN_IF_ERROR(CreateReadOnlyBuffer(sizeof(half4) * gpu_data.size(),
+                                         gpu_data.data(), context,
+                                         &weights_buffer));
   }
+
+  args_.AddObject(""weights"", AccessType::READ,
+                  absl::make_unique<Buffer>(std::move(weights_buffer)),
+                  absl::make_unique<BufferDescriptor>(desc));
+
+  return absl::OkStatus();
 }
 
 template <DataType S, typename T>
",0,train
3adaa332c7c5055398f38c189a6ea741f5c799ed,tensorflow/tensorflow,"Add shape inference and upate tests

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",libsvm_ops.cc,"@@ -20,6 +20,7 @@ limitations under the License.
 namespace tensorflow {
 
 using shape_inference::InferenceContext;
+using shape_inference::ShapeHandle;
 
 REGISTER_OP(""DecodeLibsvm"")
     .Input(""input: string"")
@@ -27,7 +28,19 @@ REGISTER_OP(""DecodeLibsvm"")
     .Output(""feature: dtype"")
     .Attr(""dtype: {float, double, int32, int64} = DT_FLOAT"")
     .Attr(""num_features: int >= 1"")
-    .SetShapeFn(shape_inference::UnknownShape)
+    .SetShapeFn([](InferenceContext* c) {
+      c->set_output(0, c->input(0));
+
+      int32 num_features;
+      TF_RETURN_IF_ERROR(c->GetAttr(""num_features"", &num_features));
+      ShapeHandle out;
+      TF_RETURN_IF_ERROR(
+          c->Concatenate(c->input(0), c->Vector(num_features), &out));
+      c->set_output(1, out);
+
+      return Status::OK();
+    })
+
     .Doc(R""doc(
 Convert LibSVM input to tensors. The output consists of
 a label and a feature tensor. The shape of the label tensor
",0,train
3adaa332c7c5055398f38c189a6ea741f5c799ed,tensorflow/tensorflow,"Add shape inference and update tests

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",decode_libsvm_op_test.py,"@@ -19,7 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
-import sys
+
 from tensorflow.contrib.libsvm.python.ops import libsvm_ops
 from tensorflow.python.platform import test
 
@@ -32,6 +32,12 @@ class DecodeLibsvmOpTest(test.TestCase):
                  ""1 2:2.5 3:0.1 5:0.503"",
                  ""2 3:2.5 2:0.1 1:0.105""]
       label, feature = libsvm_ops.decode_libsvm(content, num_features=6)
+
+      # shape inference
+      self.assertAllEqual(label.get_shape().as_list(), [3])
+      self.assertAllEqual(feature.get_shape().as_list(), [3, 6])
+
+      # sess.run()
       label, feature = sess.run([label, feature])
       self.assertAllEqual(label, [1, 1, 2])
       self.assertAllClose(feature, [[0, 3.4, 0.5, 0, 0.231, 0],
",0,train
592d2d67daca18db98c7f67b0a55ef487ed76f1c,tensorflow/tensorflow,"Transpose for high dimensional tensors using eigen (#15893)

* speeding up transpose on CPU",transpose_functor_cpu.cc,"@@ -88,6 +88,18 @@ struct Transpose<CPUDevice, T, conjugate> {
         internal::TransposeUsingEigen<CPUDevice, T, 5>(d, in, perm, conjugate,
                                                        out);
         break;
+      case 6:
+	internal::TransposeUsingEigen<CPUDevice, T, 6>(d, in, perm, conjugate,
+						       out);
+	break;
+      case 7:
+	internal::TransposeUsingEigen<CPUDevice, T, 7>(d, in, perm, conjugate,
+						       out);
+	break;
+      case 8:
+        internal::TransposeUsingEigen<CPUDevice, T, 8>(d, in, perm, conjugate,
+						       out);
+	break;
       default:
         TransposeSimple<T, conjugate>(d, in, perm, out);
         break;
",0,train
592d2d67daca18db98c7f67b0a55ef487ed76f1c,tensorflow/tensorflow,"Transpose for high dimensional tensors using eigen (#15893)

* speeding up transpose on CPU",transpose_functor_gpu.cu.cc,"@@ -201,6 +201,27 @@ struct Transpose<GPUDevice, T, conjugate> {
                                                          out);
         }
         break;
+      case 6:
+        if (!internal::TransposeUsingTile<T, conjugate>::run(d, in, perm,
+                                                             out)) {
+          internal::TransposeUsingEigen<GPUDevice, T, 6>(d, in, perm, conjugate,
+                                                         out);
+        }
+        break;
+      case 7:
+        if (!internal::TransposeUsingTile<T, conjugate>::run(d, in, perm,
+                                                             out)) {
+          internal::TransposeUsingEigen<GPUDevice, T, 7>(d, in, perm, conjugate,
+                                                         out);
+        }
+        break;
+      case 8:
+        if (!internal::TransposeUsingTile<T, conjugate>::run(d, in, perm,
+                                                             out)) {
+          internal::TransposeUsingEigen<GPUDevice, T, 8>(d, in, perm, conjugate,
+                                                         out);
+        }
+        break;
       default:
         internal::TransposeSimple<T, conjugate>(d, in, perm, out);
         break;
",0,train
d735183a884ad1662750658d7292729efed15885,tensorflow/tensorflow,"Update GraphDef version to 752.

PiperOrigin-RevId: 371291460
Change-Id: Ib35c171a5e91992993e80e74883488550cb4de3a",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 751  // Updated: 2021/4/29
+#define TF_GRAPH_DEF_VERSION 752  // Updated: 2021/4/30
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
c923edca9d0d54360fa52a1da36d303822d63330,tensorflow/tensorflow,"Remove cloned prepare_tf pass description from unroll_batch_matmul pass

Instead, there is already a class level documentation for the pass.

PiperOrigin-RevId: 268550569",unroll_batch_matmul.cc,"@@ -13,22 +13,6 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-// This transformation pass prepares for legalization to the TFLite dialect by
-// converting operations in TensorFlow dialect into operations that can be
-// legalized to TensorFlow Lite dialect with simple replacements.  The newly
-// created operations are in the TensorFlow dialect if the operation can be
-// represented using a TensorFlow op.  Otherwise, TensorFlow Lite dialect op is
-// used.  For example, Conv2D in TFLite which uses OHWI data format for filters
-// is not supported in TensorFlow because TensorFlow requires filters in the
-// HWIO data format.
-//
-// Motivation to prepare for the TFLite legalization before the actual
-// legalization is to exploit constant folding opportunities in any newly
-// created ops by leveraging constant folding support for the TensorFlow ops.
-// This way TFLite can be used as a serialization format only and does not
-// require access to the TFLite runtime for optimizations as required by the
-// TFLite team.
-
 #include ""tensorflow/compiler/mlir/lite/transforms/unroll_batch_matmul.h""
 
 #include <climits>
",0,train
995fa1fe89e647c2f7d62c9a289e4b50b912ddaf,tensorflow/tensorflow,"Remove Checkpoint proto symbols from the v2 API

Keeps tf.train.latest_checkpoint, since that's widely used and still quite useful.

The functionality of these symbols is replaced by tf.train.CheckpointManager.

PiperOrigin-RevId: 220879395",checkpoint_management.py,"@@ -36,6 +36,7 @@ from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training_util
 from tensorflow.python.training.checkpoint_state_pb2 import CheckpointState
 from tensorflow.python.util import compat
+from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -55,7 +56,11 @@ def _GetCheckpointFilename(save_dir, latest_filename):
   return os.path.join(save_dir, latest_filename)
 
 
-@tf_export(""train.generate_checkpoint_state_proto"")
+@deprecation.deprecated(
+    date=None,
+    instructions=(""Use tf.train.CheckpointManager to manage checkpoints rather ""
+                  ""than editing the Checkpoint proto manually.""))
+@tf_export(v1=[""train.generate_checkpoint_state_proto""])
 def generate_checkpoint_state_proto(save_dir,
                                     model_checkpoint_path,
                                     all_model_checkpoint_paths=None,
@@ -121,7 +126,11 @@ def generate_checkpoint_state_proto(save_dir,
   return coord_checkpoint_proto
 
 
-@tf_export(""train.update_checkpoint_state"")
+@deprecation.deprecated(
+    date=None,
+    instructions=(""Use tf.train.CheckpointManager to manage checkpoints rather ""
+                  ""than manually editing the Checkpoint proto.""))
+@tf_export(v1=[""train.update_checkpoint_state""])
 def update_checkpoint_state(save_dir,
                             model_checkpoint_path,
                             all_model_checkpoint_paths=None,
@@ -344,7 +353,10 @@ def latest_checkpoint(checkpoint_dir, latest_filename=None):
   return None
 
 
-@tf_export(""train.checkpoint_exists"")
+@deprecation.deprecated(
+    date=None,
+    instructions=""Use standard file APIs to check for files with this prefix."")
+@tf_export(v1=[""train.checkpoint_exists""])
 def checkpoint_exists(checkpoint_prefix):
   """"""Checks whether a V1 or V2 checkpoint exists with the specified prefix.
 
@@ -369,7 +381,10 @@ def checkpoint_exists(checkpoint_prefix):
     return False
 
 
-@tf_export(""train.get_checkpoint_mtimes"")
+@deprecation.deprecated(
+    date=None,
+    instructions=""Use standard file utilities to get mtimes."")
+@tf_export(v1=[""train.get_checkpoint_mtimes""])
 def get_checkpoint_mtimes(checkpoint_prefixes):
   """"""Returns the mtimes (modification timestamps) of the checkpoints.
 
@@ -408,7 +423,10 @@ def get_checkpoint_mtimes(checkpoint_prefixes):
   return mtimes
 
 
-@tf_export(""train.remove_checkpoint"")
+@deprecation.deprecated(
+    date=None,
+    instructions=""Use standard file APIs to delete files with this prefix."")
+@tf_export(v1=[""train.remove_checkpoint""])
 def remove_checkpoint(checkpoint_prefix,
                       checkpoint_format_version=saver_pb2.SaverDef.V2,
                       meta_graph_suffix=""meta""):
",0,train
c94d33c7d3fe32aa46decebe6fb261c2ff5012c3,tensorflow/tensorflow,"Reset the global_train_batch in each training.

PiperOrigin-RevId: 303016514
Change-Id: I6af560f7f6e94c359600c2913a9dd426f062b921",callbacks.py,"@@ -2005,6 +2005,7 @@ class TensorBoard(Callback, version_utils.TensorBoardVersionSelector):
     self._should_trace = not (self._start_batch == 0 and self._stop_batch == 0)
 
   def on_train_begin(self, logs=None):
+    self._global_train_batch = 0
     self._push_writer(self._train_writer, self._train_step)
 
   def on_train_end(self, logs=None):
",0,train
c94d33c7d3fe32aa46decebe6fb261c2ff5012c3,tensorflow/tensorflow,"Reset the global_train_batch in each training.

PiperOrigin-RevId: 303016514
Change-Id: I6af560f7f6e94c359600c2913a9dd426f062b921",callbacks_test.py,"@@ -2018,14 +2018,16 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
         run_eagerly=testing_utils.should_run_eagerly())
     return model
 
-  def _get_trace_file(self, logdir):
+  def _count_trace_file(self, logdir):
     profile_dir = os.path.join(logdir, 'plugins', 'profile')
+    count = 0
     for (dirpath, dirnames, filenames) in os.walk(profile_dir):
+      del dirpath  # unused
       del dirnames  # unused
       for filename in filenames:
         if filename.endswith('.trace.json.gz'):
-          return os.path.join(dirpath, filename)
-    return None
+          count += 1
+    return count
 
   def fitModelAndAssertKerasModelWritten(self, model):
     x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
@@ -2095,7 +2097,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
             _ObservedSummary(logdir=self.train_dir, tag=u'batch_1'),
         },
     )
-    self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir))
+    self.assertEqual(1, self._count_trace_file(logdir=self.train_dir))
 
   def test_TensorBoard_autoTrace_tagNameWithBatchNum(self):
     model = self._get_seq_model()
@@ -2118,7 +2120,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
             _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'),
         },
     )
-    self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir))
+    self.assertEqual(1, self._count_trace_file(logdir=self.train_dir))
 
   def test_TensorBoard_autoTrace_profileBatchRangeSingle(self):
     model = self._get_seq_model()
@@ -2142,7 +2144,30 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
             _ObservedSummary(logdir=self.train_dir, tag=u'batch_2'),
         },
     )
-    self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir))
+    self.assertEqual(1, self._count_trace_file(logdir=self.train_dir))
+
+  def test_TensorBoard_autoTrace_profileBatchRangeTwice(self):
+    model = self._get_seq_model()
+    x, y = np.ones((10, 10, 10, 1)), np.ones((10, 1))
+    tb_cbk = keras.callbacks.TensorBoard(
+        self.logdir, histogram_freq=1, profile_batch='10,10', write_graph=False)
+
+    model.fit(
+        x,
+        y,
+        batch_size=3,
+        epochs=10,
+        validation_data=(x, y),
+        callbacks=[tb_cbk])
+
+    model.fit(
+        x,
+        y,
+        batch_size=3,
+        epochs=10,
+        validation_data=(x, y),
+        callbacks=[tb_cbk])
+    self.assertEqual(2, self._count_trace_file(logdir=self.train_dir))
 
   # Test case that replicates a Github issue.
   # https://github.com/tensorflow/tensorflow/issues/37543
@@ -2162,7 +2187,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
         callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=1)],
     )
     # Verifies trace exists in the first logdir.
-    self.assertIsNotNone(self._get_trace_file(logdir=logdir))
+    self.assertEqual(1, self._count_trace_file(logdir=logdir))
     logdir = os.path.join(self.get_temp_dir(), 'tb2')
     model.fit(
         np.zeros((64, 1)),
@@ -2171,7 +2196,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
         callbacks=[keras.callbacks.TensorBoard(logdir, profile_batch=2)],
     )
     # Verifies trace exists in the second logdir.
-    self.assertIsNotNone(self._get_trace_file(logdir=logdir))
+    self.assertEqual(1, self._count_trace_file(logdir=logdir))
 
   def test_TensorBoard_autoTrace_profileBatchRange(self):
     model = self._get_seq_model()
@@ -2195,7 +2220,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
             _ObservedSummary(logdir=self.train_dir, tag=u'batch_3'),
         },
     )
-    self.assertIsNotNone(self._get_trace_file(logdir=self.train_dir))
+    self.assertEqual(1, self._count_trace_file(logdir=self.train_dir))
 
   def test_TensorBoard_autoTrace_profileInvalidBatchRange(self):
     with self.assertRaises(ValueError):
@@ -2237,7 +2262,7 @@ class TestTensorBoardV2NonParameterizedTest(keras_parameterized.TestCase):
 
     # Enabled trace only on the 10000th batch, thus it should be empty.
     self.assertEmpty(summary_file.tensors)
-    self.assertIsNone(self._get_trace_file(logdir=self.train_dir))
+    self.assertEqual(0, self._count_trace_file(logdir=self.train_dir))
 
 
 class MostRecentlyModifiedFileMatchingPatternTest(test.TestCase):
",0,train
c88fd63cc599195cbf88885689e8630dd888bb6d,tensorflow/tensorflow,"[Grappler] Fix bug in arithmetic optimizer causing non-unique node names.

PiperOrigin-RevId: 257124468",arithmetic_optimizer.cc,"@@ -887,7 +887,8 @@ class HoistCommonFactorOutOfAggregation : public ArithmeticOptimizerStage {
     // them, it's possible that rewritten node already exists in a graph
     return rewritten_nodes_.find(node->name()) != rewritten_nodes_.end() ||
            ctx().node_map->NodeExists(OuterNodeName(node, false)) ||
-           ctx().node_map->NodeExists(OuterNodeName(node, true));
+           ctx().node_map->NodeExists(OuterNodeName(node, true)) ||
+           ctx().node_map->NodeExists(InnerAddNodeName(node));
   }
 
   // keep names of the nodes that were optimized by this stage
",0,train
3336574287a16a0ead083a33b5e80a1c7204fa62,tensorflow/tensorflow,"Fix shape mismatch in `rnn()` of keras backend

PiperOrigin-RevId: 202231273",backend.py,"@@ -3161,10 +3161,16 @@ def rnn(step_function,
                                       array_ops.stack(
                                           [1, array_ops.shape(output)[1]]))
         output = array_ops.where(tiled_mask_t, output, states[0])
-        new_states = [
-            array_ops.where(tiled_mask_t, new_states[i], states[i])
-            for i in range(len(states))
-        ]
+
+        masked_states = []
+        for i in range(len(states)):
+          states_dim = array_ops.shape(new_states[i])[1]
+          stacked_states_dim = array_ops.stack([1, states_dim])
+          tiled_mask = array_ops.tile(mask_t, stacked_states_dim)
+          masked_state = array_ops.where(tiled_mask, new_states[i], states[i])
+          masked_states.append(masked_state)
+        new_states = masked_states
+
         output_ta_t = output_ta_t.write(time, output)
         return (time + 1, output_ta_t) + tuple(new_states)
     else:
",0,train
3336574287a16a0ead083a33b5e80a1c7204fa62,tensorflow/tensorflow,"Fix shape mismatch in `rnn()` of keras backend

PiperOrigin-RevId: 202231273",backend_test.py,"@@ -1077,7 +1077,7 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
         {'go_backwards': False, 'mask': mask, 'unroll': True},
     ]
     with self.test_session():
-      for (i, kwargs) in enumerate(kwargs_list):
+      for i, kwargs in enumerate(kwargs_list):
         last_output, outputs, new_states = keras.backend.rnn(rnn_fn, inputs,
                                                              initial_states,
                                                              **kwargs)
@@ -1124,6 +1124,115 @@ class BackendNNOpsTest(test.TestCase, parameterized.TestCase):
       for b_s, b_u_s in zip(state_list[2], state_list[3]):
         self.assertAllClose(b_s, b_u_s, atol=1e-04)
 
+  def test_rnn_additional_states(self):
+    # implement a simple RNN
+    num_samples = 4
+    input_dim = 5
+    output_dim = 3
+    timesteps = 6
+
+    input_val = np.random.random(
+        (num_samples, timesteps, input_dim)).astype(np.float32)
+    init_state_val = np.random.random(
+        (num_samples, output_dim)).astype(np.float32)
+    w_i_val = np.random.random((input_dim, output_dim)).astype(np.float32)
+    w_o_val = np.random.random((output_dim, output_dim)).astype(np.float32)
+    np_mask = np.random.randint(2, size=(num_samples, timesteps))
+
+    def rnn_step_fn():
+      w_i = keras.backend.variable(w_i_val)
+      w_o = keras.backend.variable(w_o_val)
+
+      def step_function(x, states):
+        assert len(states) == 2
+        prev_output = states[0]
+        output = keras.backend.dot(x, w_i) + keras.backend.dot(prev_output, w_o)
+        return output, [output,
+                        keras.backend.concatenate([output, output], axis=-1)]
+
+      return step_function
+
+    # test default setup
+    last_output_list = [[], [], [], [], [], []]
+    outputs_list = [[], [], [], [], [], []]
+    state_list = [[], [], [], [], [], []]
+    additional_state_list = [[], [], [], [], [], []]
+
+    rnn_fn = rnn_step_fn()
+    inputs = keras.backend.variable(input_val)
+    initial_states = [keras.backend.variable(init_state_val),
+                      np.concatenate([init_state_val, init_state_val], axis=-1)]
+    mask = keras.backend.variable(np_mask)
+
+    kwargs_list = [
+        {'go_backwards': False, 'mask': None},
+        {'go_backwards': False, 'mask': None, 'unroll': True},
+        {'go_backwards': True, 'mask': None},
+        {'go_backwards': True, 'mask': None, 'unroll': True},
+        {'go_backwards': False, 'mask': mask},
+        {'go_backwards': False, 'mask': mask, 'unroll': True},
+    ]
+    with self.test_session():
+      for i, kwargs in enumerate(kwargs_list):
+        last_output, outputs, new_states = keras.backend.rnn(rnn_fn, inputs,
+                                                             initial_states,
+                                                             **kwargs)
+        # check static shape inference
+        self.assertEqual(last_output.get_shape().as_list(),
+                         [num_samples, output_dim])
+        self.assertEqual(outputs.get_shape().as_list(),
+                         [num_samples, timesteps, output_dim])
+        # for state in new_states:
+        #   self.assertEquals(state.get_shape().as_list(),
+        #                     [num_samples, output_dim])
+        self.assertEqual(new_states[0].get_shape().as_list(),
+                         [num_samples, output_dim])
+        self.assertEqual(new_states[1].get_shape().as_list(),
+                         [num_samples, 2 * output_dim])
+
+        last_output_list[i].append(keras.backend.eval(last_output))
+        outputs_list[i].append(keras.backend.eval(outputs))
+        self.assertEqual(len(new_states), 2)
+        state_list[i].append(keras.backend.eval(new_states[0]))
+        additional_state_list[i].append(keras.backend.eval(new_states[1]))
+
+      def assert_list_pairwise(z_list, atol=1e-05):
+        for (z1, z2) in zip(z_list[1:], z_list[:-1]):
+          self.assertAllClose(z1, z2, atol=atol)
+
+      assert_list_pairwise(last_output_list[0], atol=1e-04)
+      assert_list_pairwise(outputs_list[0], atol=1e-04)
+      assert_list_pairwise(state_list[0], atol=1e-04)
+      assert_list_pairwise(additional_state_list[0], atol=1e-04)
+      assert_list_pairwise(last_output_list[2], atol=1e-04)
+      assert_list_pairwise(outputs_list[2], atol=1e-04)
+      assert_list_pairwise(state_list[2], atol=1e-04)
+      assert_list_pairwise(additional_state_list[2], atol=1e-04)
+
+      for l, u_l in zip(last_output_list[0], last_output_list[1]):
+        self.assertAllClose(l, u_l, atol=1e-04)
+
+      for o, u_o in zip(outputs_list[0], outputs_list[1]):
+        self.assertAllClose(o, u_o, atol=1e-04)
+
+      for s, u_s in zip(state_list[0], state_list[1]):
+        self.assertAllClose(s, u_s, atol=1e-04)
+
+      for s, u_s in zip(additional_state_list[0], additional_state_list[1]):
+        self.assertAllClose(s, u_s, atol=1e-04)
+
+      for b_l, b_u_l in zip(last_output_list[2], last_output_list[3]):
+        self.assertAllClose(b_l, b_u_l, atol=1e-04)
+
+      for b_o, b_u_o in zip(outputs_list[2], outputs_list[3]):
+        self.assertAllClose(b_o, b_u_o, atol=1e-04)
+
+      for b_s, b_u_s in zip(state_list[2], state_list[3]):
+        self.assertAllClose(b_s, b_u_s, atol=1e-04)
+
+      for s, u_s in zip(additional_state_list[2], additional_state_list[3]):
+        self.assertAllClose(s, u_s, atol=1e-04)
+
   def test_normalize_batch_in_training(self):
     val = np.random.random((10, 3, 10, 10))
     x = keras.backend.variable(val)
",0,train
6787ce30efdfefbf69681ca9795959fb7244240b,tensorflow/tensorflow,"Update GraphDef version to 499.

PiperOrigin-RevId: 327589838
Change-Id: I97384115fcb61069d7041b40d8cead6522f86532",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 498  // Updated: 2020/8/19
+#define TF_GRAPH_DEF_VERSION 499  // Updated: 2020/8/20
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
a980aead874f555d01bb410b84024831404a408b,tensorflow/tensorflow,"Use test_adjusted_name when making the mangled_test_name in
run_and_gather_logs_lib.py, to avoid duplicate file names when the same test is
run on multiple GPUs.

PiperOrigin-RevId: 157630193",run_and_gather_logs_lib.py,"@@ -131,8 +131,18 @@ def run_and_gather_logs(name, test_name, test_args,
     # Hopefully running in sandboxed mode
     test_executable = os.path.join(""."", test_executable)
 
+  test_adjusted_name = name
+  gpu_config = gpu_info_lib.gather_gpu_devices()
+  if gpu_config:
+    gpu_name = gpu_config[0].model
+    gpu_short_name_match = re.search(r""Tesla [KP][4,8]0"", gpu_name)
+    if gpu_short_name_match:
+      gpu_short_name = gpu_short_name_match.group(0)
+      test_adjusted_name = name + ""|"" + gpu_short_name.replace("" "", ""_"")
+
   temp_directory = tempfile.mkdtemp(prefix=""run_and_gather_logs"")
-  mangled_test_name = name.strip(""/"").replace(""/"", ""_"").replace("":"", ""_"")
+  mangled_test_name = (test_adjusted_name.strip(""/"")
+                       .replace(""|"", ""_"").replace(""/"", ""_"").replace("":"", ""_""))
   test_file_prefix = os.path.join(temp_directory, mangled_test_name)
   test_file_prefix = ""%s."" % test_file_prefix
 
@@ -151,15 +161,6 @@ def run_and_gather_logs(name, test_name, test_args,
     if not log_files:
       raise MissingLogsError(""No log files found at %s."" % test_file_prefix)
 
-    test_adjusted_name = name
-    gpu_config = gpu_info_lib.gather_gpu_devices()
-    if gpu_config:
-      gpu_name = gpu_config[0].model
-      gpu_short_name_match = re.search(r""Tesla [KP][4,8]0"", gpu_name)
-      if gpu_short_name_match:
-        gpu_short_name = gpu_short_name_match.group(0)
-        test_adjusted_name = name + ""|"" + gpu_short_name.replace("" "", ""_"")
-
     return (process_test_logs(
         test_adjusted_name,
         test_name=test_name,
",0,train
b6af9ad8555b834822bff052316fac00cc8d949a,tensorflow/tensorflow,"Supported more types in TensorDescriptor::GetDataTypeFromTemplateArgs.

PiperOrigin-RevId: 432385164",tensor_desc.cc,"@@ -981,6 +981,18 @@ absl::Status TensorDescriptor::GetDataTypeFromTemplateArgs(
     *result = DataType::FLOAT16;
   } else if (read_type == ""float"") {
     *result = DataType::FLOAT32;
+  } else if (read_type == ""int"") {
+    *result = DataType::INT32;
+  } else if (read_type == ""short"") {
+    *result = DataType::INT16;
+  } else if (read_type == ""char"") {
+    *result = DataType::INT8;
+  } else if (read_type == ""uint"") {
+    *result = DataType::UINT32;
+  } else if (read_type == ""ushort"") {
+    *result = DataType::UINT16;
+  } else if (read_type == ""uchar"") {
+    *result = DataType::UINT8;
   } else {
     return absl::NotFoundError(absl::StrCat(
         ""Unrecognized Read selector template argument - "", read_type));
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",variable_ops.cc,"@@ -28,6 +28,8 @@ REGISTER_KERNEL_BUILDER(Name(""TemporaryVariable"").Device(DEVICE_CPU),
                         TemporaryVariableOp);
 REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"").Device(DEVICE_CPU),
                         DestroyTemporaryVariableOp);
+REGISTER_KERNEL_BUILDER(Name(""IsVariableInitialized"").Device(DEVICE_CPU),
+                        IsVariableInitializedOp);
 
 #if GOOGLE_CUDA
 // Only register 'Variable' on GPU for the subset of types also supported by
@@ -43,7 +45,12 @@ REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"").Device(DEVICE_CPU),
   REGISTER_KERNEL_BUILDER(Name(""DestroyTemporaryVariable"")               \
                               .Device(DEVICE_GPU)                        \
                               .TypeConstraint<type>(""T""),                \
-                          DestroyTemporaryVariableOp);
+                          DestroyTemporaryVariableOp);                   \
+  REGISTER_KERNEL_BUILDER(Name(""IsVariableInitialized"")                  \
+                              .Device(DEVICE_GPU)                        \
+                              .TypeConstraint<type>(""dtype"")             \
+                              .HostMemory(""is_initialized""),             \
+                          IsVariableInitializedOp);
 
 TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
 #undef REGISTER_GPU_KERNELS
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",variable_ops.h,"@@ -158,6 +158,22 @@ class DestroyTemporaryVariableOp : public OpKernel {
   string var_name_;
 };
 
+class IsVariableInitializedOp : public OpKernel {
+ public:
+  IsVariableInitializedOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    // Get a mutable input tensor of the Ref input.
+    const Tensor& input_tensor = context->mutable_input(0, false);
+    Tensor* output = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, TensorShape({}), &output));
+    auto output_tensor = output->tensor<bool, 0>();
+    bool result = input_tensor.IsInitialized();
+    output_tensor() = result;
+  }
+};
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_KERNELS_VARIABLE_OPS_H_
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",state_ops.cc,"@@ -40,6 +40,20 @@ shared_name: If non-empty, this variable is named in the given bucket
              with this shared_name. Otherwise, the node name is used instead.
 )doc"");
 
+REGISTER_OP(""IsVariableInitialized"")
+    .Output(""is_initialized: bool"")
+    .Input(""ref: Ref(dtype)"")
+    .Attr(""dtype: type"")
+    .SetAllowsUninitializedInput()
+    .Doc(R""doc(
+Checks whether a tensor has been initialized.
+
+Outputs boolean scalar indicating whether the tensor has been initialized.
+
+ref: Should be from a `Variable` node. May be uninitialized.
+dtype: The type of elements in the variable tensor.
+)doc"");
+
 REGISTER_OP(""TemporaryVariable"")
     .Output(""ref: Ref(dtype)"")
     .Attr(""shape: shape"")
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",variable_ops_test.py,"@@ -237,6 +237,14 @@ class VariableOpTest(tf.test.TestCase):
           result = tf.mul(var, var)
       self.assertAllClose([4.0], result.eval())
 
+  def testIsVariableInitialized(self):
+    for use_gpu in [True, False]:
+      with self.test_session(use_gpu=use_gpu):
+        v0 = state_ops.variable_op([1, 2], tf.float32)
+        self.assertEqual(False, tf.is_variable_initialized(v0).eval())
+        tf.assign(v0, [[2.0, 3.0]]).eval()
+        self.assertEqual(True, tf.is_variable_initialized(v0).eval())
+
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",state_ops.py,"@@ -30,6 +30,7 @@ collected in the graph.
 @@initialize_all_variables
 @@initialize_variables
 @@initialize_local_variables
+@@is_variable_initialized
 @@assert_variables_initialized
 
 ## Saving and Restoring Variables
@@ -134,6 +135,8 @@ def variable_op(shape, dtype, name=""Variable"", set_shape=True, container="""",
 # NOTE(mrry): Shapes are conditionally set in the Python wrapper.
 ops.RegisterShape(""Variable"")(common_shapes.unknown_shape)
 
+ops.RegisterShape(""IsVariableInitialized"")(common_shapes.scalar_shape)
+
 
 @ops.RegisterShape(""TemporaryVariable"")
 def _TemporaryVariableShape(op):
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",variables.py,"@@ -798,6 +798,18 @@ def initialize_local_variables():
   return initialize_variables(local_variables())
 
 
+def is_variable_initialized(variable):
+  """"""Returns an Op to check if a variable has been initialized.
+
+  Args:
+    variable: A `Variable`.
+
+  Returns:
+    An operation to check whether a variable has been initialized.
+  """"""
+  return state_ops.is_variable_initialized(variable)
+
+
 def assert_variables_initialized(var_list=None):
   """"""Returns an Op to check if variables are initialized.
 
",0,train
ac96df2d153c6ba47f902eccecc99701b76d660c,tensorflow/tensorflow,"Added is_variable_initialized(variable) function.
Change: 119321281",session_manager_test.py,"@@ -71,6 +71,8 @@ class SessionManagerTest(tf.test.TestCase):
       os.rename(checkpoint_dir, checkpoint_dir2)
       gfile.MakeDirs(checkpoint_dir)
       v = tf.Variable([6.0, 7.0, 8.0], name=""v"")
+      with self.test_session():
+        self.assertEqual(False, tf.is_variable_initialized(v).eval())
       tf.train.SessionManager(ready_op=tf.assert_variables_initialized())
       saver = tf.train.Saver({""v"": v})
       # This should fail as there's no checkpoint within 2 seconds.
@@ -85,6 +87,9 @@ class SessionManagerTest(tf.test.TestCase):
       sess = sm.prepare_session("""", init_op=None, saver=saver,
                                 checkpoint_dir=checkpoint_dir,
                                 wait_for_checkpoint=True, max_wait_secs=2)
+      self.assertEqual(
+          True, tf.is_variable_initialized(
+              sess.graph.get_tensor_by_name(""v:0"")).eval(session=sess))
 
   def testRecoverSession(self):
     # Create a checkpoint.
@@ -109,11 +114,16 @@ class SessionManagerTest(tf.test.TestCase):
     # Create a new Graph and SessionManager and recover.
     with tf.Graph().as_default():
       v = tf.Variable(2, name=""v"")
+      with self.test_session():
+        self.assertEqual(False, tf.is_variable_initialized(v).eval())
       sm2 = tf.train.SessionManager(ready_op=tf.assert_variables_initialized())
       saver = tf.train.Saver({""v"": v})
       sess, initialized = sm2.recover_session("""", saver=saver,
                                               checkpoint_dir=checkpoint_dir)
       self.assertTrue(initialized)
+      self.assertEqual(
+          True, tf.is_variable_initialized(
+              sess.graph.get_tensor_by_name(""v:0"")).eval(session=sess))
       self.assertEquals(1, sess.run(v))
 
   def testWaitForSessionReturnsNoneAfterTimeout(self):
",0,train
a39efe62d28a754bd27d08d485b414c5555c7411,tensorflow/tensorflow,"Fix cluster_resolver test breakage in strategy_common_test.

PiperOrigin-RevId: 318360022
Change-Id: I112e970eb438621d54a5aa935d1b5df63b3a6e9d",strategy_common_test.py,"@@ -160,20 +160,16 @@ class StrategyClusterResolverTest(test.TestCase, parameterized.TestCase):
 
     with strategy.scope():
       self.assertIs(strategy.cluster_resolver, resolver)
+
     self.assertTrue(hasattr(resolver, 'cluster_spec'))
-    if isinstance(strategy, TPUStrategy):
-      self.skipTest('b/159747888')
-    self.assertTrue(hasattr(resolver, 'environment'))
     self.assertTrue(hasattr(resolver, 'master'))
     self.assertTrue(hasattr(resolver, 'num_accelerators'))
-    self.assertIsNone(resolver.rpc_layer)
+    self.assertTrue(hasattr(resolver, 'task_id'))
+    self.assertTrue(hasattr(resolver, 'task_type'))
     if isinstance(strategy, CollectiveAllReduceStrategy):
       self.assertEqual(resolver.task_id, 0)
       self.assertAllInSet(resolver.task_type, ['chief', 'worker'])
-    elif isinstance(strategy, TPUStrategy):
-      # TPUStrategy does not have task_id and task_type applicable.
-      self.assertIsNone(resolver.task_id)
-      self.assertIsNone(resolver.task_type)
+      self.assertIsNone(resolver.rpc_layer)
 
 
 if __name__ == '__main__':
",0,train
c182e9bffe6e67878ff602fc06051c4d6021b7d9,tensorflow/tensorflow,"Add decomposition pattern for tfr.quant_act_range op

quant_act_range determines proper clipping range, given activation and quantization parameters.

PiperOrigin-RevId: 384660997
Change-Id: I850c1f1f8f767fe301c1deceeb3ab03ea9a0cec8",decompose.cc,"@@ -36,6 +36,7 @@ limitations under the License.
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
 #include ""mlir/IR/Attributes.h""  // from @llvm-project
 #include ""mlir/IR/Builders.h""  // from @llvm-project
+#include ""mlir/IR/BuiltinAttributes.h""  // from @llvm-project
 #include ""mlir/IR/BuiltinOps.h""  // from @llvm-project
 #include ""mlir/IR/BuiltinTypes.h""  // from @llvm-project
 #include ""mlir/IR/MLIRContext.h""  // from @llvm-project
@@ -77,6 +78,16 @@ namespace TFR {
 
 namespace {
 
+// Quantize the float value based on given scale and zero point attributes.
+Attribute Quantize(float value, Attribute scale_attr, Attribute zp_attr,
+                   OpBuilder builder) {
+  double scale = scale_attr.cast<FloatAttr>().getValueAsDouble();
+  int64_t zp = zp_attr.cast<IntegerAttr>().getInt();
+
+  int quantized = static_cast<int>(std::round(value / scale) + zp);
+  return builder.getI32IntegerAttr(quantized);
+}
+
 // Decompose the TF ops with the registered composition library.
 struct DecomposeTFOpsPass
     : public PassWrapper<DecomposeTFOpsPass, FunctionPass> {
@@ -111,10 +122,13 @@ struct DecomposeTFOpsPass
   llvm::Optional<ModuleOp> external_tfr_module;
 };
 
+#include ""tensorflow/compiler/mlir/tfr/passes/generated_decompose.inc""
+
 void DecomposeTFOpsPass::ApplyCanonicalization() {
   FuncOp func = getFunction();
   OwningRewritePatternList patterns(&getContext());
 
+  populateWithGenerated(patterns);
   populateCanonicalizationPatterns(func, patterns);
 
   (void)applyPatternsAndFoldGreedily(func, std::move(patterns));
",0,train
4d2627928b737a4332cb4a82e6a110f020a76a65,tensorflow/tensorflow,"Mark certain methods as const

PiperOrigin-RevId: 288605569
Change-Id: I8d47e0289815e3b7031c77c352c27adf8bd7af9a",tensor_handle.cc,"@@ -276,7 +276,7 @@ bool TensorHandle::IsReady() const {
   return is_ready_;
 }
 
-Status TensorHandle::WaitReady(const char* caller) {
+Status TensorHandle::WaitReady(const char* caller) const {
   if (!IsReady()) {
     profiler::TraceMe activity(absl::StrCat(caller, "" WaitReady""),
                                profiler::TraceMeLevel::kInfo);
@@ -375,7 +375,7 @@ Status TensorHandle::CopyInferenceShape(TensorHandle* other) {
   return Status::OK();
 }
 
-Status TensorHandle::NumDims(int* num_dims) {
+Status TensorHandle::NumDims(int* num_dims) const {
   DCHECK(num_dims != nullptr);
   if (!IsReady() && !inference_shape_.unknown_rank()) {
     *num_dims = inference_shape_.dims();
@@ -386,7 +386,7 @@ Status TensorHandle::NumDims(int* num_dims) {
   }
 }
 
-Status TensorHandle::Dim(int dim_index, int64* dim) {
+Status TensorHandle::Dim(int dim_index, int64* dim) const {
   DCHECK(dim != nullptr);
   if (!IsReady() && !inference_shape_.unknown_rank() &&
       inference_shape_.dim_size(dim_index) != -1) {
@@ -398,7 +398,7 @@ Status TensorHandle::Dim(int dim_index, int64* dim) {
   }
 }
 
-Status TensorHandle::NumElements(int64* num_elements) {
+Status TensorHandle::NumElements(int64* num_elements) const {
   DCHECK(num_elements != nullptr);
   if (!IsReady() && inference_shape_.IsFullyDefined()) {
     *num_elements = inference_shape_.num_elements();
",0,train
4d2627928b737a4332cb4a82e6a110f020a76a65,tensorflow/tensorflow,"Mark certain methods as const

PiperOrigin-RevId: 288605569
Change-Id: I8d47e0289815e3b7031c77c352c27adf8bd7af9a",tensor_handle.h,"@@ -124,9 +124,9 @@ class TensorHandle : public core::RefCounted {
   Device* DeviceOrHostCPU(EagerContext* ctx) const;
 
   Status Shape(tensorflow::TensorShape* shape);
-  Status NumDims(int* num_dims);
-  Status Dim(int dim_index, int64* dim);
-  Status NumElements(int64* num_elements);
+  Status NumDims(int* num_dims) const;
+  Status Dim(int dim_index, int64* dim) const;
+  Status NumElements(int64* num_elements) const;
 
 #if !defined(IS_MOBILE_PLATFORM)
   bool HasRemoteMirror(Device* d);
@@ -214,7 +214,7 @@ class TensorHandle : public core::RefCounted {
   // If the contents of the Tensor pointed to by this handle is yet to be
   // computed by a EagerNode, this function will block till that computation is
   // done and the handle is ""ready"".
-  Status WaitReady(const char* caller);
+  Status WaitReady(const char* caller) const;
 
   // TODO(b/136608821): device_ == nullptr iff Host CPU:0
   // This was expedient, but perhaps worth revisiting ('device_' should always
",0,train
d7fbbc00235c8d0c34de7b34a156fb9c576fb209,tensorflow/tensorflow,"[XLA] Enable truncated normal for double.

Fix a problem in testTruncatedNormalIsInRange that causes the test to not
actually run.

Add testTruncatedNormalIsNotConstant for double.

PiperOrigin-RevId: 257417015",random_ops_test.py,"@@ -116,12 +116,14 @@ class RandomOpsTest(xla_test.XLATestCase):
     def rng(dtype):
       return random_ops.truncated_normal(shape=[2], dtype=dtype)
 
-    self._testRngIsNotConstant(rng, dtypes.float32)
+     # TODO(b/34339814): make this test work with 16 bit float types.
+    for dtype in self._random_types() & {np.float32, np.float64}:
+      self._testRngIsNotConstant(rng, dtype)
 
   def testTruncatedNormalIsInRange(self):
     count = 10000000
     # TODO(b/34339814): make this test work with 16 bit float types.
-    for dtype in self._random_types() & {dtypes.float32, dtypes.float64}:
+    for dtype in self._random_types() & {np.float32, np.float64}:
       with self.session() as sess:
         with self.test_scope():
           x = random_ops.truncated_normal(shape=[count], dtype=dtype)
",0,train
d7fbbc00235c8d0c34de7b34a156fb9c576fb209,tensorflow/tensorflow,"[XLA] Enable truncated normal for double.

Fix a problem in testTruncatedNormalIsInRange that causes the test to not
actually run.

Add testTruncatedNormalIsNotConstant for double.

PiperOrigin-RevId: 257417015",random_ops.cc,"@@ -293,7 +293,7 @@ class TruncatedNormalOp : public XlaOpKernel {
 
 REGISTER_XLA_OP(Name(""TruncatedNormal"")
                     .CompileTimeConstantInput(""shape"")
-                    .TypeConstraint(""dtype"", DT_FLOAT),
+                    .TypeConstraint(""dtype"", {DT_FLOAT, DT_DOUBLE}),
                 TruncatedNormalOp);
 
 }  // namespace
",0,train
83e49ee10c4ad54f37ff217b3813cc0f96026b75,tensorflow/tensorflow,"Add infrastructure to save the default fusion configuration in either per-edge or per-node mode.

PiperOrigin-RevId: 254297785",instruction_fusion.cc,"@@ -455,7 +455,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
   module_ = module;
   int64 fuse_count = 0;
   std::vector<std::vector<bool>>* fusion_config = nullptr;
-  if (is_main_fusion_) {
+  if (config_collection_mode_ != FusionConfigCollection::kOff) {
     fusion_config = module->mutable_fusion_config();
     fusion_config->clear();
   }
@@ -550,7 +550,7 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
       }
     }
 
-    if (is_main_fusion_) {
+    if (config_collection_mode_ != FusionConfigCollection::kOff) {
       const std::vector<bool>* comp_fusion_config =
           fusion_queue->FusionConfiguration();
       if (comp_fusion_config && comp_fusion_config->size() > 0) {
@@ -559,17 +559,20 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
     }
   }
 
-  if (is_main_fusion_) {
-    int64 fused_edge_count = 0;
+  if (config_collection_mode_ != FusionConfigCollection::kOff) {
+    int64 fused_count = 0;
     for (auto& config_per_computation : *fusion_config) {
       for (auto edge : config_per_computation) {
-        if (edge) ++fused_edge_count;
+        if (edge) {
+          ++fused_count;
+        }
       }
     }
-    VLOG(4) << ""There are "" << fused_edge_count << "" fused edges that cause ""
+    VLOG(1) << ""There are "" << fused_count << "" fused bits that cause ""
             << fuse_count << "" fusion actions."";
-    VLOG(4) << FusionConfigToString(*fusion_config);
+    VLOG(1) << FusionConfigToString(*fusion_config);
   }
+  VLOG(1) << ""Fusion count: "" << fuse_count;
 
   return changed;
 }
",0,train
83e49ee10c4ad54f37ff217b3813cc0f96026b75,tensorflow/tensorflow,"Add infrastructure to save the default fusion configuration in either per-edge or per-node mode.

PiperOrigin-RevId: 254297785",instruction_fusion.h,"@@ -27,6 +27,12 @@ limitations under the License.
 
 namespace xla {
 
+enum class FusionConfigCollection {
+  kOff,      // Do not collect configuration.
+  kPerEdge,  // Collect per-edge configuration.
+  kPerNode,  // Collect per-node configuration.
+};
+
 // HLO pass which performs instruction fusion. Instructions are fused
 // ""vertically"", meaning producing instructions are fused into their consumers
 // with the intent that the loops which compute their values will be fused in
@@ -36,10 +42,12 @@ class InstructionFusion : public HloModulePass {
  public:
   explicit InstructionFusion(
       std::function<bool(const HloInstruction& instruction)> is_expensive,
-      bool may_duplicate = true, bool main_fusion = false)
+      bool may_duplicate = true,
+      FusionConfigCollection config_collection_mode =
+          FusionConfigCollection::kOff)
       : is_expensive_(is_expensive),
         may_duplicate_(may_duplicate),
-        is_main_fusion_(main_fusion) {}
+        config_collection_mode_(config_collection_mode) {}
   ~InstructionFusion() override = default;
   absl::string_view name() const override { return ""fusion""; }
 
@@ -123,7 +131,9 @@ class InstructionFusion : public HloModulePass {
   // Reachability information for the current computation.
   std::unique_ptr<HloReachabilityMap> reachability_;
 
-  bool is_main_fusion() { return is_main_fusion_; }
+  FusionConfigCollection config_collection_mode() {
+    return config_collection_mode_;
+  }
 
  private:
   // The set of producers whose consumers we cannot fuse into.
@@ -156,8 +166,8 @@ class InstructionFusion : public HloModulePass {
   // Returns whether we may duplicate an instruction if we want to fuse it.
   bool may_duplicate_;
 
-  // Main fusion pass.
-  bool is_main_fusion_;
+  // Configuration mode.
+  FusionConfigCollection config_collection_mode_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(InstructionFusion);
 };
",0,train
6728f85d82d73c34ce9cb7cf03e311c9965f13f6,tensorflow/tensorflow,"Don't treat `type` objects (with __array__) as ndarrays.

PiperOrigin-RevId: 307454154
Change-Id: I6669c41e4dd8256ffd7c4203a1e84ddc2b2f876b",function.py,"@@ -53,6 +53,7 @@ from tensorflow.python.framework import func_graph as func_graph_module
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.framework import type_spec
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -107,32 +108,36 @@ def _make_input_signature_hashable(elem, variable_map=None):
     return tuple(map(lambda e: _make_input_signature_hashable(e, variable_map),
                      elem))
 
-  # If the element is not hashable, assume it is a weakref to a variable
-  # and return the dtype & shape. Else, simply return the element
   try:
     hash(elem)
   except TypeError:
+    # TFE_Py_EncodeArg weakrefs arguments it does not recognize, and we expect
+    # all recognized types to be hashable.
     assert isinstance(elem, weakref.ReferenceType)
     v = elem()
 
-    # Check if v is a Variable.  Note that we can't use isinstance to check if
-    # it's a variable, since not all variable types are subclass of Variable.
-    # TODO(mdan) Update this to use a generic ""Variable"" superclass once we
-    # create one.
-    if not (hasattr(v, ""shape"") and hasattr(v, ""dtype"")):
-      raise ValueError(""Arguments to a tf.function must be Tensors, Variables, ""
-                       ""or hashable Python objects (or nested structures of ""
-                       ""these types).\nGot type: %s"" % type(v).__name__)
-
-    idx = variable_map.get(id(v))
-    if idx is None:
-      idx = len(variable_map)
-      variable_map[id(v)] = idx
-
-    # We include the class name to avoid having different types of variables
-    # having the same hash. We Also include the variable index which allows
-    # us to return a different hash if variables have been aliased in a call.
-    return v.__class__, tensor_spec.TensorSpec(v.shape, v.dtype), idx
+    if resource_variable_ops.is_resource_variable(v):
+      idx = variable_map.get(id(v))
+      if idx is None:
+        idx = len(variable_map)
+        variable_map[id(v)] = idx
+
+      # We include the class name to avoid having different types of variables
+      # having the same hash. We Also include the variable index which allows
+      # us to return a different hash if variables have been aliased in a call.
+      return v.__class__, tensor_spec.TensorSpec(v.shape, v.dtype), idx
+
+    if _is_ndarray(v):
+      # Numpy arrays are not hashable, but when calling functions we treat them
+      # in the same way as tf.Tensors.
+      if not hasattr(v, ""shape"") or not hasattr(v, ""dtype""):
+        # TODO(tomhennigan) De-dup with _as_ndarray in _convert_numpy_inputs.
+        v = _as_ndarray(v)
+      return tensor_spec.TensorSpec(v.shape, v.dtype)
+
+    raise ValueError(""Arguments to a tf.function must be Tensors, Variables, ""
+                     ""or hashable Python objects (or nested structures of ""
+                     ""these types).\nGot type: %s"" % type(v).__name__)
 
   return elem
 
@@ -2668,6 +2673,24 @@ class FunctionSpec(object):
       return inputs, {}
 
 
+def _as_ndarray(value):
+  """"""Converts value to an ndarray, assumes _is_ndarray(value).""""""
+  # TODO(tomhennigan) Support __array_interface__ too.
+  return value.__array__()
+
+
+def _is_ndarray(value):
+  """"""Tests whether the given value is an ndarray (and not a TF tensor/var).""""""
+  # TODO(tomhennigan) Support __array_interface__ too.
+  return hasattr(value, ""__array__"") and not (
+      resource_variable_ops.is_resource_variable(value)
+      or tensor_util.is_tensor(value)
+      # For legacy reasons we do not automatically promote Numpy strings.
+      or isinstance(value, np.str_)
+      # NumPy dtypes have __array__ as unbound methods.
+      or isinstance(value, type))
+
+
 def _convert_numpy_inputs(inputs):
   """"""Convert numpy array inputs to tensors.""""""
   # We assume that any CompositeTensors have already converted their components
@@ -2680,8 +2703,12 @@ def _convert_numpy_inputs(inputs):
   # possible since ndarrays are not hashable).
   need_packing = False
   for index, value in enumerate(flat_inputs):
-    if type(value) == np.ndarray:
-      flat_inputs[index] = constant_op.constant(value)
+    if _is_ndarray(value):
+      a = _as_ndarray(value)
+      if not isinstance(a, np.ndarray):
+        raise TypeError(""The output of __array__ must be an np.ndarray ""
+                        ""(got {} from {})."".format(type(a), type(value)))
+      flat_inputs[index] = constant_op.constant(a)
       need_packing = True
   if need_packing:
     return nest.pack_sequence_as(
",0,test
6728f85d82d73c34ce9cb7cf03e311c9965f13f6,tensorflow/tensorflow,"Don't treat `type` objects (with __array__) as ndarrays.

PiperOrigin-RevId: 307454154
Change-Id: I6669c41e4dd8256ffd7c4203a1e84ddc2b2f876b",function_test.py,"@@ -775,11 +775,44 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     # shouldn't trigger another function definition.
     self.assertLen(total_function_cache(defined), 1)
 
+    np_ones = numpy.ones([], numpy.float32)
+    np_zeros = numpy.zeros([], numpy.float32)
+    tf_ones = array_ops.ones([])
+    tf_zeros = array_ops.zeros([])
+
     # Test that the numpy array is properly an argument to the graph function.
-    self.assertEqual(1., defined(numpy.ones([])).numpy())
-    self.assertEqual(0., defined(numpy.zeros([])).numpy())
-    self.assertEqual(1., defined(array_ops.ones([])).numpy())
-    self.assertEqual(0., defined(array_ops.zeros([])).numpy())
+    self.assertEqual(1., defined(np_ones).numpy())
+    self.assertLen(total_function_cache(defined), 2)
+    self.assertEqual(0., defined(np_zeros).numpy())
+    self.assertEqual(1., defined(tf_ones).numpy())
+    self.assertEqual(0., defined(tf_zeros).numpy())
+    self.assertLen(total_function_cache(defined), 2)
+
+    # Test that mutable inputs are supported.
+    mutable = numpy.ones([], numpy.float32)
+    self.assertEqual(1., defined(mutable).numpy())
+    mutable.fill(0)
+    self.assertEqual(0., defined(mutable).numpy())
+
+    class MyNdarray(numpy.ndarray):
+      pass
+
+     # Test that the subclasses of ndarray are converted too.
+    self.assertEqual(1., defined(np_ones.view(MyNdarray)).numpy())
+    self.assertEqual(0., defined(np_zeros.view(MyNdarray)).numpy())
+
+    # We should not have triggered any re-tracing of the python function.
+    self.assertLen(total_function_cache(defined), 2)
+
+  def testNumpyDtypeInputSupported(self):
+    @function.defun
+    def f(x, dtype):
+      return constant_op.constant(dtype(x))
+
+    self.assertEqual(f(1, numpy.float32).numpy(), numpy.float32(1))
+    self.assertEqual(f(2, numpy.float32).numpy(), numpy.float32(2))
+    self.assertEqual(f(1, numpy.int32).numpy(), numpy.int32(1))
+    self.assertEqual(f(2, numpy.int32).numpy(), numpy.int32(2))
 
   def testDefunNumpyArraysConvertedToTensorsInKwargs(self):
 
",0,test
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",chlo_ops.h,"@@ -32,8 +32,14 @@ namespace mlir {
 namespace chlo {
 
 class HloClientDialect : public Dialect {
+  void initialize();
+
  public:
-  explicit HloClientDialect(MLIRContext *context);
+  explicit HloClientDialect(MLIRContext *context)
+      : Dialect(getDialectNamespace(), context,
+                TypeID::get<HloClientDialect>()) {
+    initialize();
+  }
   static StringRef getDialectNamespace() { return ""chlo""; }
 };
 
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",chlo_ops.cc,"@@ -266,8 +266,7 @@ BROADCAST_BINARY_OP_DEFS(BroadcastXorOp);
 // chlo Dialect Constructor
 //===----------------------------------------------------------------------===//
 
-HloClientDialect::HloClientDialect(MLIRContext* context)
-    : Dialect(getDialectNamespace(), context) {
+void HloClientDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
 #include ""mlir-hlo/Dialect/mhlo/IR/chlo_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",hlo_ops.cc,"@@ -2188,7 +2188,7 @@ struct HLOInlinerInterface : public DialectInlinerInterface {
 //===----------------------------------------------------------------------===//
 
 MhloDialect::MhloDialect(MLIRContext* context)
-    : Dialect(getDialectNamespace(), context) {
+    : Dialect(getDialectNamespace(), context, TypeID::get<MhloDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",lhlo_ops.cc,"@@ -49,7 +49,7 @@ namespace mlir {
 namespace lmhlo {
 
 LmhloDialect::LmhloDialect(MLIRContext *context)
-    : Dialect(getDialectNamespace(), context) {
+    : Dialect(getDialectNamespace(), context, TypeID::get<LmhloDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""mlir-hlo/Dialect/mhlo/IR/lhlo_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tfl_ops.cc,"@@ -269,7 +269,7 @@ struct TensorFlowLiteOpFolderDialectInterface
 };
 
 TensorFlowLiteDialect::TensorFlowLiteDialect(mlir::MLIRContext *context)
-    : Dialect(/*name=*/""tfl"", context) {
+    : Dialect(/*name=*/""tfl"", context, TypeID::get<TensorFlowLiteDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/lite/ir/tfl_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_device.cc,"@@ -101,7 +101,8 @@ bool BlockWrapsSingleOp(Block* block) {
 }  // end anonymous namespace
 
 TensorFlowDeviceDialect::TensorFlowDeviceDialect(MLIRContext* context)
-    : Dialect(/*name=*/""tf_device"", context) {
+    : Dialect(/*name=*/""tf_device"", context,
+              TypeID::get<TensorFlowDeviceDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_device.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_executor.cc,"@@ -92,7 +92,8 @@ struct TensorFlowExecutorOpFolderDialectInterface
 }  // namespace
 
 TensorFlowExecutorDialect::TensorFlowExecutorDialect(MLIRContext *context)
-    : Dialect(/*name=*/""tf_executor"", context) {
+    : Dialect(/*name=*/""tf_executor"", context,
+              TypeID::get<TensorFlowExecutorDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_executor.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_ops.cc,"@@ -188,7 +188,7 @@ std::vector<TensorFlowDialect::AdditionalOpFunction>
         new std::vector<TensorFlowDialect::AdditionalOpFunction>();
 
 TensorFlowDialect::TensorFlowDialect(MLIRContext *context)
-    : Dialect(/*name=*/""tf"", context) {
+    : Dialect(/*name=*/""tf"", context, TypeID::get<TensorFlowDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_all_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_saved_model.cc,"@@ -113,7 +113,8 @@ static LogicalResult Verify(SessionInitializerOp session_initializer) {
 //===----------------------------------------------------------------------===//
 
 TensorFlowSavedModelDialect::TensorFlowSavedModelDialect(MLIRContext *context)
-    : Dialect(/*name=*/""tf_saved_model"", context) {
+    : Dialect(/*name=*/""tf_saved_model"", context,
+              TypeID::get<TensorFlowSavedModelDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tfjs_ops.cc,"@@ -25,8 +25,7 @@ namespace tfjs {
 // TFJSDialect
 //===----------------------------------------------------------------------===//
 
-TFJSDialect::TFJSDialect(MLIRContext *context)
-    : Dialect(getDialectNamespace(), context) {
+void TFJSDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tfjs/ir/tfjs_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",cubin_creator.cc,"@@ -278,7 +278,8 @@ StatusOr<std::vector<uint8_t>> tensorflow::kernel_gen::GenerateCubinForTfCode(
 
   mlir::OwningModuleRef kernel_module =
       xla::mlir_gpu::ExtractKernelModule(*module).ValueOrDie();
-  auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module);
+  llvm::LLVMContext llvmContext;
+  auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext);
   if (!llvmModule) {
     return InternalError(""Could not translate MLIR module to NVVM"");
   }
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",tf_framework_ops.cc,"@@ -24,8 +24,7 @@ namespace mlir {
 namespace kernel_gen {
 namespace tf_framework {
 
-TFFrameworkDialect::TFFrameworkDialect(MLIRContext *context)
-    : Dialect(getDialectNamespace(), context) {
+void TFFrameworkDialect::initialize() {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/mlir/tools/kernel_gen/ir/tf_framework_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",cpu_compiler.cc,"@@ -622,10 +622,9 @@ StatusOr<std::unique_ptr<Executable>> CpuCompiler::RunBackend(
 
   // Compile must be thread-safe so create a new LLVM context for the module.
   mlir::MLIRContext mlir_context;
-  auto llvm_module = absl::make_unique<llvm::Module>(
-      ""__compute_module"",
-      mlir_context.getRegisteredDialect<mlir::LLVM::LLVMDialect>()
-          ->getLLVMContext());
+  llvm::LLVMContext llvm_context;
+  auto llvm_module =
+      absl::make_unique<llvm::Module>(""__compute_module"", llvm_context);
 
   auto jit = absl::make_unique<SimpleOrcJIT>(
       CompilerTargetOptions(module->config()),
@@ -834,10 +833,8 @@ CpuCompiler::CompileAheadOfTime(std::unique_ptr<HloModuleGroup> module_group,
 
   // Compile must be thread-safe so create a new LLVM context for the module.
   mlir::MLIRContext mlir_context;
-  llvm::Module llvm_module(
-      ""__compute_module"",
-      mlir_context.getRegisteredDialect<mlir::LLVM::LLVMDialect>()
-          ->getLLVMContext());
+  llvm::LLVMContext llvm_context;
+  llvm::Module llvm_module(""__compute_module"", llvm_context);
   llvm_module.setDataLayout(target_machine->createDataLayout());
   llvm_module.setTargetTriple(triple.getTriple());
   if (pic_level != llvm::PICLevel::NotPIC) {
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_emitter.cc,"@@ -32,7 +32,8 @@ namespace cpu {
 namespace {
 
 // Lower an MLIR module to an LLVM module.
-std::unique_ptr<llvm::Module> MakeLLVMModule(mlir::OwningModuleRef module) {
+std::unique_ptr<llvm::Module> MakeLLVMModule(mlir::OwningModuleRef module,
+                                             llvm::LLVMContext *context) {
   // When set, the LLVM backend will be allowed to reassociate floating-point
   // reductions, which enables much more efficient ""horizontal"" SIMD
   // implementations.
@@ -47,7 +48,7 @@ std::unique_ptr<llvm::Module> MakeLLVMModule(mlir::OwningModuleRef module) {
       mlir::LowerVectorToLLVMOptions().setReassociateFPReductions(
           kReassociateFPReductions)));
   CHECK(succeeded(manager.run(*module)));
-  return mlir::translateModuleToLLVMIR(*module);
+  return mlir::translateModuleToLLVMIR(*module, *context);
 }
 
 // Get arguments to pass a memref to an mlir function.
@@ -114,7 +115,8 @@ Status EmitMlirFuncAndCall(
   emitter(&op_builder, function);
 
   // Now link it all into the main LLVM module.
-  auto mlir_llvm_module = MakeLLVMModule(std::move(mlir_module));
+  auto mlir_llvm_module =
+      MakeLLVMModule(std::move(mlir_module), &b->getContext());
   mlir_llvm_module->setDataLayout(llvm_module->getDataLayout());
   llvm::Linker::linkModules(
       *llvm_module, std::move(mlir_llvm_module), llvm::Linker::None,
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",xla_thunks_ops.cc,"@@ -28,7 +28,7 @@ namespace mlir {
 namespace xla_thunks {
 
 XLAThunksDialect::XLAThunksDialect(MLIRContext *context)
-    : Dialect(getDialectNamespace(), context) {
+    : Dialect(getDialectNamespace(), context, TypeID::get<XLAThunksDialect>()) {
   addOperations<
 #define GET_OP_LIST
 #include ""tensorflow/compiler/xla/service/gpu/ir/xla_thunks_ops.cc.inc""
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",lhlo_dialect_emitter.cc,"@@ -205,7 +205,7 @@ LhloDialectEmitter::LhloDialectEmitter(
       platform_(platform) {
   LLVMDialect* llvmDialect =
       mlir_module.getContext()->getRegisteredDialect<LLVMDialect>();
-  pointer_size_ = llvmDialect->getLLVMModule().getDataLayout().getPointerSize();
+  pointer_size_ = llvmDialect->getDataLayout().getPointerSize();
 }
 
 void LhloDialectEmitter::AddThunkToThunkSequence(std::unique_ptr<Thunk> thunk) {
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_compiler.cc,"@@ -30,18 +30,14 @@ namespace {
 using ::mlir::MLIRContext;
 using ::mlir::LLVM::LLVMDialect;
 
-int64 ConfigureLLVMModuleAndGetPointerSize(MLIRContext* context) {
+int64 GetPointerSize(MLIRContext* context) {
   LLVMDialect* dialect = context->getRegisteredDialect<LLVMDialect>();
-  llvm::Module& module = dialect->getLLVMModule();
-  module.setTargetTriple(gpu::nvptx::kTargetTriple);
-  module.setDataLayout(gpu::nvptx::kDataLayout);
-  return module.getDataLayout().getPointerSize();
+  return dialect->getDataLayout().getPointerSize();
 }
 
 }  // namespace
 
-MlirCompiler::MlirCompiler()
-    : pointer_size_(ConfigureLLVMModuleAndGetPointerSize(&context_)) {}
+MlirCompiler::MlirCompiler() : pointer_size_(GetPointerSize(&context_)) {}
 
 se::Platform::Id MlirCompiler::PlatformId() const {
   return stream_executor::cuda::kCudaPlatformId;
",0,train
c1a176930a9d5895bdd87b91ba0dde53a0aa1a35,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@b6d9add71b1a

Updates LLVM usage to match
[b6d9add71b1a](https://github.com/llvm/llvm-project/commit/b6d9add71b1a)

PiperOrigin-RevId: 325589103
Change-Id: If80989dd59ceb82283256a4149cceb3062ec2c72",mlir_compiler_impl.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <vector>
 
 #include ""absl/container/flat_hash_map.h""
+#include ""llvm/IR/LLVMContext.h""
 #include ""mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h""  // from @llvm-project
 #include ""mlir/Dialect/GPU/GPUDialect.h""  // from @llvm-project
 #include ""mlir/Dialect/LLVMIR/LLVMDialect.h""  // from @llvm-project
@@ -543,7 +544,11 @@ StatusOr<std::unique_ptr<Executable>> MlirCompilerImpl::RunBackend(
   TF_RETURN_IF_ERROR(
       module_hook_.invoke(IRHook::LoweringStage::KERNEL, *kernel_module));
 
-  auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module);
+  // Translate to LLVM IR in a fresh context. The module is further translated
+  // to textual PTX and a CUBIN blob so there is no need for the context to live
+  // longer than this function.
+  llvm::LLVMContext llvmContext;
+  auto llvmModule = mlir::translateModuleToNVVMIR(*kernel_module, llvmContext);
 
   if (!llvmModule) {
     return InternalError(""Translation to LLVM failed"");
",0,train
4a7fedd035d446dbf5af689ff64053815faa3ec7,tensorflow/tensorflow,"[XLA:SPMD] Do not shard replicated 1D broadcast if its size is relatively
small.

PiperOrigin-RevId: 425733363
Change-Id: I134c578a1b173d3f1bda7b269b78001619e7e362",sharding_propagation.cc,"@@ -726,6 +726,16 @@ bool InferShardingFromUsers(
                                   false)) {
     return false;
   }
+
+  // TODO(b/214615180): Remove this special handling after a general solution.
+  // If the replicated broadcast is 1D and its size is relatively small,
+  // there is no need to shard it.
+  if (is_spmd && instruction->opcode() == HloOpcode::kBroadcast &&
+      instruction->has_sharding() && instruction->sharding().IsReplicated() &&
+      instruction->shape().IsArray() && instruction->shape().rank() == 1 &&
+      instruction->shape().dimensions(0) <= 128) {
+    return false;
+  }
   bool improved_sharding = false;
   const bool may_combine_partial_sharding = is_spmd && aggressiveness > 0;
   for (const HloInstruction* user : instruction->users()) {
",0,train
4a7fedd035d446dbf5af689ff64053815faa3ec7,tensorflow/tensorflow,"[XLA:SPMD] Do not shard replicated 1D broadcast if its size is relatively
small.

PiperOrigin-RevId: 425733363
Change-Id: I134c578a1b173d3f1bda7b269b78001619e7e362",sharding_propagation_test.cc,"@@ -427,6 +427,31 @@ ENTRY %broadcast {
   }
 }
 
+TEST_P(ParameterizedMetadataTest, Broadcast1DBackwardNoChange) {
+  const char* const hlo_string = R""(
+HloModule module
+ENTRY %broadcast {
+  %param0 = s32[128]{0} parameter(0)
+  %constant0 = s32[] constant(0), sharding={replicated}
+  %broadcast = s32[128]{0} broadcast(%constant0), dimensions={}, sharding={replicated}
+  ROOT %compare = pred[128]{0} compare(s32[128]{0} %param0, s32[128]{0} %broadcast),
+    direction=NE, sharding={devices=[4]0,1,2,3}
+})"";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+  if (GetParam().clear_metadata) {
+    ClearMetadata(module.get());
+  }
+  TF_ASSERT_OK_AND_ASSIGN(
+      bool changed,
+      ShardingPropagation(/*is_spmd=*/false, GetParam().propagate_metadata)
+          .Run(module.get()));
+  EXPECT_FALSE(changed);
+  auto* instruction = FindInstruction(module.get(), ""broadcast"");
+  ASSERT_NE(instruction, nullptr);
+  EXPECT_THAT(instruction, op::Sharding(""{replicated}""));
+}
+
 TEST_P(ParameterizedMetadataTestWithOutput, BroadcastForwardPartial) {
   const char* const hlo_string = R""(
 HloModule module
",0,train
77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b

PiperOrigin-RevId: 319229381
Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",tf_executor.h,"@@ -53,7 +53,7 @@ enum Kind {
 
 // The Control type is a token-like value that models control dependencies from
 // TensorFlow graphs.
-class ControlType : public Type::TypeBase<ControlType, Type> {
+class ControlType : public Type::TypeBase<ControlType, Type, TypeStorage> {
  public:
   using Base::Base;
 
@@ -65,7 +65,7 @@ class ControlType : public Type::TypeBase<ControlType, Type> {
   static bool kindof(unsigned kind) { return kind == TFTypes::Control; }
 };
 
-class TokenType : public Type::TypeBase<TokenType, Type> {
+class TokenType : public Type::TypeBase<TokenType, Type, TypeStorage> {
  public:
   using Base::Base;
 
",0,test
77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b

PiperOrigin-RevId: 319229381
Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",tf_types.h,"@@ -110,9 +110,10 @@ namespace detail {
 //   - `static unsigned getTypeKind()` that returns the (fixed) kind of the
 //     type.
 template <typename Derived>
-class TensorFlowTypeImpl : public Type::TypeBase<Derived, TensorFlowType> {
+class TensorFlowTypeImpl
+    : public Type::TypeBase<Derived, TensorFlowType, TypeStorage> {
  public:
-  using Base = typename Type::TypeBase<Derived, TensorFlowType>;
+  using Base = typename Type::TypeBase<Derived, TensorFlowType, TypeStorage>;
   using TFBase = TensorFlowTypeImpl<Derived>;
   using Base::Base;
 
",0,test
77535c387165d9c27cf9617b33a882b6ceae05bd,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/7d9518c8000b

PiperOrigin-RevId: 319229381
Change-Id: Ibc22f7a1474a8354c5c023346de2fe3ce80ff460",hlo_ops.h,"@@ -62,7 +62,7 @@ enum Kind {
 };
 }  // namespace HLOTypes
 
-class TokenType : public Type::TypeBase<TokenType, Type> {
+class TokenType : public Type::TypeBase<TokenType, Type, TypeStorage> {
  public:
   using Base::Base;
 
",0,test
fc2d7fdacb35001e9b98ff8b844679985bbf61a4,tensorflow/tensorflow,"[Executor] Reorganize code in `ExecutorState::NodeDone()` for efficiency.

Executor microbenchmarks show a 3.22% to 4.16% improvement with this change, which avoids re-checking the status multiple times in the non-error case.

PiperOrigin-RevId: 304719934
Change-Id: I6a9e3d1db8b13f32eb558a57fcb272c07ba1079a",executor.cc,"@@ -316,6 +316,8 @@ class ExecutorState {
   // nodes in 'ready' into 'inline_ready'.
   //
   // This method will clear `*ready` before returning.
+  //
+  // REQUIRES: `!ready->empty()`.
   void ScheduleReady(TaggedNodeSeq* ready, TaggedNodeReadyQueue* inline_ready);
 
   // Clean up when this executor is done.
@@ -1022,73 +1024,80 @@ template <class PropagatorStateType>
 bool ExecutorState<PropagatorStateType>::NodeDone(
     const Status& s, TaggedNodeSeq* ready, NodeExecStatsInterface* stats,
     TaggedNodeReadyQueue* inline_ready) {
-  nodestats::SetAllEnd(stats);
   if (stats) {
-    if (stats_collector_) {
-      stats->Done(immutable_state_.params().device->name());
+    nodestats::SetAllEnd(stats);
+    DCHECK_NE(stats_collector_, nullptr);
+    stats->Done(immutable_state_.params().device->name());
+  }
+
+  if (TF_PREDICT_TRUE(s.ok())) {
+    const size_t ready_size = ready->size();
+    if (ready_size == 0) {
+      return num_outstanding_ops_.fetch_sub(1) == 1;
     } else {
-      delete stats;
+      // NOTE: Avoid touching the atomic counter if only one node becomes ready.
+      if (ready_size > 1) {
+        num_outstanding_ops_.fetch_add(ready_size - 1,
+                                       std::memory_order_relaxed);
+      }
+
+      // Schedule the ready nodes in 'ready'.
+      ScheduleReady(ready, inline_ready);
+
+      return false;
     }
-  }
+  } else {
+    bool abort_run = false;
 
-  bool abort_run = false;
-  if (!s.ok()) {
     // Some error happened. This thread of computation is done.
-    mutex_lock l(mu_);
-    if (status_.ok()) {
-      abort_run = true;
-
-      // If execution has been cancelled, mark any new errors as being derived.
-      // This ensures any errors triggered by cancellation are marked as
-      // derived.
-      if (cancellation_manager_ && cancellation_manager_->IsCancelled()) {
-        status_ = StatusGroup::MakeDerived(s);
-      } else {
-        status_ = s;
+    {
+      mutex_lock l(mu_);
+      if (status_.ok()) {
+        // If this is the first node to fail in this run, we are responsible for
+        // aborting all other execution in the step.
+        abort_run = true;
+
+        // If execution has been cancelled, mark any new errors as being
+        // derived. This ensures any errors triggered by cancellation are marked
+        // as derived.
+        if (cancellation_manager_ && cancellation_manager_->IsCancelled()) {
+          status_ = StatusGroup::MakeDerived(s);
+        } else {
+          status_ = s;
+        }
       }
     }
-  }
-  if (abort_run) {
-    TRACEPRINTF(""StartAbort: %s"", s.ToString().c_str());
-    if (cancellation_manager_) {
-      // only log when the abort happens during the actual run time.
-      auto device_name = immutable_state_.params().device->name();
-      // Use VLOG instead of LOG(warning) because error status is expected when
-      // the executor is run under the grappler optimization phase or when
-      // iterating through a tf.data input pipeline.
-      VLOG(1) << ""["" << device_name << ""] Executor start aborting: "" << s;
-    }
 
-    if (rendezvous_) {
-      rendezvous_->StartAbort(s);
-    }
-    if (collective_executor_) {
-      collective_executor_->StartAbort(s);
-    }
-    if (cancellation_manager_) {
-      cancellation_manager_->StartCancel();
-    }
-  }
+    if (abort_run) {
+      TRACEPRINTF(""StartAbort: %s"", s.ToString().c_str());
+      if (cancellation_manager_) {
+        // Only log when the abort happens during the actual run time.
+        // Use VLOG instead of LOG(warning) because error status is expected
+        // when the executor is run under the grappler optimization phase or
+        // when iterating through a tf.data input pipeline.
+        VLOG(1) << ""["" << immutable_state_.params().device->name()
+                << ""] Executor start aborting: "" << s;
+      }
 
-  bool completed = false;
-  const size_t ready_size = ready->size();
-  if (ready_size == 0 || !s.ok()) {
-    completed = (num_outstanding_ops_.fetch_sub(1) == 1);
-  } else if (ready_size > 1) {
-    num_outstanding_ops_.fetch_add(ready_size - 1, std::memory_order_relaxed);
-  }
+      if (rendezvous_) {
+        rendezvous_->StartAbort(s);
+      }
+      if (collective_executor_) {
+        collective_executor_->StartAbort(s);
+      }
+      if (cancellation_manager_) {
+        cancellation_manager_->StartCancel();
+      }
+    }
 
-  // Schedule the ready nodes in 'ready'.
-  if (s.ok()) {
-    ScheduleReady(ready, inline_ready);
+    return num_outstanding_ops_.fetch_sub(1) == 1;
   }
-  return completed;
 }
 
 template <class PropagatorStateType>
 void ExecutorState<PropagatorStateType>::ScheduleReady(
     TaggedNodeSeq* ready, TaggedNodeReadyQueue* inline_ready) {
-  if (ready->empty()) return;
+  DCHECK(!ready->empty());
 
   int64 scheduled_nsec = 0;
   if (stats_collector_) {
",0,train
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",cpu_device.cc,"@@ -28,7 +28,7 @@ CpuDevice::CpuDevice(int id,
     : PjRtDevice(id, std::move(local_device_state), kCpuPlatformName,
                  /*device_kind=*/kCpuPlatformName) {}
 
-StatusOr<std::shared_ptr<PjRtClient>> GetCpuClient(bool asynchronous) {
+StatusOr<std::unique_ptr<PjRtClient>> GetCpuClient(bool asynchronous) {
   TF_ASSIGN_OR_RETURN(se::Platform * platform,
                       PlatformUtil::GetPlatform(""Host""));
   if (platform->VisibleDeviceCount() <= 0) {
@@ -56,7 +56,7 @@ StatusOr<std::shared_ptr<PjRtClient>> GetCpuClient(bool asynchronous) {
     devices.push_back(std::move(device));
   }
 
-  return std::make_shared<PjRtClient>(
+  return std::make_unique<PjRtClient>(
       kCpuPlatformName, client, std::move(devices), /*host_id=*/0,
       /*allocator=*/nullptr, /*host_memory_allocator=*/nullptr,
       /*should_stage_host_to_device_transfers=*/false,
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",cpu_device.h,"@@ -28,7 +28,7 @@ class CpuDevice : public PjRtDevice {
   CpuDevice(int id, std::unique_ptr<LocalDeviceState> local_device_state);
 };
 
-StatusOr<std::shared_ptr<PjRtClient>> GetCpuClient(bool asynchronous);
+StatusOr<std::unique_ptr<PjRtClient>> GetCpuClient(bool asynchronous);
 
 }  // namespace xla
 
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",gpu_multistream_test.cc,"@@ -28,7 +28,7 @@ namespace {
 // computation wait for the inputs to be produced before executing.
 TEST(GpuMultiStream, Basics) {
   TF_ASSERT_OK_AND_ASSIGN(
-      std::shared_ptr<PjRtClient> client,
+      std::unique_ptr<PjRtClient> client,
       GetNvidiaGpuClient(/*asynchronous=*/true, GpuAllocatorConfig(),
                          /*distributed_client=*/nullptr, /*node_id=*/0));
 
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",interpreter_device.cc,"@@ -28,7 +28,7 @@ InterpreterDevice::InterpreterDevice(
     : PjRtDevice(id, std::move(local_device_state), kInterpreterPlatformName,
                  /*device_kind=*/kInterpreterPlatformName) {}
 
-StatusOr<std::shared_ptr<PjRtClient>> GetInterpreterClient() {
+StatusOr<std::unique_ptr<PjRtClient>> GetInterpreterClient() {
   TF_ASSIGN_OR_RETURN(se::Platform * platform,
                       PlatformUtil::GetPlatform(""Interpreter""));
   if (platform->VisibleDeviceCount() != 1) {
@@ -50,7 +50,7 @@ StatusOr<std::shared_ptr<PjRtClient>> GetInterpreterClient() {
       absl::make_unique<InterpreterDevice>(0, std::move(device_state));
   devices.push_back(std::move(device));
 
-  return std::make_shared<PjRtClient>(
+  return std::make_unique<PjRtClient>(
       kInterpreterPlatformName, client, std::move(devices), /*host_id=*/0,
       /*allocator=*/nullptr, /*host_memory_allocator=*/nullptr,
       /*should_stage_host_to_device_transfers=*/false,
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",interpreter_device.h,"@@ -29,7 +29,7 @@ class InterpreterDevice : public PjRtDevice {
                     std::unique_ptr<LocalDeviceState> local_device_state);
 };
 
-StatusOr<std::shared_ptr<PjRtClient>> GetInterpreterClient();
+StatusOr<std::unique_ptr<PjRtClient>> GetInterpreterClient();
 
 }  // namespace xla
 
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",nvidia_gpu_device.cc,"@@ -301,7 +301,7 @@ GpuDevice::GpuDevice(int id,
     : PjRtDevice(id, std::move(local_device_state), kGpuPlatformName,
                  std::move(device_kind), node_id) {}
 
-StatusOr<std::shared_ptr<PjRtClient>> GetNvidiaGpuClient(
+StatusOr<std::unique_ptr<PjRtClient>> GetNvidiaGpuClient(
     bool asynchronous, const GpuAllocatorConfig& allocator_config,
     std::shared_ptr<DistributedRuntimeClient> distributed_client, int node_id) {
   TF_ASSIGN_OR_RETURN(LocalClient * xla_client, GetGpuXlaClient());
@@ -324,13 +324,12 @@ StatusOr<std::shared_ptr<PjRtClient>> GetNvidiaGpuClient(
     devices = BuildLocalDevices(std::move(local_device_states));
   }
 
-  std::shared_ptr<PjRtClient> pyclient = std::make_shared<GpuClient>(
+  return std::unique_ptr<PjRtClient>(std::make_unique<GpuClient>(
       ""gpu"", xla_client, std::move(devices),
       /*node_id=*/node_id, std::move(allocator),
       std::move(host_memory_allocator),
       /*should_stage_host_to_device_transfers=*/true,
-      /*gpu_run_options=*/std::move(gpu_run_options));
-  return pyclient;
+      /*gpu_run_options=*/std::move(gpu_run_options)));
 }
 
 }  // namespace xla
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",nvidia_gpu_device.h,"@@ -53,7 +53,7 @@ struct GpuAllocatorConfig {
 
 // distributed_client may be nullptr in non-distributed settings.
 // distributed_client should not be Open()ed before calling this function.
-StatusOr<std::shared_ptr<PjRtClient>> GetNvidiaGpuClient(
+StatusOr<std::unique_ptr<PjRtClient>> GetNvidiaGpuClient(
     bool asynchronous, const GpuAllocatorConfig& allocator_config,
     std::shared_ptr<DistributedRuntimeClient> distributed_client, int node_id);
 
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",py_client.cc,"@@ -30,6 +30,8 @@ namespace xla {
 namespace py = pybind11;
 namespace pprof = tensorflow::tfprof::pprof;
 
+PyClient::PyClient(std::unique_ptr<PjRtClient> pjrt_client)
+    : pjrt_client_(std::move(pjrt_client)) {}
 PyClient::PyClient(std::shared_ptr<PjRtClient> pjrt_client)
     : pjrt_client_(std::move(pjrt_client)) {}
 
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",py_client.h,"@@ -88,6 +88,7 @@ ClientAndPtr<T> WrapWithClient(std::shared_ptr<PyClient> client, T* contents) {
 // We use a wrapper class to add Python-specific functionality.
 class PyClient : public std::enable_shared_from_this<PyClient> {
  public:
+  explicit PyClient(std::unique_ptr<PjRtClient> pjrt_client);
   explicit PyClient(std::shared_ptr<PjRtClient> pjrt_client);
 
   PjRtClient* pjrt_client() const { return pjrt_client_.get(); }
",0,test
0581f8b86e6ba8c27f5a5236183f2b6aeff66c51,tensorflow/tensorflow,"[PJRT] Change client factory methods to return a std::unique_ptr<> instead of a std::shared_ptr<>.

We don't need shared ownership any more at the level of the C++ API, and the style guide recommends unique ownership where possible.

Refactoring only, NFC intended.

PiperOrigin-RevId: 336080771
Change-Id: I29ac1b40bf5815c2f6bc44851f102f40fa6c6ed3",xla.cc,"@@ -556,13 +556,13 @@ PYBIND11_MODULE(xla_extension, m) {
   m.def(
       ""get_cpu_client"",
       [](bool asynchronous) -> StatusOr<std::shared_ptr<PyClient>> {
-        TF_ASSIGN_OR_RETURN(std::shared_ptr<PjRtClient> client,
+        TF_ASSIGN_OR_RETURN(std::unique_ptr<PjRtClient> client,
                             GetCpuClient(asynchronous));
         return std::make_shared<PyClient>(std::move(client));
       },
       py::arg(""asynchronous"") = true);
   m.def(""get_interpreter_client"", []() -> StatusOr<std::shared_ptr<PyClient>> {
-    TF_ASSIGN_OR_RETURN(std::shared_ptr<PjRtClient> client,
+    TF_ASSIGN_OR_RETURN(std::unique_ptr<PjRtClient> client,
                         GetInterpreterClient());
     return std::make_shared<PyClient>(std::move(client));
   });
@@ -572,7 +572,7 @@ PYBIND11_MODULE(xla_extension, m) {
          std::shared_ptr<DistributedRuntimeClient> distributed_client,
          int node_id) -> StatusOr<std::shared_ptr<PyClient>> {
         TF_ASSIGN_OR_RETURN(
-            std::shared_ptr<PjRtClient> client,
+            std::unique_ptr<PjRtClient> client,
             GetNvidiaGpuClient(asynchronous, allocator_config,
                                std::move(distributed_client), node_id));
         return std::make_shared<PyClient>(std::move(client));
",0,test
2f3eb7b5c2fd927ec2b21ae972a39788cdce89c4,tensorflow/tensorflow,"Nest shouldn't barf on ranges.

PiperOrigin-RevId: 246185019",def_function_test.py,"@@ -17,6 +17,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from six.moves import range
+
 import functools
 import weakref
 
@@ -113,6 +115,14 @@ class DefFunctionTest(test.TestCase):
     with self.assertRaises(ValueError):
       fn(1.0)
 
+  def testRange(self):
+
+    @def_function.function
+    def f(unused_x):
+      return 1.0
+
+    self.assertAllEqual(f(range(5)), 1.0)
+
   def testCorrectVariableCreation(self):
 
     state = []
",0,train
2f3eb7b5c2fd927ec2b21ae972a39788cdce89c4,tensorflow/tensorflow,"Nest shouldn't barf on ranges.

PiperOrigin-RevId: 246185019",nest.py,"@@ -130,6 +130,8 @@ def _sequence_like(instance, args):
   elif _is_composite_tensor(instance):
     metadata = instance._component_metadata()  # pylint: disable=protected-access
     return type(instance)._from_components(args, metadata)  # pylint: disable=protected-access
+  elif isinstance(instance, _six.moves.range):
+    return _sequence_like(list(instance), args)
   else:
     # Not a namedtuple
     return type(instance)(args)
",0,train
e50ff7aac1f7f98fd103950dcdfbc53804a6b0d4,tensorflow/tensorflow,Enhance the comment for RunDatasetOp(),dataset_test_base.h,"@@ -771,7 +771,11 @@ class DatasetOpsTestBaseV2 : public DatasetOpsTestBase {
                       std::unique_ptr<TestIterator>* iterator);
 
   // Runs the dataset operation according to the predefined dataset params and
-  // produces outputs.
+  // produces outputs. Different from `MakeDataset()` which returns a Dataset
+  // object, `RunDatasetOp()` executes the dataset kernel based on the input
+  // DatasetParams and returns the produced outputs as a tensor vector. It can
+  // be used to run some dataset operations that do not have an internal
+  // customized `Dataset` class (e.g. `ReduceDatasetOp`).
   Status RunDatasetOp(const DatasetParams& dataset_params,
                       std::vector<Tensor>* outputs);
 
",0,train
ab1478d380a3f91b75a43b1d452e63b7ab3a0868,tensorflow/tensorflow,"Work around the outside compilation / resource variable issue.

PiperOrigin-RevId: 335441457
Change-Id: Id9055f26d786f8d9936edc0312f73e86a420e56a",tensor_tracer.py,"@@ -1494,7 +1494,7 @@ class TensorTracer(object):
 
       flush_op = tpu.outside_compilation(
           _flush_fun, cache_val, self._replica_id,
-          training_util.get_or_create_global_step())
+          array_ops.identity(training_util.get_or_create_global_step()))
     else:
       flush_op = _flush_fun(cache_val, self._replica_id,
                             training_util.get_or_create_global_step())
",0,test
eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules.

PiperOrigin-RevId: 354030850
Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",get_compiler_ir.cc,"@@ -119,7 +119,8 @@ xla::StatusOr<std::string> GetCompilerIr(
   TF_RETURN_IF_ERROR(args.status());
 
   switch (stage) {
-    case IrExportStage::HLO: {
+    case IrExportStage::HLO:
+    case IrExportStage::HLO_SERIALIZED: {
       XlaCompiler::CompilationResult result;
       TF_RETURN_IF_ERROR(
           compiler.CompileFunction(compile_options, function, *args, &result));
@@ -131,13 +132,23 @@ xla::StatusOr<std::string> GetCompilerIr(
           std::unique_ptr<xla::HloModule> new_module,
           xla::HloModule::CreateFromProto(result.computation->proto(), config));
 
-      return new_module->ToString();
+      if (stage == IrExportStage::HLO_SERIALIZED) {
+        return new_module->ToProto().SerializeAsString();
+      } else {
+        return new_module->ToString();
+      }
     }
-    case IrExportStage::OPTIMIZED_HLO: {
+    case IrExportStage::OPTIMIZED_HLO:
+    case IrExportStage::OPTIMIZED_HLO_SERIALIZED: {
       xla::StatusOr<xla::LocalExecutable*> executable = GetLocalExecutable(
           options, compile_options, function, cache, *args, compiler);
       TF_RETURN_IF_ERROR(executable.status());
-      return (*executable)->executable()->module().ToString();
+      xla::Executable* new_executable = (*executable)->executable();
+      if (stage == IrExportStage::OPTIMIZED_HLO_SERIALIZED) {
+        return new_executable->module().ToProto().SerializeAsString();
+      } else {
+        return new_executable->module().ToString();
+      }
     }
     case IrExportStage::OPTIMIZED_HLO_DOT: {
       xla::StatusOr<xla::LocalExecutable*> executable = GetLocalExecutable(
",0,train
eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules.

PiperOrigin-RevId: 354030850
Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",get_compiler_ir.h,"@@ -27,10 +27,16 @@ class Tensor;
 class TensorHandle;
 class EagerContext;
 
-enum class IrExportStage { HLO, OPTIMIZED_HLO, OPTIMIZED_HLO_DOT };
-
-// Returns HLO text for a given function `func_name` using library runtime
-// `runtime` on a device `dev` with given `inputs`.
+enum class IrExportStage {
+  HLO,
+  HLO_SERIALIZED,
+  OPTIMIZED_HLO,
+  OPTIMIZED_HLO_SERIALIZED,
+  OPTIMIZED_HLO_DOT
+};
+
+// Returns the IR format of the selected stage for a given function `func_name`
+// using library runtime `runtime` on a device `dev` with given `inputs`.
 xla::StatusOr<std::string> GetCompilerIr(
     IrExportStage stage, ProcessFunctionLibraryRuntime* pflr,
     absl::string_view func_name, Device* dev, EagerContext* context,
",0,train
eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules.

PiperOrigin-RevId: 354030850
Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",def_function.py,"@@ -958,13 +958,21 @@ class Function(object):
       **kwargs: Keyword arguments used for compilation.
 
     Returns:
-      Function callable with the stage at which the compiler IR should be
-      serialized. Allowed values for the `stage` are:
-       - `hlo`: HLO output after conversion from TF
-         (https://www.tensorflow.org/xla/operation_semantics).
-       - `optimized_hlo`: HLO after compiler optimizations.
-       - `optimized_hlo_dot`: optimized HLO in DOT format suitable for
-         Graphviz.
+      Function callable with the following kwargs:
+        - `stage` at which the compiler IR should be serialized. Allowed values
+          are:
+           - `hlo`: HLO output after conversion from TF
+            (https://www.tensorflow.org/xla/operation_semantics).
+           - `hlo_serialized`: Like stage=`hlo`, but the output is a serialized
+             HLO module proto (a bytes object).
+           - `optimized_hlo`: HLO after compiler optimizations.
+           - `optimized_hlo_serialized`: Like stage=`optimized_hlo`, but the
+             output is a serialized HLO module proto (a bytes object).
+           - `optimized_hlo_dot`: optimized HLO in DOT format suitable for
+             Graphviz.
+        - `device_name` can be either None, in which case the preferred device
+          is used for compilation, or a device name. It can be a full device
+          name, or a partial one, e.g., `/device:CPU:0`.
 
       For example, for
 
@@ -1013,21 +1021,20 @@ class Function(object):
         concrete_fn._function_spec.canonicalize_function_inputs(
             *args, **kwargs)
 
-    def compiler_ir_generator(stage='hlo'):
-      """"""Returns compiler IR for the given `stage`.
-
-      Args:
-        stage: Stage at which to return the IR. Allowed values are 'hlo' and
-        'optimized_hlo'.
-      """"""
+    def compiler_ir_generator(stage=""hlo"", device_name=None):
       # TODO(cheshire): This is a hack to get the current ""preferred"" device,
       # there is no current API to get it otherwise.
-      device = random_ops.random_normal([]).device
-      return context.context().get_compiler_ir(
-          device_name=device,
+      if device_name is None:
+        device_name = random_ops.random_normal([]).device
+      res_bytes = context.context().get_compiler_ir(
+          device_name=device_name,
           stage=stage,
           function_name=fn_name,
           args=list(filtered_flat_args) + concrete_fn.captured_inputs)
+      if stage in (""hlo_serialized"", ""optimized_hlo_serialized""):
+        return res_bytes
+      else:
+        return res_bytes.decode(""utf-8"")
 
     return compiler_ir_generator
 
",0,train
eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules.

PiperOrigin-RevId: 354030850
Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",def_function_xla_jit_test.py,"@@ -778,6 +778,19 @@ class DefFunctionTest(xla_test.XLATestCase):
       self.assertIn('tuple',
                     f.experimental_get_compiler_ir(l)())
 
+  def testGetCompilerIrSerialized(self):
+    with ops.device('device:{}:0'.format(self.device)):
+
+      @def_function.function(jit_compile=True)
+      def fn(x):
+        return x - x
+
+      inputs = constant_op.constant([1, 2, 2, 3, 3])
+      for stage in ('hlo_serialized', 'optimized_hlo_serialized'):
+        hlo = fn.experimental_get_compiler_ir(inputs)(
+            stage=stage, device_name=f'/device:{self.device}:0')
+        self.assertIsInstance(hlo, bytes)
+
   def testConstantOnWrongDevice(self):
     with ops.device('device:{}:0'.format(self.device)):
 
",0,train
eb3fcf1cb36297d58a1bb1e7c9fa79370894d1bd,tensorflow/tensorflow,"Extend experimental_get_compiler_ir to get serialized HLO modules.

PiperOrigin-RevId: 354030850
Change-Id: I57c1a5445614eab200ad0e86af668c15cd7718d7",tfe_wrapper.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""pybind11/complex.h""
 #include ""pybind11/functional.h""
 #include ""pybind11/pybind11.h""
+#include ""pybind11/pytypes.h""
 #include ""pybind11/stl.h""
 #include ""tensorflow/c/c_api.h""
 #include ""tensorflow/c/c_api_experimental.h""
@@ -296,10 +297,10 @@ static py::object TFE_ClearScalarCache() {
 }
 
 // Returns compiler IR for a given function.
-static std::string TFE_GetCompilerIr(py::handle& ctx,
-                                     const char* concrete_function_name,
-                                     const char* stage, const char* device_name,
-                                     py::handle& inputs) {
+static py::bytes TFE_GetCompilerIr(py::handle& ctx,
+                                   const char* concrete_function_name,
+                                   const char* stage, const char* device_name,
+                                   py::handle& inputs) {
   EagerContext* context = ContextFromInterface(
       reinterpret_cast<ImmediateExecutionContext*>(InputTFE_Context(ctx)));
 
@@ -307,8 +308,12 @@ static std::string TFE_GetCompilerIr(py::handle& ctx,
   IrExportStage selected_stage = [&] {
     if (s_stage == ""hlo"") {
       return IrExportStage::HLO;
+    } else if (s_stage == ""hlo_serialized"") {
+      return IrExportStage::HLO_SERIALIZED;
     } else if (s_stage == ""optimized_hlo"") {
       return IrExportStage::OPTIMIZED_HLO;
+    } else if (s_stage == ""optimized_hlo_serialized"") {
+      return IrExportStage::OPTIMIZED_HLO_SERIALIZED;
     } else if (s_stage == ""optimized_hlo_dot"") {
       return IrExportStage::OPTIMIZED_HLO_DOT;
     } else {
@@ -341,19 +346,21 @@ static std::string TFE_GetCompilerIr(py::handle& ctx,
                                                   d->parsed_name());
   });
   if (selected_device == devices.end()) {
-    ThrowValueError(""No matching device found"");
+    ThrowValueError(
+        absl::StrFormat(""No matching device found for '%s'"", device_name)
+            .c_str());
   }
 
-  xla::StatusOr<std::string> hlo_text =
+  xla::StatusOr<std::string> hlo_str =
       GetCompilerIr(selected_stage, context->pflr(), concrete_function_name,
                     *selected_device, context, input_handles);
 
-  if (!hlo_text.ok()) {
+  if (!hlo_str.ok()) {
     ThrowValueError(absl::StrFormat(""Failed getting HLO text: '%s'"",
-                                    hlo_text.status().error_message())
+                                    hlo_str.status().error_message())
                         .c_str());
   }
-  return *hlo_text;
+  return py::bytes(*hlo_str);
 }
 
 }  // namespace tensorflow
",0,train
e185d5bb9c536ae5cd87d64f875deb8522d66fc7,tensorflow/tensorflow,"Append an error message to address the error thrown when certain train_step and test_step methods are decorated with tf.function

PiperOrigin-RevId: 384969431
Change-Id: I1ff65811961eaa23d32934dc6a47ee9dc6f9319f",mirrored_run.py,"@@ -463,7 +463,9 @@ class _MirroredReplicaContext(distribute_lib.ReplicaContext):
           "" please avoid nested `tf.function`s or control flow statements that""
           "" may potentially cross a synchronization boundary, for example,""
           "" wrap the `fn` passed to `strategy.run` or the entire `strategy.run`""
-          "" inside a `tf.function` or move the control flow out of `fn`"")
+          "" inside a `tf.function` or move the control flow out of `fn`. Also,""
+          "" when subclassing a tf.keras.Model please avoid decorating""
+          "" overridden methods`test_step` and `train_step` in `tf.function`."")
 
     t.has_paused.set()
     t.should_run.wait()
",0,train
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",gpu_compiler.cc,"@@ -671,6 +671,8 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
 
   if (module->config().hlo_profiling_enabled()) {
     HloCostAnalysis cost_analysis(ShapeSizeBytesFunction());
+    cost_analysis.set_bytes_per_second(
+        stream_exec->GetDeviceDescription().memory_bandwidth());
     TF_RETURN_IF_ERROR(module->entry_computation()->Accept(&cost_analysis));
     profile_index_map = MakeUnique<HloProfileIndexMap>(*module);
     profile_printer =
",0,test
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",cuda_driver.cc,"@@ -1503,6 +1503,19 @@ static port::StatusOr<T> GetSimpleAttribute(CUdevice device,
   return true;
 }
 
+/* static */ port::StatusOr<int> CUDADriver::GetDeviceAttribute(
+    CUdevice_attribute attribute, CUdevice device) {
+  int val;
+  CUresult res = cuDeviceGetAttribute(&val, attribute, device);
+  if (res != CUDA_SUCCESS) {
+    return port::Status{
+        port::error::INTERNAL,
+        port::Printf(""failed to get device attribute %d for device %d: %s"",
+                     attribute, device, ToString(res).c_str())};
+  }
+  return val;
+}
+
 /* static */ bool CUDADriver::IsEccEnabled(CUdevice device, bool *result) {
   int value = -1;
   CUresult res =
",0,test
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",cuda_driver.h,"@@ -400,12 +400,20 @@ class CUDADriver {
 
   // Returns a grab-bag of device properties in a caller-owned device_properties
   // structure for device_ordinal via cuDeviceGetProperties.
-  // This call is deprecated in the NVIDIA driver API.
+  //
+  // This call is deprecated in the NVIDIA driver API; its replacement is
+  // GetDeviceAttribute
   //
   // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE__DEPRECATED.html#group__CUDA__DEVICE__DEPRECATED_1g65a5b4e25186bd257df80b98c98cffe6
   static bool GetDeviceProperties(CUdevprop *device_properties,
                                   int device_ordinal);
 
+  // Gets a specific integer-valued property about the given device.
+  //
+  // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266
+  static port::StatusOr<int> GetDeviceAttribute(CUdevice_attribute attribute,
+                                                CUdevice device);
+
   // Returns whether ECC is enabled for the given CUdevice via
   // cuDeviceGetattribute with CU_DEVICE_ATTRIBUTE_ECC_ENABLED.
   // http://docs.nvidia.com/cuda/cuda-driver-api/group__CUDA__DEVICE.html#group__CUDA__DEVICE_1g9c3e1414f0ad901d3278a4d6645fc266
",0,test
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",cuda_gpu_executor.cc,"@@ -1103,6 +1103,18 @@ DeviceDescription *CUDAExecutor::PopulateDeviceDescription() const {
     builder.set_device_memory_size(device_memory_size);
   }
 
+  port::StatusOr<int> mem_clock_khz = CUDADriver::GetDeviceAttribute(
+      CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device_ordinal_);
+  port::StatusOr<int> mem_bus_width_bits = CUDADriver::GetDeviceAttribute(
+      CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, device_ordinal_);
+  if (mem_clock_khz.ok() && mem_bus_width_bits.ok()) {
+    // Times 2 because HBM is DDR memory; it gets two data bits per each data
+    // lane.
+    builder.set_memory_bandwidth(2 * int64_t{mem_clock_khz.ValueOrDie()} *
+                                 1000 *
+                                 int64_t{mem_bus_width_bits.ValueOrDie()} / 8);
+  }
+
   {
     BlockDim block_dim_limit;
     FillBlockDimLimit(&block_dim_limit);
",0,test
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",device_description.cc,"@@ -50,6 +50,7 @@ DeviceDescription::DeviceDescription()
       shared_memory_alloc_granularity_(1),
       device_address_bits_(kUninitializedUint64),
       device_memory_size_(kUninitializedUint64),
+      memory_bandwidth_(kUninitializedUint64),
       shared_memory_per_core_(kUninitializedUint64),
       shared_memory_per_block_(kUninitializedUint64),
       clock_rate_ghz_(-1.0),
@@ -85,6 +86,8 @@ std::unique_ptr<std::map<string, string>> DeviceDescription::ToMap() const {
   result[""Device Address Bits""] = port::StrCat(device_address_bits());
   result[""Device Memory Size""] =
       port::HumanReadableNumBytes::ToString(device_memory_size());
+  result[""Memory Bandwidth""] = port::StrCat(
+      port::HumanReadableNumBytes::ToString(memory_bandwidth_), ""/s"");
 
   result[""Shared Memory Per Core""] =
       port::HumanReadableNumBytes::ToString(shared_memory_per_core_);
",0,test
b08c54271084b05ea822b3348a3a448a9fe3b898,tensorflow/tensorflow,"[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.

Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407",device_description.h,"@@ -140,6 +140,11 @@ class DeviceDescription {
   // Returns the device memory size in bytes.
   uint64 device_memory_size() const { return device_memory_size_; }
 
+  // Returns the device's memory bandwidth in bytes/sec.  (This is for
+  // reads/writes to/from the device's own memory, not for transfers between the
+  // host and device.)
+  uint64 memory_bandwidth() const { return memory_bandwidth_; }
+
   // Returns the device's core clock rate in GHz.
   float clock_rate_ghz() const { return clock_rate_ghz_; }
 
@@ -212,6 +217,7 @@ class DeviceDescription {
 
   uint64 device_address_bits_;
   uint64 device_memory_size_;
+  uint64 memory_bandwidth_;
 
   // Shared memory limits on a given device.
   uint64 shared_memory_per_core_;
@@ -305,6 +311,9 @@ class DeviceDescriptionBuilder {
   void set_device_memory_size(uint64 value) {
     device_description_->device_memory_size_ = value;
   }
+  void set_memory_bandwidth(uint64 value) {
+    device_description_->memory_bandwidth_ = value;
+  }
 
   void set_shared_memory_per_core(int64 value) {
     device_description_->shared_memory_per_core_ = value;
",0,test
063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape.

PiperOrigin-RevId: 155902266",feature_column.py,"@@ -1330,13 +1330,6 @@ class _CategoricalColumn(_FeatureColumn):
     pass
 
 
-def _sparse_reshape(inputs, shape):
-  # Satisfies sparse_reshape assumptions such as dtype int64.
-  # shape is a list.
-  return sparse_ops.sparse_reshape(inputs,
-                                   math_ops.cast(shape, dtypes.int64))
-
-
 def _create_categorical_column_weighted_sum(
     column, builder, units, sparse_combiner, weight_collections, trainable):
   """"""Create a weighted sum of a categorical column for linear_model.""""""
@@ -1344,13 +1337,13 @@ def _create_categorical_column_weighted_sum(
       builder,
       weight_collections=weight_collections,
       trainable=trainable)
-  id_tensor = _sparse_reshape(sparse_tensors.id_tensor, [
+  id_tensor = sparse_ops.sparse_reshape(sparse_tensors.id_tensor, [
       array_ops.shape(sparse_tensors.id_tensor)[0], -1
   ])
   weight_tensor = sparse_tensors.weight_tensor
   if weight_tensor is not None:
-    weight_tensor = _sparse_reshape(weight_tensor,
-                                    [array_ops.shape(weight_tensor)[0], -1])
+    weight_tensor = sparse_ops.sparse_reshape(
+        weight_tensor, [array_ops.shape(weight_tensor)[0], -1])
 
   weight = variable_scope.get_variable(
       name='weight',
",0,train
063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape.

PiperOrigin-RevId: 155902266",sparse_reshape_op_test.py,"@@ -78,6 +78,18 @@ class SparseReshapeTest(test.TestCase):
       self.assertAllEqual(output_val.values, input_val.values)
       self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
 
+  def testWorksWellWithTfShape(self):
+    with self.test_session(use_gpu=False) as sess:
+      sp_input = self._SparseTensorPlaceholder()
+      input_val = self._SparseTensorValue_5x6()
+      shape = array_ops.shape(sp_input)  # tf.shape generates int32 output
+      sp_output = sparse_ops.sparse_reshape(sp_input, shape)
+
+      output_val = sess.run(sp_output, {sp_input: input_val})
+      self.assertAllEqual(output_val.indices, input_val.indices)
+      self.assertAllEqual(output_val.values, input_val.values)
+      self.assertAllEqual(output_val.dense_shape, input_val.dense_shape)
+
   def testFeedSameShapeWithInferredDim(self):
     with self.test_session(use_gpu=False) as sess:
       sp_input = self._SparseTensorPlaceholder()
",0,train
063bdbe773ba520d6a232f278567a16411a90597,tensorflow/tensorflow,"Make sparse_reshape work well with output of tf.shape.

PiperOrigin-RevId: 155902266",sparse_ops.py,"@@ -556,7 +556,7 @@ def sparse_reshape(sp_input, shape, name=None):
       number of elements than `sp_input`.
   """"""
   sp_input = _convert_to_sparse_tensor(sp_input)
-  shape = ops.convert_to_tensor(shape, dtype=dtypes.int64)
+  shape = math_ops.cast(shape, dtype=dtypes.int64)
 
   with ops.name_scope(name, ""SparseReshape"", [sp_input]) as name:
     reshaped_ind, reshaped_shape = gen_sparse_ops._sparse_reshape(
",0,train
fc670504413590e3c6e665ed233c44b53be5daed,tensorflow/tensorflow,Update nn_ops.py,nn_ops.py,"@@ -3832,7 +3832,7 @@ def softmax_v2(logits, axis=None, name=None):
 
   This function performs the equivalent of
 
-      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis)
+      softmax = tf.exp(logits) / tf.reduce_sum(tf.exp(logits), axis, keepdims=True)
 
   Example usage:
 
",0,train
e3b0a4291984f1af0cb8bf512542dffaca2d6cb5,tensorflow/tensorflow,"Allow non-integer values for Poisson CDF/PMF.

PiperOrigin-RevId: 186502845",poisson_test.py,"@@ -18,6 +18,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
+from scipy import special
 from scipy import stats
 from tensorflow.contrib.distributions.python.ops import poisson as poisson_lib
 from tensorflow.python.framework import constant_op
@@ -110,7 +111,7 @@ class PoissonTest(test.TestCase):
       batch_size = 6
       lam = constant_op.constant([3.0] * batch_size)
       lam_v = 3.0
-      x = [2.2, 3.1, 4., 5.5, 6., 7.]
+      x = [2., 3., 4., 5., 6., 7.]
 
       poisson = self._make_poisson(rate=lam)
       log_cdf = poisson.log_cdf(x)
@@ -121,12 +122,31 @@ class PoissonTest(test.TestCase):
       self.assertEqual(cdf.get_shape(), (6,))
       self.assertAllClose(cdf.eval(), stats.poisson.cdf(x, lam_v))
 
+  def testPoissonCDFNonIntegerValues(self):
+    with self.test_session():
+      batch_size = 6
+      lam = constant_op.constant([3.0] * batch_size)
+      lam_v = 3.0
+      x = np.array([2.2, 3.1, 4., 5.5, 6., 7.], dtype=np.float32)
+
+      poisson = self._make_poisson(rate=lam)
+      cdf = poisson.cdf(x)
+      self.assertEqual(cdf.get_shape(), (6,))
+
+      # The Poisson CDF should be valid on these non-integer values, and
+      # equal to igammac(1 + x, rate).
+      self.assertAllClose(cdf.eval(), special.gammaincc(1. + x, lam_v))
+
+      with self.assertRaisesOpError(""cannot contain fractional components""):
+        poisson_validate = self._make_poisson(rate=lam, validate_args=True)
+        poisson_validate.cdf(x).eval()
+
   def testPoissonCdfMultidimensional(self):
     with self.test_session():
       batch_size = 6
       lam = constant_op.constant([[2.0, 4.0, 5.0]] * batch_size)
       lam_v = [2.0, 4.0, 5.0]
-      x = np.array([[2.2, 3.1, 4., 5.5, 6., 7.]], dtype=np.float32).T
+      x = np.array([[2., 3., 4., 5., 6., 7.]], dtype=np.float32).T
 
       poisson = self._make_poisson(rate=lam)
       log_cdf = poisson.log_cdf(x)
",0,train
e3b0a4291984f1af0cb8bf512542dffaca2d6cb5,tensorflow/tensorflow,"Allow non-integer values for Poisson CDF/PMF.

PiperOrigin-RevId: 186502845",poisson.py,"@@ -35,9 +35,15 @@ __all__ = [
 
 
 _poisson_sample_note = """"""
-Note that the input value must be a non-negative floating point tensor with
-dtype `dtype` and whose shape can be broadcast with `self.rate`. `x` is only
-legal if it is non-negative and its components are equal to integer values.
+The Poisson distribution is technically only defined for non-negative integer
+values. When `validate_args=False`, non-integral inputs trigger an assertion.
+
+When `validate_args=False` calculations are otherwise unchanged despite
+integral or non-integral inputs.
+
+When `validate_args=False`, evaluating the pmf at non-integral values,
+corresponds to evaluations of an unnormalized distribution, that does not
+correspond to evaluations of the cdf.
 """"""
 
 
@@ -150,10 +156,6 @@ class Poisson(distribution.Distribution):
   def _cdf(self, x):
     if self.validate_args:
       x = distribution_util.embed_check_nonnegative_integer_form(x)
-    else:
-      # Whether or not x is integer-form, the following is well-defined.
-      # However, scipy takes the floor, so we do too.
-      x = math_ops.floor(x)
     return math_ops.igammac(1. + x, self.rate)
 
   def _log_normalization(self):
@@ -162,9 +164,6 @@ class Poisson(distribution.Distribution):
   def _log_unnormalized_prob(self, x):
     if self.validate_args:
       x = distribution_util.embed_check_nonnegative_integer_form(x)
-    else:
-      # For consistency with cdf, we take the floor.
-      x = math_ops.floor(x)
     return x * self.log_rate - math_ops.lgamma(1. + x)
 
   def _mean(self):
",0,train
9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API

Add TFL_InterpreterOptionsAddDelegate to allow delegate
injection during interpreter creation.

PiperOrigin-RevId: 260734684",c_api.cc,"@@ -123,6 +123,12 @@ TFL_Interpreter* TFL_NewInterpreter(
         TFL_InterpreterOptions::kDefaultNumThreads) {
       interpreter->SetNumThreads(optional_options->num_threads);
     }
+
+    for (auto* delegate : optional_options->delegates) {
+      if (interpreter->ModifyGraphWithDelegate(delegate) != kTfLiteOk) {
+        return nullptr;
+      }
+    }
   }
 
   return new TFL_Interpreter{model->impl, std::move(optional_error_reporter),
",0,train
9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API

Add TFL_InterpreterOptionsAddDelegate to allow delegate
injection during interpreter creation.

PiperOrigin-RevId: 260734684",c_api_experimental.cc,"@@ -41,6 +41,11 @@ void TFL_InterpreterOptionsAddCustomOp(TFL_InterpreterOptions* options,
   options->op_resolver.AddCustom(name, registration, min_version, max_version);
 }
 
+void TFL_InterpreterOptionsAddDelegate(TFL_InterpreterOptions* options,
+                                       TFL_Delegate* delegate) {
+  options->delegates.push_back(delegate);
+}
+
 #ifdef __cplusplus
 }  // extern ""C""
 #endif  // __cplusplus
",0,train
9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API

Add TFL_InterpreterOptionsAddDelegate to allow delegate
injection during interpreter creation.

PiperOrigin-RevId: 260734684",c_api_experimental.h,"@@ -23,6 +23,7 @@ extern ""C"" {
 #endif  // __cplusplus
 
 typedef TfLiteBuiltinOperator TFL_BuiltinOperator;
+typedef TfLiteDelegate TFL_Delegate;
 
 // Resets all variable tensors to zero.
 TFL_CAPI_EXPORT extern TFL_Status TFL_InterpreterResetVariableTensors(
@@ -42,12 +43,22 @@ TFL_CAPI_EXPORT void TFL_InterpreterOptionsAddBuiltinOp(
 //
 // NOTE: The interpreter will make a copy of `registration` internally, so the
 // caller should ensure that its contents (function pointers, etc...) remain
-// valid for the duration of the interpreter's lifetime. A common practice is
-// making the provided TFL_Registration instance static.
+// valid for the duration of any created interpreter's lifetime. A common
+// practice is making the provided TFL_Registration instance static.
 TFL_CAPI_EXPORT void TFL_InterpreterOptionsAddCustomOp(
     TFL_InterpreterOptions* options, const char* name,
     const TFL_Registration* registration, int min_version, int max_version);
 
+// Adds a delegate to be applied during `TFL_Interpreter` creation.
+//
+// If delegate application fails, interpreter creation will also fail with an
+// associated error logged.
+//
+// NOTE: The caller retains ownership of the delegate and should ensure that it
+// remains valid for the duration of any created interpreter's lifetime.
+TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsAddDelegate(
+    TFL_InterpreterOptions* options, TFL_Delegate* delegate);
+
 #ifdef __cplusplus
 }  // extern ""C""
 #endif  // __cplusplus
",0,train
9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API

Add TFL_InterpreterOptionsAddDelegate to allow delegate
injection during interpreter creation.

PiperOrigin-RevId: 260734684",c_api_experimental_test.cc,"@@ -32,7 +32,7 @@ TfLiteRegistration* GetDummyRegistration() {
   return &registration;
 }
 
-TEST(CApiExperimentalSimple, Smoke) {
+TEST(CApiExperimentalTest, Smoke) {
   TFL_Model* model = TFL_NewModelFromFile(
       ""tensorflow/lite/testdata/add.bin"");
   ASSERT_NE(model, nullptr);
@@ -52,6 +52,52 @@ TEST(CApiExperimentalSimple, Smoke) {
   TFL_DeleteModel(model);
 }
 
+TEST(CApiExperimentalTest, Delegate) {
+  TFL_Model* model =
+      TFL_NewModelFromFile(""tensorflow/lite/testdata/add.bin"");
+
+  // Create and install a delegate instance.
+  bool delegate_prepared = false;
+  TfLiteDelegate delegate = TfLiteDelegateCreate();
+  delegate.data_ = &delegate_prepared;
+  delegate.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) {
+    *static_cast<bool*>(delegate->data_) = true;
+    return kTfLiteOk;
+  };
+  TFL_InterpreterOptions* options = TFL_NewInterpreterOptions();
+  TFL_InterpreterOptionsAddDelegate(options, &delegate);
+  TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options);
+
+  // The delegate should have been applied.
+  EXPECT_TRUE(delegate_prepared);
+
+  // Subsequent execution should behave properly (the delegate is a no-op).
+  TFL_DeleteInterpreterOptions(options);
+  TFL_DeleteModel(model);
+  EXPECT_EQ(TFL_InterpreterInvoke(interpreter), kTfLiteOk);
+  TFL_DeleteInterpreter(interpreter);
+}
+
+TEST(CApiExperimentalTest, DelegateFails) {
+  TFL_Model* model =
+      TFL_NewModelFromFile(""tensorflow/lite/testdata/add.bin"");
+
+  // Create and install a delegate instance.
+  TfLiteDelegate delegate = TfLiteDelegateCreate();
+  delegate.Prepare = [](TfLiteContext* context, TfLiteDelegate* delegate) {
+    return kTfLiteError;
+  };
+  TFL_InterpreterOptions* options = TFL_NewInterpreterOptions();
+  TFL_InterpreterOptionsAddDelegate(options, &delegate);
+  TFL_Interpreter* interpreter = TFL_NewInterpreter(model, options);
+
+  // Interpreter creation should fail as delegate preparation failed.
+  EXPECT_EQ(nullptr, interpreter);
+
+  TFL_DeleteInterpreterOptions(options);
+  TFL_DeleteModel(model);
+}
+
 }  // namespace
 
 int main(int argc, char** argv) {
",0,train
9cd2f8cb1a2b67c6d8d558349746a367168fc3b4,tensorflow/tensorflow,"Add delegate support to external C API

Add TFL_InterpreterOptionsAddDelegate to allow delegate
injection during interpreter creation.

PiperOrigin-RevId: 260734684",c_api_internal.h,"@@ -43,6 +43,8 @@ struct TFL_InterpreterOptions {
   void (*error_reporter)(void* user_data, const char* format,
                          va_list args) = nullptr;
   void* error_reporter_user_data = nullptr;
+
+  std::vector<TfLiteDelegate*> delegates;
 };
 
 struct TFL_Interpreter {
",0,train
67b82da322acd410b00d08bc05997ecbdb88717f,tensorflow/tensorflow,+ Test for DaskDataFeeder,data_feeder.py,"@@ -311,12 +311,18 @@ class DaskDataFeeder(object):
         # combine into a data frame
         self.df = dd.multi.concat([X, y], axis=1)
         self.n_classes = n_classes
-        X_shape = tuple([X.count().compute()])
+
+        X_count = X.count().compute()
+        if len(X_count) == 1:
+            X_shape = tuple([X_count])
+        else:
+            # TODO: Support multi-dimensional
+            raise ValueError(""Only one dimensional input for DaskDataFeeder is supported now."")
         y_shape = tuple([y.count().compute()])
         self.sample_fraction = batch_size/float(list(X_shape)[0])
         self.input_shape, self.output_shape = _get_in_out_shape(
             X_shape, y_shape, n_classes, batch_size)
-        self.input_dtype, self.output_dtype = X.dtype, y.dtype
+        self.input_dtype, self.output_dtype = X.dtypes, y.dtypes # TODO: dtypes for dataframe
         if random_state is None:
             self.random_state = np.random.RandomState(42)
         else:
@@ -336,8 +342,8 @@ class DaskDataFeeder(object):
         def _feed_dict_fn():
             # TODO: option for with/without replacement (dev version of dask)
             sample = self.df.random_split([self.sample_fraction, 1-self.sample_fraction],
-                                    random_state=self.random_state)[0]
-            inp = sample[self.X_columns]
-            out = sample[self.y_columns]
+                                    random_state=self.random_state)
+            inp = sample[0][self.X_columns]
+            out = sample[0][self.y_columns]
             return {input_placeholder.name: inp, output_placeholder.name: out}
         return _feed_dict_fn
",0,train
67b82da322acd410b00d08bc05997ecbdb88717f,tensorflow/tensorflow,+ Test for DaskDataFeeder,test_data_feeder.py,"@@ -83,6 +83,21 @@ class DataFeederTest(tf.test.TestCase):
         self.assertAllClose(feed_dict['input'], [[1, 2], [3, 4]])
         self.assertAllClose(feed_dict['output'], [1, 2])
 
+    def test_dask_data_feeder(self):
+        X = pd.DataFrame(dict(a=list('aabbcc')))
+        X = dd.from_pandas(X, npartitions=3)
+        y = pd.DataFrame(dict(labels=list('010011')))
+        y = dd.from_pandas(y, npartitions=3)
+        X = _construct_dask_df_with_divisions(X)
+        y = _construct_dask_df_with_divisions(y)
+
+        df = DaskDataFeeder(X, y, n_classes=2, batch_size=2)
+        feed_dict_fn = df.get_feed_dict_fn(
+            MockPlaceholder(name='input'),
+            MockPlaceholder(name='output'))
+        feed_dict = feed_dict_fn()
+
+
 
 if __name__ == '__main__':
     tf.test.main()
",0,train
5ff27167b274a7471b35ba80491004093a3f6133,tensorflow/tensorflow,"Don't automatically add control deps to collective ops.

These ops need to run asynchronously to avoid deadlock.

PiperOrigin-RevId: 226397820",auto_control_deps.py,"@@ -29,13 +29,22 @@ from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
+# Op types that should not run in program order, e.g. because they need to run
+# asynchronously to avoid deadlock.
+ASYNC_STATEFUL_OPS = [
+    ""CollectiveReduce"",
+    ""CollectiveBcastSend"",
+    ""CollectiveBcastRecv"",
+]
+
 
 class AutomaticControlDependencies(object):
   """"""Context manager to automatically add control dependencies.
 
   Code under this context manager will act as if a sensible set of control
   dependencies were present. More specifically:
-    1. All stateful ops in the scope will execute
+    1. All stateful ops in the scope will execute (with the exception of ops in
+       ASYNC_STATEFUL_OPS)
     2. Stateful ops which modify the same resource will execute in program order
 
   Note: creating variables in an automatic control dependencies context is not
@@ -223,7 +232,8 @@ class AutomaticControlDependencies(object):
       control_inputs = set()
       # Ensure stateful ops run
       if (op.type not in self._graph._registered_ops  # pylint: disable=protected-access
-          or self._graph._registered_ops[op.type].is_stateful):  # pylint: disable=protected-access
+          or (self._graph._registered_ops[op.type].is_stateful   # pylint: disable=protected-access
+              and op.type not in ASYNC_STATEFUL_OPS)):
         ops_which_must_run.add(op)
       # Ignore switches (they're handled separately)
       if op.type == ""Switch"" and op.inputs[0].dtype == dtypes_module.resource:
@@ -255,8 +265,8 @@ class AutomaticControlDependencies(object):
           if inp in merge_for_resource:
             merge_for_resource[inp]._add_control_input(op)  # pylint: disable=protected-access
           last_op_using_resource_tensor[inp] = op
-      if (op.op_def.is_stateful and not found_resource
-          and op._control_flow_context is None):  # pylint: disable=protected-access
+      if (op.op_def.is_stateful and op.type not in ASYNC_STATEFUL_OPS
+          and not found_resource and op._control_flow_context is None):  # pylint: disable=protected-access
         if None in last_op_using_resource_tensor:
           op._add_control_input(last_op_using_resource_tensor[None])  # pylint: disable=protected-access
         last_op_using_resource_tensor[None] = op
",0,train
a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support for composite tensors, such as SparseTensor and RaggedTensor, to while_v2

PiperOrigin-RevId: 245285953",ops.py,"@@ -1761,10 +1761,10 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor):
     if shape is None:
       shape = self._values.shape
     if self._dense_shape is None:
-      return [shape, shape[:1]]  # values, indices
+      return (shape, shape[:1])  # values, indices
     else:
       # values, indices, dense_shape
-      return [shape, shape[:1], tensor_shape.TensorShape([shape.ndims])]
+      return (shape, shape[:1], tensor_shape.TensorShape([shape.ndims]))
 
   @property
   def _is_graph_tensor(self):
",0,train
a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support for composite tensors, such as SparseTensor and RaggedTensor, to while_v2

PiperOrigin-RevId: 245285953",sparse_tensor.py,"@@ -250,11 +250,11 @@ class SparseTensor(_TensorLike, composite_tensor.CompositeTensor):
       raise ValueError(""Shape invariant for SparseTensor must have the form ""
                        ""TensorShape([r]), got %r"" % shape)
     rank = tensor_shape.dimension_value(shape[0])
-    return [
+    return (
         tensor_shape.TensorShape([None, rank]),  # indices
         tensor_shape.TensorShape([None]),  # values
         tensor_shape.TensorShape([rank])  # dense_shape
-    ]
+        )
 
   @property
   def _is_graph_tensor(self):
",0,train
a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support for composite tensors, such as SparseTensor and RaggedTensor, to while_v2

PiperOrigin-RevId: 245285953",control_flow_ops_py_test.py,"@@ -1790,6 +1790,18 @@ class ControlFlowTest(test.TestCase):
       r = r[1] * array_ops.ones([8, 8])
       self.assertAllEqual(np.ones((8, 8)), self.evaluate(r))
 
+  @test_util.disable_control_flow_v2(""b/131265085"")
+  @test_util.run_v1_only(""b/131265085"")
+  def testWhileBadShape(self):
+    x = constant_op.constant([2.0, 4.0], name=""values"")
+    i = constant_op.constant(0)
+    c = lambda i, _: math_ops.less(i, 10)
+    b = lambda i, x: [i + 1, x + 1]
+    with self.assertRaisesRegexp(ValueError, ""is not compatible with""):
+      # Shape of x is [2], but we specify a shape of [5].
+      control_flow_ops.while_loop(
+          c, b, [i, x], [i.shape, tensor_shape.TensorShape([5])])
+
   @test_util.run_deprecated_v1
   def testWhileWithNonTensorInput_Scalar(self):
     with self.cached_session():
@@ -1807,7 +1819,6 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(c, b, [n], parallel_iterations=20)
       self.assertEqual([10000], self.evaluate(r))
 
-  @test_util.run_v1_only(""b/120545219"")
   def testWhileShapeInference(self):
     with self.cached_session():
       i = constant_op.constant(0)
@@ -1822,19 +1833,23 @@ class ControlFlowTest(test.TestCase):
       r = control_flow_ops.while_loop(
           c, b, [i, m],
           [i.get_shape(), tensor_shape.TensorShape([None, 2])])
-      self.assertIsNone(r[1].shape.dims[0].value)
-      self.assertEqual(r[1].shape.dims[1], tensor_shape.Dimension(2))
+      self.assertTrue(r[1].shape.is_compatible_with([8, 2]))
 
+  @test_util.run_v1_only(""b/120545219"")
+  def testWhileShapeInferenceBadShape(self):
+    with self.cached_session():
+      i = constant_op.constant(0)
+      m = array_ops.ones([2, 2])
+      c = lambda i, j: math_ops.less(i, 2)
+      b = lambda i, j: [i + 1, array_ops.concat([j, j], 0)]
       with self.assertRaisesRegexp(
           ValueError,
           r""Input tensor 'ones:0' enters the loop with shape \(2, 2\), but has ""
           r""shape \(4, 2\) after one iteration. To allow the shape to vary ""
           r""across iterations, use the `shape_invariants` argument of ""
           r""tf.while_loop to specify a less-specific shape.""):
-        r = control_flow_ops.while_loop(c, b, [i, m])
+        control_flow_ops.while_loop(c, b, [i, m])
 
-  @test_util.disable_control_flow_v2(""b/116328420 (SparseTensor)"")
-  @test_util.run_v1_only(""b/120545219"")
   def testWhileShapeInferenceSparseTensor(self):
     values = constant_op.constant([2.0, 4.0], name=""values"")
     indices = constant_op.constant([[0], [3]],
@@ -1873,61 +1888,72 @@ class ControlFlowTest(test.TestCase):
               array_ops.concat([x.dense_shape, [10]], axis=0))
       ]
 
+    def check_shapes(r, indices, values, dense_shape):
+      self.assertTrue(r.indices.shape.is_compatible_with(indices))
+      self.assertTrue(r.values.shape.is_compatible_with(values))
+      self.assertTrue(r.dense_shape.shape.is_compatible_with(dense_shape))
+
     # Default shape invariant; b1 only modifies values.
     _, r = control_flow_ops.while_loop(c, b1, [i, x])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, 1])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [1])
+    check_shapes(r, indices=[None, 1], values=[None], dense_shape=[1])
 
     # Default shape invariant; b2 adds new values
     _, r = control_flow_ops.while_loop(c, b2, [i, x])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, 1])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [1])
-
-    # Default shape invariant; b3 modifies rank (which is not allowed).
-    with self.assertRaises(ValueError):
-      _, r = control_flow_ops.while_loop(c, b3, [i, x])
+    check_shapes(r, indices=[None, 1], values=[None], dense_shape=[1])
 
     # Explicit shape invariant, allowing any rank; b1 only modifies values.
     _, r = control_flow_ops.while_loop(
         c, b1, [i, x],
         [i.get_shape(), tensor_shape.TensorShape([None])])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, None])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [None])
+    check_shapes(r, indices=[None, None], values=[None], dense_shape=[None])
 
     # Explicit shape invariant, allowing any rank; b3 modifies rank.
     _, r = control_flow_ops.while_loop(
         c, b3, [i, x],
         [i.get_shape(), tensor_shape.TensorShape([None])])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, None])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [None])
+    check_shapes(r, indices=[None, None], values=[None], dense_shape=[None])
 
     # Shape invariant with ndims=None.  Technically, this isn't supported
     # according to the docs, but we support it for backwards compatibility.
     _, r = control_flow_ops.while_loop(
         c, b1, [i, x],
         [i.get_shape(), tensor_shape.TensorShape(None)])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, None])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [None])
+    check_shapes(r, indices=[None, None], values=[None], dense_shape=[None])
     _, r = control_flow_ops.while_loop(
         c, b3, [i, x],
         [i.get_shape(), tensor_shape.TensorShape(None)])
-    self.assertEqual(r.indices.get_shape().as_list(), [None, None])
-    self.assertEqual(r.values.get_shape().as_list(), [None])
-    self.assertEqual(r.dense_shape.get_shape().as_list(), [None])
+    check_shapes(r, indices=[None, None], values=[None], dense_shape=[None])
+
+  @test_util.disable_control_flow_v2(""b/131265085"")
+  @test_util.run_v1_only(""b/131265085"")
+  def testWhileBadShapeSparseTensor(self):
+    values = constant_op.constant([2.0, 4.0], name=""values"")
+    indices = constant_op.constant([[0], [3]],
+                                   dtype=dtypes.int64,
+                                   name=""indices"")
+    shape = constant_op.constant([10], dtype=dtypes.int64, name=""dense_shape"")
+    i = constant_op.constant(0)
+    x = sparse_tensor.SparseTensor(indices, values, dense_shape=shape)
+    c = lambda i, _: i < 10
+    b1 = lambda i, x: [i+1, x]
+    def b2(i, x):  # modifies rank.  (shape of all components is changed.)
+      return [
+          i + 1,
+          sparse_tensor.SparseTensor(
+              array_ops.concat([x.indices, [[i], [i]]], axis=1), x.values * 2.0,
+              array_ops.concat([x.dense_shape, [10]], axis=0))
+      ]
 
     # Explicit shape invariant, with a specific (incompatible) rank.
     with self.assertRaisesRegexp(ValueError, ""is not compatible with""):
-      _, r = control_flow_ops.while_loop(
+      control_flow_ops.while_loop(
           c, b1, [i, x],
           [i.get_shape(), tensor_shape.TensorShape([5])])
 
-  @test_util.disable_control_flow_v2(""b/116282023 (IndexedSlices)"")
-  @test_util.run_v1_only(""b/120545219"")
+    # Default shape invariant, but b2 modifies rank (which is not allowed).
+    with self.assertRaises(ValueError):
+      control_flow_ops.while_loop(c, b2, [i, x])
+
   def testWhileShapeInferenceIndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([[2.0, 4.0], [3.0, 5.0]], name=""values"")
@@ -1953,17 +1979,28 @@ class ControlFlowTest(test.TestCase):
           c, b, [i, x],
           [i.get_shape(), tensor_shape.TensorShape([None, 2])])
       self.assertEqual(r.dense_shape.get_shape()[0], 2)
-      self.assertEqual(r.values.get_shape().as_list(), [None, 2])
+      self.assertTrue(r.values.get_shape().is_compatible_with([None, 2]))
+
+  @test_util.disable_control_flow_v2(""b/131265085"")
+  @test_util.run_v1_only(""b/131265085"")
+  def testWhileBadShapeIndexedSlices(self):
+    values = constant_op.constant([2.0, 4.0], name=""values"")
+    indices = constant_op.constant([[0], [3]],
+                                   dtype=dtypes.int64,
+                                   name=""indices"")
+    shape = constant_op.constant([10], dtype=dtypes.int64, name=""dense_shape"")
+    i = constant_op.constant(0)
+    x = sparse_tensor.SparseTensor(indices, values, dense_shape=shape)
+    c = lambda i, _: 10
+    b = lambda i, x: [i+1, x]
 
-      with self.assertRaisesRegexp(ValueError, ""is not compatible with""):
-        _, r = control_flow_ops.while_loop(
-            c, b, [i, x],
-            [i.get_shape(), tensor_shape.TensorShape([None, 5])])
+    # Explicit shape invariant, with a specific (incompatible) rank.
+    with self.assertRaisesRegexp(ValueError, ""is not compatible with""):
+      control_flow_ops.while_loop(
+          c, b, [i, x],
+          [i.get_shape(), tensor_shape.TensorShape([5])])
 
-  @test_util.disable_control_flow_v2(""b/116328420 (RaggedTensor)"")
   def testWhileShapeInferenceRaggedTensor(self):
-    if context.executing_eagerly():
-      self.skipTest(""b/116328420"")
     i = constant_op.constant(0)
     x = ragged_factory_ops.constant([[1, 2], [3], [4, 5, 6]])
     c = lambda i, _: i < 10
@@ -1980,11 +2017,13 @@ class ControlFlowTest(test.TestCase):
           array_ops.concat([x, x], axis=0)
       ]
 
+    def check_shapes(r, values, splits):
+      self.assertTrue(r.values.shape.is_compatible_with(values))
+      self.assertTrue(r.row_splits.shape.is_compatible_with(splits))
+
     # Default shape invariant; b1 adds new values to rows.
     _, r = control_flow_ops.while_loop(c, b1, [i, x])
-    self.assertEqual(r.row_splits.shape.as_list(), [4])
-
-    self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None]))
+    check_shapes(r, values=[None], splits=[4])
 
     # Default shape invariant; b2 adds new rows (not allowed).
     if not context.executing_eagerly():
@@ -1995,20 +2034,15 @@ class ControlFlowTest(test.TestCase):
     _, r = control_flow_ops.while_loop(
         c, b1, [i, x],
         [i.get_shape(), tensor_shape.TensorShape([None, None])])
-    self.assertTrue(r.row_splits.shape.as_list() in ([4], [None]))
-    self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None]))
+    check_shapes(r, values=[None], splits=[None])
 
     # Explicit shape invariant; b2 adds new rows.
     _, r = control_flow_ops.while_loop(
         c, b2, [i, x],
         [i.get_shape(), tensor_shape.TensorShape([None, None])])
-    self.assertTrue(r.row_splits.shape.as_list() in ([3 * 2**10 + 1], [None]))
-    self.assertTrue(r.values.shape.as_list() in ([6 * 2**10], [None]))
+    check_shapes(r, values=[None], splits=[None])
 
-  @test_util.disable_control_flow_v2(""b/116328420 (RaggedTensor)"")
   def testWhileShapeInferenceRaggedTensorRaggedRank2(self):
-    if context.executing_eagerly():
-      self.skipTest(""b/116328420"")
     i = constant_op.constant(0)
     x = ragged_factory_ops.constant([[[1, 2], [3], [4, 5, 6]],
                                      [[], [8, 9, 10]]])
@@ -3473,8 +3507,7 @@ class ControlFlowTest(test.TestCase):
     self.assertEqual(0, value_x)
     self.assertEqual(73, value_x_grad)
 
-  @test_util.disable_control_flow_v2(""b/116282023 (IndexedSlices)"")
-  @test_util.run_v1_only(""b/120545219"")
+  @test_util.deprecated_graph_mode_only
   def testWhileGrad_IndexedSlices(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name=""values"")
@@ -3496,8 +3529,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r.values, values)[0]
       self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r))
 
-  @test_util.disable_control_flow_v2(""b/116328420 (SparseTensor)"")
-  @test_util.run_v1_only(""b/120545219"")
+  @test_util.deprecated_graph_mode_only
   def testWhileGrad_SparseTensor(self):
     with self.cached_session():
       values = constant_op.constant([2.0, 4.0], name=""values"")
@@ -3520,7 +3552,7 @@ class ControlFlowTest(test.TestCase):
       r = gradients_impl.gradients(r.values, values)[0]
       self.assertAllClose(np.array([1024.0, 1024.0]), self.evaluate(r))
 
-  @test_util.run_v1_only(""b/120545219"")
+  @test_util.deprecated_graph_mode_only
   def testCallGradInLoop(self):
     with self.cached_session() as sess:
       i0 = constant_op.constant(0)
",0,train
a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support for composite tensors, such as SparseTensor and RaggedTensor, to while_v2

PiperOrigin-RevId: 245285953",control_flow_ops.py,"@@ -3466,7 +3466,7 @@ def while_loop(cond,
           return x
         return ops.convert_to_tensor(x)
 
-      loop_vars = nest.map_structure(convert, loop_vars)
+      loop_vars = nest.map_structure(convert, loop_vars, expand_composites=True)
       if maximum_iterations is not None:
         return loop_vars[1]
       else:
",0,train
a74f9c3c612586ba4581bd6324a7c1ced69ec5a3,tensorflow/tensorflow,"Add support for composite tensors, such as SparseTensor and RaggedTensor, to while_v2

PiperOrigin-RevId: 245285953",while_v2.py,"@@ -72,12 +72,18 @@ def while_loop(cond,
   # `wrapped_body` below.
   loop_vars = list(_tensor_array_to_flow(orig_loop_vars))
   loop_vars = nest.map_structure(
-      ops.internal_convert_to_tensor_or_indexed_slices, loop_vars)
+      ops.internal_convert_to_tensor_or_indexed_slices, loop_vars,
+      expand_composites=True)
   if shape_invariants is not None:
-    nest.assert_same_structure(orig_loop_vars, shape_invariants)
+    nest.assert_same_structure(orig_loop_vars, shape_invariants,
+                               expand_composites=False)
+    shape_invariants = nest.map_structure(
+        control_flow_ops._get_shape_invariant, loop_vars,
+        list(shape_invariants), expand_composites=False)
   else:
-    shape_invariants = nest.map_structure(lambda t: t.shape, loop_vars)
-
+    shape_invariants = nest.map_structure(
+        control_flow_ops._get_shape_invariant, loop_vars,
+        expand_composites=False)
   if not name:
     name = ""while""
 
@@ -150,11 +156,12 @@ def while_loop(cond,
       # `orig_loop_vars` and `args`, converts flows in `args` to TensorArrays
       # and packs it into the structure of `orig_loop_vars`.
       outputs = body(*_pack_sequence_as(orig_loop_vars, args))
-      if not nest.is_sequence(outputs):
+      if not nest.is_sequence_or_composite(outputs):
         outputs = [outputs]
       # Compare the structure of input and output of body converting the
       # top-level tuples to list to be compatible with legacy while_loop.
-      nest.assert_same_structure(list(outputs), list(orig_loop_vars))
+      nest.assert_same_structure(list(outputs), list(orig_loop_vars),
+                                 expand_composites=True)
 
       outputs = _tensor_array_to_flow(outputs)
 
@@ -193,7 +200,8 @@ def while_loop(cond,
     # Make sure that the shapes of the loop outputs are compatible with the
     # shape invariants, or the shapes of the loop vars if the invariants are not
     # specified.
-    num_flattened_outputs = len(nest.flatten(orig_loop_vars))
+    num_flattened_outputs = len(nest.flatten(orig_loop_vars,
+                                             expand_composites=True))
     # First var is loop counter and second var is maximum_iterations.
     first_loop_var_index = 2
     _check_shapes_compat(
@@ -201,10 +209,10 @@ def while_loop(cond,
                            num_flattened_outputs],
         nest.flatten(
             shape_invariants[first_loop_var_index:first_loop_var_index +
-                             len_orig_loop_vars]),
+                             len_orig_loop_vars], expand_composites=True),
         nest.flatten(loop_vars[first_loop_var_index:first_loop_var_index +
-                               len_orig_loop_vars]))
-    flattened_loop_vars = nest.flatten(loop_vars)
+                               len_orig_loop_vars], expand_composites=True))
+    flattened_loop_vars = nest.flatten(loop_vars, expand_composites=True)
     _check_num_inputs_outputs(cond_graph, body_graph,
                               len(flattened_loop_vars))
 
@@ -237,7 +245,7 @@ def while_loop(cond,
   if return_same_structure:
     return outputs
 
-  flattened_outputs = nest.flatten(outputs)
+  flattened_outputs = nest.flatten(outputs, expand_composites=True)
   if len(flattened_outputs) == 1:
     return flattened_outputs[0]
   else:
@@ -905,9 +913,11 @@ def _pack_sequence_as(structure_with_tas, loop_vars):
 
   flattened_loop_vars = [
       flow_to_tensor_array(*z)
-      for z in zip(nest.flatten(loop_vars), nest.flatten(structure_with_tas))
+      for z in zip(nest.flatten(loop_vars, expand_composites=True),
+                   nest.flatten(structure_with_tas, expand_composites=True))
   ]
-  return nest.pack_sequence_as(structure_with_tas, flattened_loop_vars)
+  return nest.pack_sequence_as(structure_with_tas, flattened_loop_vars,
+                               expand_composites=True)
 
 
 def _tensor_array_to_flow(loop_vars):
@@ -917,14 +927,15 @@ def _tensor_array_to_flow(loop_vars):
       return maybe_ta.flow
     return maybe_ta
 
-  return nest.map_structure(f, loop_vars)
+  return nest.map_structure(f, loop_vars, expand_composites=True)
 
 
 def _build_signature(loop_vars, shape_invariants):
   return nest.pack_sequence_as(loop_vars, [
       tensor_spec.TensorSpec(s, t.dtype, name=t.op.name)
-      for s, t in zip(nest.flatten(shape_invariants), nest.flatten(loop_vars))
-  ])
+      for s, t in zip(nest.flatten(shape_invariants, expand_composites=True),
+                      nest.flatten(loop_vars, expand_composites=True))
+  ], expand_composites=True)
 
 
 def _build_maximum_iterations_loop_var(maximum_iterations):
",0,train
aeef5bba09657c009de6162235302d0c6fd54998,tensorflow/tensorflow,"Delay transpiler initialization to mitigate effects of circular imports.

PiperOrigin-RevId: 350751088
Change-Id: Ib5431b139e6d0adf2d0eec2d34bc1b21ad687256",tfr_gen.py,"@@ -334,7 +334,7 @@ _AG_FIXED_RETURN_TYPE = {
 QN = qual_names.QN
 
 # TODO(mdan): Fix this with an importable module.
-AG_MODULE = api._TRANSPILER._extra_locals['ag__']  # pylint:disable=protected-access
+AG_MODULE = api._TRANSPILER.get_extra_locals()['ag__']  # pylint:disable=protected-access
 
 
 class TFRTypeResolver(type_inference.Resolver):
",0,train
aeef5bba09657c009de6162235302d0c6fd54998,tensorflow/tensorflow,"Delay transpiler initialization to mitigate effects of circular imports.

PiperOrigin-RevId: 350751088
Change-Id: Ib5431b139e6d0adf2d0eec2d34bc1b21ad687256",api.py,"@@ -209,30 +209,31 @@ class PyToTF(transpiler.PyToPy):
 
   def __init__(self):
     super(PyToTF, self).__init__()
-
-    # TODO(mdan): Move into core or replace with an actual importable module.
-    # Craft a module that exposes the external API as well as certain
-    # internal modules.
-    ag_internal = imp.new_module('autograph')
-    ag_internal.__dict__.update(inspect.getmodule(PyToTF).__dict__)
-    ag_internal.ConversionOptions = converter.ConversionOptions
-    ag_internal.STD = converter.STANDARD_OPTIONS
-    ag_internal.Feature = converter.Feature
-    ag_internal.utils = utils
-    ag_internal.FunctionScope = function_wrappers.FunctionScope
-    ag_internal.with_function_scope = function_wrappers.with_function_scope
-    # TODO(mdan): Add safeguards against name clashes.
-    # We don't want to create a submodule because we want the operators to be
-    # accessible as ag__.<operator>
-    ag_internal.__dict__.update(special_functions.__dict__)
-    ag_internal.__dict__.update(operators.__dict__)
-
-    self._extra_locals = {'ag__': ag_internal}
+    self._extra_locals = None
 
   def get_transformed_name(self, node):
     return 'tf__' + super(PyToTF, self).get_transformed_name(node)
 
   def get_extra_locals(self):
+    if self._extra_locals is None:
+      # TODO(mdan): Move into core or replace with an actual importable module.
+      # Craft a module that exposes the external API as well as certain
+      # internal modules.
+      ag_internal = imp.new_module('autograph')
+      ag_internal.__dict__.update(inspect.getmodule(PyToTF).__dict__)
+      ag_internal.ConversionOptions = converter.ConversionOptions
+      ag_internal.STD = converter.STANDARD_OPTIONS
+      ag_internal.Feature = converter.Feature
+      ag_internal.utils = utils
+      ag_internal.FunctionScope = function_wrappers.FunctionScope
+      ag_internal.with_function_scope = function_wrappers.with_function_scope
+      # TODO(mdan): Add safeguards against name clashes.
+      # We don't want to create a submodule because we want the operators to be
+      # accessible as ag__.<operator>
+      ag_internal.__dict__.update(special_functions.__dict__)
+      ag_internal.__dict__.update(operators.__dict__)
+
+      self._extra_locals = {'ag__': ag_internal}
     return self._extra_locals
 
   def get_caching_key(self, ctx):
",0,train
c58b80eb60443f97161ff674670166346a586b05,tensorflow/tensorflow,remove redundant test in random_seed_test,random_seed_test.py,"@@ -24,7 +24,6 @@ from absl.testing import parameterized
 
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.util import random_seed as data_random_seed
-from tensorflow.python.eager import context
 from tensorflow.python.framework import combinations
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -125,24 +124,15 @@ class RandomSeedTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testRandomSeed(self, input_fn, output_fn):
 
     tinput, toutput = input_fn._obj(), output_fn._obj() # pylint: disable=protected-access
-    def check(tinput, toutput):
-      random_seed.set_random_seed(tinput[0])
-      g_seed, op_seed = data_random_seed.get_seed(tinput[1])
-      g_seed = self.evaluate(g_seed)
-      op_seed = self.evaluate(op_seed)
-      msg = 'test_case = {0}, got {1}, want {2}'.format(
-          tinput, (g_seed, op_seed), toutput)
-      self.assertEqual((g_seed, op_seed), toutput, msg=msg)
-      random_seed.set_random_seed(None)
+    random_seed.set_random_seed(tinput[0])
+    g_seed, op_seed = data_random_seed.get_seed(tinput[1])
+    g_seed = self.evaluate(g_seed)
+    op_seed = self.evaluate(op_seed)
+    msg = 'test_case = {0}, got {1}, want {2}'.format(
+        tinput, (g_seed, op_seed), toutput)
+    self.assertEqual((g_seed, op_seed), toutput, msg=msg)
+    random_seed.set_random_seed(None)
 
-    check(tinput=tinput, toutput=toutput)
-
-    if not context.executing_eagerly():
-      random_seed.set_random_seed(1)
-      for i in range(10):
-        tinput = (1, None)
-        toutput = (1, i)
-        check(tinput=tinput, toutput=toutput)
 
 if __name__ == '__main__':
   test.main()
",0,train
8bf2eafa476610aa60bfe4194d3517742dc3ebcc,tensorflow/tensorflow,"Update tensorrt test to not rely on Keras.

Replace the initializer with tf init_ops, and the keras.dataset with tfds.

PiperOrigin-RevId: 304659361
Change-Id: I8016c1607aa57c419dfca229c42fd4c0403b90f1",quantization_mnist_test.py,"@@ -18,10 +18,12 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+
+import tensorflow_datasets as tfds
+
 from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import get_linked_tensorrt_version
 from tensorflow.compiler.tf2tensorrt._pywrap_py_utils import is_tensorrt_enabled
 from tensorflow.core.protobuf import config_pb2
-from tensorflow.python import keras
 from tensorflow.python.compiler.tensorrt import trt_convert
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.estimator.estimator import Estimator
@@ -33,10 +35,10 @@ from tensorflow.python.framework import graph_util
 from tensorflow.python.framework import importer
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
-from tensorflow.python.keras.datasets import mnist
 from tensorflow.python.layers import layers
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import metrics
 from tensorflow.python.ops import nn
@@ -81,12 +83,12 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase):
             'kernel',
             shape=[num_inputs, num_outputs],
             dtype=dtypes.float32,
-            initializer=keras.initializers.glorot_uniform())
+            initializer=init_ops.GlorotUniform())
         bias = variable_scope.get_variable(
             'bias',
             shape=[num_outputs],
             dtype=dtypes.float32,
-            initializer=keras.initializers.zeros())
+            initializer=init_ops.Zeros())
         x = math_ops.matmul(x, kernel)
         x = _Quantize(x, quantization_range)
         x = nn.bias_add(x, bias)
@@ -179,19 +181,15 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase):
     Returns:
       The Estimator evaluation result.
     """"""
-    # Get dataset
-    train_data, test_data = mnist.load_data()
-
-    def _PreprocessFn(x, y):
+    def _PreprocessFn(entry):
+      x, y = entry['image'], entry['label']
       x = math_ops.cast(x, dtypes.float32)
-      x = array_ops.expand_dims(x, axis=2)
       x = 2.0 * (x / 255.0) - 1.0
       y = math_ops.cast(y, dtypes.int32)
       return x, y
 
     def _EvalInputFn():
-      mnist_x, mnist_y = test_data
-      dataset = dataset_ops.Dataset.from_tensor_slices((mnist_x, mnist_y))
+      dataset = tfds.load('mnist', split='test')
       dataset = dataset.map(
           map_func=_PreprocessFn,
           num_parallel_calls=8).batch(batch_size=batch_size)
@@ -201,9 +199,8 @@ class QuantizationAwareTrainingMNISTTest(test_util.TensorFlowTestCase):
       return features, labels
 
     def _TrainInputFn():
-      mnist_x, mnist_y = train_data
-      dataset = dataset_ops.Dataset.from_tensor_slices((mnist_x, mnist_y))
-      dataset = dataset.shuffle(2 * len(mnist_x))
+      dataset = tfds.load('mnist', split='train')
+      dataset = dataset.shuffle(60000)
       dataset = dataset.map(
           map_func=_PreprocessFn,
           num_parallel_calls=8).batch(batch_size=batch_size)
",0,test
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_info.cc,"@@ -343,7 +343,8 @@ bool MaliInfo::IsValhall() const {
 }
 
 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
-                                     GpuInfo* gpu_info) {
+                                     GpuApi gpu_api, GpuInfo* gpu_info) {
+  gpu_info->gpu_api = gpu_api;
   std::string lowered = gpu_description;
   absl::AsciiStrToLower(&lowered);
   gpu_info->vendor = GetGpuVendor(lowered);
@@ -392,5 +393,35 @@ int GpuInfo::GetComputeUnitsCount() const {
   }
 }
 
+int GpuInfo::GetMaxImageArguments() const {
+  if (IsApiOpenGl()) {
+    return opengl_info.max_image_units;
+  } else if (IsApiVulkan()) {
+    return vulkan_info.max_per_stage_descriptor_sampled_images;
+  } else if (IsApiMetal()) {
+    return 32;
+  } else if (IsApiOpenCl()) {
+    return 128;
+  } else {
+    return 1;
+  }
+}
+
+bool GpuInfo::IsApiOpenGl() const { return gpu_api == GpuApi::kOpenGl; }
+
+bool GpuInfo::IsApiVulkan() const { return gpu_api == GpuApi::kVulkan; }
+
+bool GpuInfo::IsApiMetal() const { return gpu_api == GpuApi::kMetal; }
+
+bool GpuInfo::IsApiOpenCl() const { return gpu_api == GpuApi::kOpenCl; }
+
+bool GpuInfo::IsApiOpenGl31OrAbove() const {
+  if (!IsApiOpenGl()) {
+    return false;
+  }
+  return (opengl_info.major_version == 3 && opengl_info.minor_version >= 1) ||
+         opengl_info.major_version > 3;
+}
+
 }  // namespace gpu
 }  // namespace tflite
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_info.h,"@@ -34,6 +34,14 @@ enum class GpuVendor {
   kUnknown
 };
 
+enum class GpuApi {
+  kUnknown,
+  kOpenCl,
+  kMetal,
+  kVulkan,
+  kOpenGl,
+};
+
 enum class AdrenoGpu {
   // Adreno 6xx series
   kAdreno685,
@@ -190,6 +198,28 @@ struct MaliInfo {
   bool IsValhall() const;
 };
 
+struct OpenGlInfo {
+  std::string renderer_name;
+  std::string vendor_name;
+  std::string version;
+  int major_version = -1;
+  int minor_version = -1;
+
+  int max_image_units = 0;
+  int max_ssbo_bindings = 0;
+  int max_image_bindings = 0;
+};
+
+struct VulkanInfo {
+  std::string vendor_name;
+  uint32_t api_version = -1;
+  uint32_t api_version_major = -1;
+  uint32_t api_version_minor = -1;
+  uint32_t api_version_patch = -1;
+
+  uint32_t max_per_stage_descriptor_sampled_images = 0;
+};
+
 struct GpuInfo {
   bool IsAdreno() const;
   bool IsApple() const;
@@ -207,20 +237,15 @@ struct GpuInfo {
 
   int GetComputeUnitsCount() const;
 
+  int GetMaxImageArguments() const;
+
   GpuVendor vendor = GpuVendor::kUnknown;
+  GpuApi gpu_api = GpuApi::kUnknown;
 
-  std::string renderer_name;
-  std::string vendor_name;
-  std::string version;
-  int major_version = -1;
-  int minor_version = -1;
   std::vector<std::string> extensions;
-  int max_ssbo_bindings = 0;
-  int max_image_bindings = 0;
   std::vector<int> max_work_group_size;
   int max_work_group_invocations;
   int max_texture_size = 0;
-  int max_image_units = 0;
   int max_array_texture_layers = 0;
 
   std::vector<int> supported_subgroup_sizes;
@@ -228,19 +253,34 @@ struct GpuInfo {
   AdrenoInfo adreno_info;
   AppleInfo apple_info;
   MaliInfo mali_info;
+
+  // OpenGL specific, gpu_api should be kOpenGl
+  OpenGlInfo opengl_info;
+  bool IsApiOpenGl() const;
+  bool IsApiOpenGl31OrAbove() const;
+
+  // Vulkan specific, gpu_api should be kVulkan
+  VulkanInfo vulkan_info;
+  bool IsApiVulkan() const;
+
+  bool IsApiMetal() const;
+
+  bool IsApiOpenCl() const;
 };
 
 inline bool IsOpenGl31OrAbove(const GpuInfo& gpu_info) {
-  return (gpu_info.major_version == 3 && gpu_info.minor_version >= 1) ||
-         gpu_info.major_version > 3;
+  return (gpu_info.opengl_info.major_version == 3 &&
+          gpu_info.opengl_info.minor_version >= 1) ||
+         gpu_info.opengl_info.major_version > 3;
 }
 
 // Currently it initializes:
 // vendor
 // AdrenoInfo if vendor is kQualcomm
 // AppleInfo if vendor is kApple
+// MaliInfo if vendor is kMali
 void GetGpuInfoFromDeviceDescription(const std::string& gpu_description,
-                                     GpuInfo* gpu_info);
+                                     GpuApi gpu_api, GpuInfo* gpu_info);
 
 }  // namespace gpu
 }  // namespace tflite
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",api.cc,"@@ -385,7 +385,7 @@ absl::Status Compile(const CompilationOptions& options,
   }
   GpuInfo gpu_info;
   RETURN_IF_ERROR(RequestGpuInfo(&gpu_info));
-  if (!IsOpenGl31OrAbove(gpu_info)) {
+  if (!gpu_info.IsApiOpenGl31OrAbove()) {
     return absl::InternalError(
         ""OpenGL ES 3.1 or above is required to use OpenGL inference."");
   }
@@ -406,7 +406,7 @@ absl::Status ReadSerializedModel(
     std::unique_ptr<CompiledModel>* compiled_model) {
   GpuInfo gpu_info;
   RETURN_IF_ERROR(RequestGpuInfo(&gpu_info));
-  if (!IsOpenGl31OrAbove(gpu_info)) {
+  if (!gpu_info.IsApiOpenGl31OrAbove()) {
     return absl::InternalError(
         ""OpenGL ES 3.1 or above is required to use OpenGL inference."");
   }
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",api2.cc,"@@ -636,7 +636,7 @@ class InferenceEnvironmentImpl : public InferenceEnvironment {
     RETURN_IF_ERROR(EglEnvironment::NewEglEnvironment(&egl_env_));
 
     RETURN_IF_ERROR(RequestGpuInfo(&gpu_info_));
-    properties_.is_opengl_available = IsOpenGl31OrAbove(gpu_info_);
+    properties_.is_opengl_available = gpu_info_.IsApiOpenGl31OrAbove();
     if (!properties_.is_opengl_available) {
       return absl::InternalError(
           ""OpenGL ES 3.1 or above is required to use OpenGL inference."");
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",compiler.cc,"@@ -201,7 +201,7 @@ class CompilerImpl : public Compiler {
           return;
         }
         bool is_ref = IsRef(*object);
-        if (num_textures < gpu_info_.max_image_units &&
+        if (num_textures < gpu_info_.GetMaxImageArguments() &&
             !ExceedsMaxSize(*object, gpu_info_) &&
             (object->object_type == ObjectType::TEXTURE ||
              (is_ref && options_.ref_obj_type == ObjectType::TEXTURE) ||
@@ -251,8 +251,7 @@ class CompilerImpl : public Compiler {
         attr.outputs.push_back(object);
       }
 
-      // Allocate bindings. Textures must be bound first. max_image_units also
-      // defines max binding number for a texture.
+      // Allocate bindings. Textures must be bound first.
       uint32_t binding = 0;
       auto set_binding = [&](ObjectType type, Object& object) {
         if (object.object_type == type) {
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",request_gpu_info.cc,"@@ -28,27 +28,34 @@ namespace tflite {
 namespace gpu {
 namespace gl {
 
-absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
-  GpuInfo info;
-
+absl::Status RequestOpenGlInfo(OpenGlInfo* gl_info) {
   const GLubyte* renderer_name = glGetString(GL_RENDERER);
   if (renderer_name) {
-    info.renderer_name = reinterpret_cast<const char*>(renderer_name);
-    GetGpuInfoFromDeviceDescription(info.renderer_name, &info);
+    gl_info->renderer_name = reinterpret_cast<const char*>(renderer_name);
   }
 
   const GLubyte* vendor_name = glGetString(GL_VENDOR);
   if (vendor_name) {
-    info.vendor_name = reinterpret_cast<const char*>(vendor_name);
+    gl_info->vendor_name = reinterpret_cast<const char*>(vendor_name);
   }
 
   const GLubyte* version_name = glGetString(GL_VERSION);
   if (version_name) {
-    info.version = reinterpret_cast<const char*>(version_name);
+    gl_info->version = reinterpret_cast<const char*>(version_name);
   }
 
-  glGetIntegerv(GL_MAJOR_VERSION, &info.major_version);
-  glGetIntegerv(GL_MINOR_VERSION, &info.minor_version);
+  glGetIntegerv(GL_MAJOR_VERSION, &gl_info->major_version);
+  glGetIntegerv(GL_MINOR_VERSION, &gl_info->minor_version);
+
+  return absl::OkStatus();
+}
+
+absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
+  GpuInfo info;
+  RETURN_IF_ERROR(RequestOpenGlInfo(&info.opengl_info));
+
+  GetGpuInfoFromDeviceDescription(info.opengl_info.renderer_name,
+                                  GpuApi::kOpenGl, &info);
 
   GLint extensions_count;
   glGetIntegerv(GL_NUM_EXTENSIONS, &extensions_count);
@@ -57,8 +64,10 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
     info.extensions[i] = std::string(
         reinterpret_cast<const char*>(glGetStringi(GL_EXTENSIONS, i)));
   }
-  glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS, &info.max_ssbo_bindings);
-  glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS, &info.max_image_bindings);
+  glGetIntegerv(GL_MAX_COMPUTE_SHADER_STORAGE_BLOCKS,
+                &info.opengl_info.max_ssbo_bindings);
+  glGetIntegerv(GL_MAX_COMPUTE_IMAGE_UNIFORMS,
+                &info.opengl_info.max_image_bindings);
   info.max_work_group_size.resize(3);
   glGetIntegeri_v(GL_MAX_COMPUTE_WORK_GROUP_SIZE, 0,
                   &info.max_work_group_size[0]);
@@ -69,7 +78,7 @@ absl::Status RequestGpuInfo(GpuInfo* gpu_info) {
   glGetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS,
                 &info.max_work_group_invocations);
   glGetIntegerv(GL_MAX_TEXTURE_SIZE, &info.max_texture_size);
-  glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.max_image_units);
+  glGetIntegerv(GL_MAX_IMAGE_UNITS, &info.opengl_info.max_image_units);
   glGetIntegerv(GL_MAX_ARRAY_TEXTURE_LAYERS, &info.max_array_texture_layers);
   RETURN_IF_ERROR(GetOpenGlErrors());
   *gpu_info = info;
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",request_gpu_info.h,"@@ -26,6 +26,10 @@ namespace tflite {
 namespace gpu {
 namespace gl {
 
+// This method performs multiple GL calls, therefore, egl context needs to be
+// created upfront.
+absl::Status RequestOpenGlInfo(OpenGlInfo* gl_info);
+
 // This method performs multiple GL calls, therefore, egl context needs to be
 // created upfront.
 absl::Status RequestGpuInfo(GpuInfo* gpu_info);
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",calculator_from_metadata.cc,"@@ -69,7 +69,7 @@ class WorkgroupsCalculatorFromMetadata : public WorkgroupsCalculator {
 const data::HardcodedWorkgroups* FindWorkgroups(
     const data::CustomWorkgroups& workgroups, const GpuInfo& gpu_info) {
   for (auto workgroup : *workgroups.hardcoded_workgroups()) {
-    if (workgroup->gpu_info()->c_str() == gpu_info.renderer_name) {
+    if (workgroup->gpu_info()->c_str() == gpu_info.opengl_info.renderer_name) {
       return workgroup;
     }
   }
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_compatibility.cc,"@@ -70,10 +70,11 @@ std::map<std::string, std::string> GPUCompatibilityList::CalculateVariables(
   variables[kDeviceModel] = android_info.model;
   variables[kDeviceName] = android_info.device;
   variables[kManufacturer] = android_info.manufacturer;
-  variables[kGPUModel] = gpu_info.renderer_name;
+  const auto& gl_info = gpu_info.opengl_info;
+  variables[kGPUModel] = gl_info.renderer_name;
   char buffer[128];
-  int len = snprintf(buffer, 128 - 1, ""%d.%d"", gpu_info.major_version,
-                     gpu_info.minor_version);
+  int len = snprintf(buffer, 128 - 1, ""%d.%d"", gl_info.major_version,
+                     gl_info.minor_version);
   buffer[len] = '\0';
   variables[kOpenGLESVersion] = std::string(buffer);
   CanonicalizeValues(&variables);
",0,train
80033af9526ddeee85e8ad0fcf98f37f0706dd09,tensorflow/tensorflow,"Added GpuApi to GpuInfo.

PiperOrigin-RevId: 343190551
Change-Id: Ic0d3cf4676600a0276490fb71c2167bda9ff07e1",gpu_compatibility_test.cc,"@@ -39,10 +39,9 @@ TEST_F(GPUCompatibilityTest, ReturnsSupportedForFullMatch) {
   tflite::acceleration::AndroidInfo android_info = {.android_sdk_version = ""24"",
                                                     .model = ""m712c""};
 
-  tflite::gpu::GpuInfo tflite_gpu_info = {
-      .major_version = 3,
-      .minor_version = 1,
-  };
+  tflite::gpu::GpuInfo tflite_gpu_info;
+  tflite_gpu_info.opengl_info.major_version = 3;
+  tflite_gpu_info.opengl_info.minor_version = 1;
 
   EXPECT_TRUE(list_->Includes(android_info, tflite_gpu_info));
 }
@@ -54,11 +53,10 @@ TEST_F(GPUCompatibilityTest, ReturnsUnsupportedForFullMatch) {
                                                     .model = ""SM-G960F"",
                                                     .device = ""starlte"",
                                                     .manufacturer = ""Samsung""};
-  tflite::gpu::GpuInfo tflite_gpu_info = {
-      .renderer_name = ""Mali-G72"",
-      .major_version = 3,
-      .minor_version = 2,
-  };
+  tflite::gpu::GpuInfo tflite_gpu_info;
+  tflite_gpu_info.opengl_info.renderer_name = ""Mali-G72"";
+  tflite_gpu_info.opengl_info.major_version = 3;
+  tflite_gpu_info.opengl_info.minor_version = 2;
   EXPECT_FALSE(list_->Includes(android_info, tflite_gpu_info));
 }
 
",0,train
f050412ecddfb771008165989946dcea3b9b60f8,tensorflow/tensorflow,"Lazily construct no-op OpKernelContext::Params::{inc,dec}_num_deferred_ops_function.

Each time we create an OpKernelContext::Params, we default-create no-op functions for these members. Since these functions are rarely used, this change defers their creation until the point of use.

PiperOrigin-RevId: 282854876
Change-Id: Ibdf5c034cffb001d2055413b29c328386b011693",op_kernel.h,"@@ -726,8 +726,8 @@ class OpKernelContext {
     const int* forward_from_array = nullptr;
 
     // For tracking actively running deferred ops.
-    std::function<void()> inc_num_deferred_ops_function = []() {};
-    std::function<void()> dec_num_deferred_ops_function = []() {};
+    std::function<void()> inc_num_deferred_ops_function;
+    std::function<void()> dec_num_deferred_ops_function;
   };
 
   // params must outlive the OpKernelContext.
@@ -1271,10 +1271,14 @@ class OpKernelContext {
   // functions. It then must call these two functions in pairs, before and after
   // device execution, respectively.
   TF_MUST_USE_RESULT std::function<void()> inc_num_deferred_ops_function() {
-    return params_->inc_num_deferred_ops_function;
+    return params_->inc_num_deferred_ops_function
+               ? params_->inc_num_deferred_ops_function
+               : []() {};
   }
   TF_MUST_USE_RESULT std::function<void()> dec_num_deferred_ops_function() {
-    return params_->dec_num_deferred_ops_function;
+    return params_->dec_num_deferred_ops_function
+               ? params_->dec_num_deferred_ops_function
+               : []() {};
   }
 
   Allocator* get_allocator(AllocatorAttributes attr);
",0,train
3ed8e55bf6710378c8abd3c3e24bc13a60bd50fd,tensorflow/tensorflow,Update image_ops_impl.py,image_ops_impl.py,"@@ -2944,15 +2944,15 @@ def rgb_to_yiq(images):
   Returns:
     images: tensor with the same shape as `images`.
 
-    Usage Example:
+  Usage Example:
     
-    >>> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
-    ...          [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]],
-    ...          [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]]
-    >>> image = tf.constant(image)
-    >>> tf.image.rgb_to_yiq(image)
-    <tf.Tensor: shape=(3, 2, 3), dtype=float32, numpy=
-    array([[[ 1.815     , -0.91724455,  0.09962624],
+  >>> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
+  ...          [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]],
+  ...          [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]]
+  >>> image = tf.constant(image)
+  >>> tf.image.rgb_to_yiq(image)
+  <tf.Tensor: shape=(3, 2, 3), dtype=float32, numpy=
+  array([[[ 1.815     , -0.91724455,  0.09962624],
         [ 4.815     , -0.9172445 ,  0.09951639]],
        [[ 7.815     , -0.91724443,  0.09940636],
         [10.815001  , -0.91724455,  0.09929633]],
@@ -2986,15 +2986,15 @@ def yiq_to_rgb(images):
   Returns:
     images: tensor with the same shape as `images`.
 
-    Usage Example:
+  Usage Example:
     
-    >>> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
-    ...          [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]],
-    ...          [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]]
-    >>> image = tf.constant(image)
-    >>> tf.image.yiq_to_rgb(image)
-    <tf.Tensor: shape=(3, 2, 3), dtype=float32, numpy=
-    array([[[ 4.774447  , -1.4856384 ,  3.8992112 ],
+  >>> image = [[[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]],
+  ...          [[7.0, 8.0, 9.0], [10.0, 11.0, 12.0]],
+  ...          [[13.0, 14.0, 15.0], [16.0, 17.0, 18.0]]]
+  >>> image = tf.constant(image)
+  >>> tf.image.yiq_to_rgb(image)
+  <tf.Tensor: shape=(3, 2, 3), dtype=float32, numpy=
+  array([[[ 4.774447  , -1.4856384 ,  3.8992112 ],
         [12.504881  , -1.2432894 ,  8.691683  ]],
        [[20.235313  , -1.0009406 , 13.484153  ],
         [27.965748  , -0.7585919 , 18.276623  ]],
",0,train
b01309c36f90e7cbbca7d7b7cc2d176eed70cc87,tensorflow/tensorflow,Remove adding delete operators,greedy_memory_planner.h,"@@ -87,7 +87,6 @@ class GreedyMemoryPlanner : public MemoryPlanner {
   };
 
  private:
-  TF_LITE_REMOVE_VIRTUAL_DELETE
   // Whether a buffer is active in a given time range.
   bool DoesEntryOverlapInTime(const ListEntry* entry, const int first_time_used,
                               const int last_time_used) const;
",0,train
b01309c36f90e7cbbca7d7b7cc2d176eed70cc87,tensorflow/tensorflow,Remove adding delete operators,linear_memory_planner.h,"@@ -37,7 +37,6 @@ class LinearMemoryPlanner : public MemoryPlanner {
                                   int buffer_index, int* offset) override;
 
  private:
-  TF_LITE_REMOVE_VIRTUAL_DELETE
   static constexpr int kMaxBufferCount = 1024;
   size_t buffer_offsets_[kMaxBufferCount];
   int current_buffer_count_;
",0,train
4e53b2c50cd4b9cb9f620f3cc360a0cb3437a6ae,tensorflow/tensorflow,"Adding an _eager_reset() method to MultiDeviceIterator that re-initializes the MultiDeviceIteratorResource and all underlying AnonymousIterator resources instead of re-creating them in eager mode as is the usual suggestion.

PiperOrigin-RevId: 234914867",multi_device_iterator_test.py,"@@ -34,6 +34,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.platform import test
+from tensorflow.python.platform import tf_logging as logging
 
 
 # memory_profiler might not be available in the OSS version of TensorFlow.
@@ -65,6 +66,7 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase,
       for _ in six.moves.range(num_iters):
         f()
       increase = memory_profiler.memory_usage(-1)[0] - initial
+      logging.info(""Memory increase observed: %f MB"" % increase)
       assert increase < increase_threshold_absolute_mb, (
           ""Increase is too high. Initial memory usage: %f MB. Increase: %f MB. ""
           ""Maximum allowed increase: %f"") % (initial, increase,
@@ -101,19 +103,37 @@ class MultiDeviceIteratorTest(test_base.DatasetTestBase,
         self.evaluate(elem_on_2)
 
   @test_util.run_v1_only(""b/121264236"")
-  def testEagerNoMemoryLeak(self):
+  def testEagerMemoryUsageWithReset(self):
     if not context.executing_eagerly():
       self.skipTest(""Only eager mode test"")
     if memory_profiler is None:
       self.skipTest(""memory_profiler required to run this test"")
 
+    dataset = dataset_ops.Dataset.range(10)
+    multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
+        dataset, [""/cpu:1"", ""/cpu:2""])
+
+    def f():
+      self.evaluate(multi_device_iterator.get_next())
+      multi_device_iterator._eager_reset()
+
+    self.assertNotIncreasingMemory(
+        f, num_iters=100, increase_threshold_absolute_mb=50)
+
+  @test_util.run_v1_only(""b/121264236"")
+  def testEagerMemoryUsageWithRecreation(self):
+    if not context.executing_eagerly():
+      self.skipTest(""Only eager mode test"")
+    if memory_profiler is None:
+      self.skipTest(""memory_profiler required to run this test"")
+
+    dataset = dataset_ops.Dataset.range(10)
+
     def f():
-      dataset = dataset_ops.Dataset.range(10)
       multi_device_iterator = multi_device_iterator_ops.MultiDeviceIterator(
           dataset, [""/cpu:1"", ""/cpu:2""])
       self.evaluate(multi_device_iterator.get_next())
       del multi_device_iterator
-      del dataset
 
     # TODO(b/123316347): Reduce threshold once bug is fixed.
     self.assertNotIncreasingMemory(
",0,train
4e53b2c50cd4b9cb9f620f3cc360a0cb3437a6ae,tensorflow/tensorflow,"Adding an _eager_reset() method to MultiDeviceIterator that re-initializes the MultiDeviceIteratorResource and all underlying AnonymousIterator resources instead of re-creating them in eager mode as is the usual suggestion.

PiperOrigin-RevId: 234914867",multi_device_iterator_ops.py,"@@ -89,6 +89,11 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
     self._next_func = _remote_next_func._get_concrete_function_internal()  # pylint: disable=protected-access
     self._next_captured_args = self._next_func.captured_inputs
 
+    self._incarnation_id_index = -1
+    for i, arg in enumerate(self._next_captured_args):
+      if arg == incarnation_id:
+        self._incarnation_id_index = i
+
     @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)])
     def _finalize_func(unused_string_handle):
       return array_ops.constant(0, dtypes.int64)
@@ -126,6 +131,49 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
     return self._structure
 
 
+class _ReincarnatedPerDeviceGenerator(dataset_ops.DatasetV2):
+  """"""Creates a _PerDeviceGenerator-like dataset with a new incarnation_id.
+
+  Re-uses the functions from the provided per_device_dataset and just switches
+  out the function argument corresponding to the incarnation_id.
+  """"""
+
+  def __init__(self, per_device_dataset, incarnation_id):
+    # pylint: disable=protected-access
+    self._structure = per_device_dataset._structure
+
+    self._init_func = per_device_dataset._init_func
+    self._init_captured_args = self._init_func.captured_inputs
+
+    self._next_func = per_device_dataset._next_func
+    self._next_captured_args = per_device_dataset._next_captured_args
+    # The captured arguments to the next_func are string_handle, incarnation_id.
+    # We update the incarnation id to the new one.
+    self._next_captured_args[
+        per_device_dataset._incarnation_id_index] = incarnation_id
+
+    self._finalize_func = per_device_dataset._finalize_func
+    self._finalize_captured_args = per_device_dataset._finalize_captured_args
+
+    variant_tensor = gen_dataset_ops.generator_dataset(
+        self._init_captured_args,
+        self._next_captured_args,
+        self._finalize_captured_args,
+        init_func=self._init_func,
+        next_func=self._next_func,
+        finalize_func=self._finalize_func,
+        **dataset_ops.flat_structure(self))
+    super(_ReincarnatedPerDeviceGenerator, self).__init__(variant_tensor)
+
+  def _inputs(self):
+    # TODO(b/116506223): Determine which datasets should be used as inputs here.
+    return []
+
+  @property
+  def _element_structure(self):
+    return self._structure
+
+
 class MultiDeviceIterator(object):
   """"""An iterator over multiple devices.""""""
 
@@ -156,9 +204,11 @@ class MultiDeviceIterator(object):
     self._devices = devices
     self._source_device = source_device
     self._source_device_tensor = ops.convert_to_tensor(source_device)
+    self._max_buffer_size = max_buffer_size
+    self._prefetch_buffer_size = prefetch_buffer_size
 
-    if prefetch_buffer_size > max_buffer_size:
-      max_buffer_size = prefetch_buffer_size
+    if self._prefetch_buffer_size > self._max_buffer_size:
+      self._max_buffer_size = self._prefetch_buffer_size
 
     # Create the MultiDeviceIterator.
     with ops.device(self._source_device):
@@ -171,7 +221,7 @@ class MultiDeviceIterator(object):
               devices=self._devices,
               shared_name=shared_name,
               container="""",
-              **dataset_ops.flat_structure(dataset)))
+              **dataset_ops.flat_structure(self._dataset)))
       if context.executing_eagerly():
         # Delete the resource when this object is deleted
         self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
@@ -183,7 +233,15 @@ class MultiDeviceIterator(object):
       self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
           self._dataset._variant_tensor,  # pylint: disable=protected-access
           self._multi_device_iterator_resource,
-          max_buffer_size=max_buffer_size)
+          max_buffer_size=self._max_buffer_size)
+
+    self._prototype_device_datasets = []
+    for i, device in enumerate(self._devices):
+      with ops.device(device):
+        ds = _PerDeviceGenerator(
+            i, self._multi_device_iterator_resource, self._incarnation_id,
+            self._source_device_tensor, self._dataset._element_structure)  # pylint: disable=protected-access
+        self._prototype_device_datasets.append(ds)
 
     # TODO(rohanj): Explore the possibility of the MultiDeviceIterator to
     # initialize the device side of the pipeline. This would allow the
@@ -193,17 +251,7 @@ class MultiDeviceIterator(object):
     self._device_iterators = []
     for i, device in enumerate(self._devices):
       with ops.device(device):
-        ds = _PerDeviceGenerator(
-            i, self._multi_device_iterator_resource, self._incarnation_id,
-            self._source_device_tensor, dataset._element_structure)  # pylint: disable=protected-access
-        if prefetch_buffer_size > 0:
-          ds = ds.prefetch(prefetch_buffer_size)
-        # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
-        # non-CPU devices.
-        options = dataset_ops.Options()
-        options.experimental_autotune = False
-        options.experimental_optimization.apply_default_optimizations = False
-        ds = ds.with_options(options)
+        ds = self._create_device_dataset(i)
         if context.executing_eagerly():
           self._device_iterators.append(dataset_ops.make_one_shot_iterator(ds))
         else:
@@ -216,6 +264,20 @@ class MultiDeviceIterator(object):
       ]
       self._initializer = control_flow_ops.group(*device_iterator_initializers)
 
+  def _create_device_dataset(self, i):
+    """"""Uses _prototype_device_datasets[i] to build a dataset for the device.""""""
+    ds = self._prototype_device_datasets[i]
+    ds = _ReincarnatedPerDeviceGenerator(ds, self._incarnation_id)
+    if self._prefetch_buffer_size > 0:
+      ds = ds.prefetch(self._prefetch_buffer_size)
+    # TODO(jsimsa): Enable auto-tuning and optimizations when supported for
+    # non-CPU devices.
+    options = dataset_ops.Options()
+    options.experimental_autotune = False
+    options.experimental_optimization.apply_default_optimizations = False
+    ds = ds.with_options(options)
+    return ds
+
   def get_next(self, device=None):
     """"""Returns the next element given a `device`, else returns all in a list.""""""
     if device is not None:
@@ -242,6 +304,23 @@ class MultiDeviceIterator(object):
       return control_flow_ops.no_op()
     return self._initializer
 
+  def _eager_reset(self):
+    """"""Resets the MultiDeviceIterator in eager mode.""""""
+    if not context.executing_eagerly():
+      raise ValueError(""Eager reset is only supported in eager mode."")
+    # pylint: disable=protected-access
+    self._incarnation_id = gen_dataset_ops.multi_device_iterator_init(
+        self._dataset._variant_tensor,
+        self._multi_device_iterator_resource,
+        max_buffer_size=self._max_buffer_size)
+    for i, device in enumerate(self._devices):
+      with ops.device(device):
+        ds = self._create_device_dataset(i)
+        # Reset the device iterator resources with the new dataset.
+        ds_variant = ds._variant_tensor
+        gen_dataset_ops.make_iterator(ds_variant,
+                                      self._device_iterators[i]._resource)
+
   @property
   def output_types(self):
     return self._dataset.output_types
",0,train
6f6eb52a89ec6e360d8604fa68516cf2d819207f,tensorflow/tensorflow,"Fixed typos, comments",strided_slice_op.cc,"@@ -219,7 +219,7 @@ Status ValidateStridedSliceOp(
   // Step 2: Make a sparse spec into a full index spec
   //
   // The sparse spec does not correspond to the number of dimensions
-  // Make a dense spec that correspond to the number of dimensions
+  // Make a dense spec that corresponds to the number of dimensions
   //
   // For example suppose foo[...,3:] on foo.shape=(2,2,3) then
   // we need to produce the missing begin_mask for the first two
",0,train
ac70125923a3315802f867837521377a6a18f283,tensorflow/tensorflow,"Fix some races detected by the analysis tool.

collective_rma_distributed: Return WorkerInterface to cache
prior to invoking RecvFromPeer callback, instead of after.

broadcaster: put status_ updates inside mutex.
PiperOrigin-RevId: 196192631",broadcaster.cc,"@@ -134,7 +134,7 @@ void Broadcaster::TreeSendTo(const CollectiveParams& cp,
 // Execute a tree broadcast, i.e. each non-source device receives from
 // one other and sends to up-to two others.
 void Broadcaster::RunTree() {
-  mutex mu;
+  mutex mu;               // also guards status_ while callbacks are pending
   int pending_count = 0;  // GUARDED_BY(mu)
   condition_variable all_done;
   std::vector<int> send_to_ranks;
@@ -164,13 +164,11 @@ void Broadcaster::RunTree() {
       DispatchSend(
           target_rank, output_,
           [this, target_rank, &mu, &pending_count, &all_done](const Status& s) {
+            mutex_lock l(mu);
             status_.Update(s);
-            {
-              mutex_lock l(mu);
-              --pending_count;
-              if (pending_count == 0) {
-                all_done.notify_all();
-              }
+            --pending_count;
+            if (pending_count == 0) {
+              all_done.notify_all();
             }
           });
     }
@@ -191,13 +189,11 @@ void Broadcaster::RunTree() {
           op_dev_ctx, op_dev_ctx, device_, device_, ctx_->input_alloc_attr(0),
           ctx_->output_alloc_attr(0), input, output_,
           [this, &mu, &pending_count, &all_done](const Status& s) {
+            mutex_lock l(mu);
             status_.Update(s);
-            {
-              mutex_lock l(mu);
-              --pending_count;
-              if (0 == pending_count) {
-                all_done.notify_all();
-              }
+            --pending_count;
+            if (0 == pending_count) {
+              all_done.notify_all();
             }
           });
     }
",0,train
ac70125923a3315802f867837521377a6a18f283,tensorflow/tensorflow,"Fix some races detected by the analysis tool.

collective_rma_distributed: Return WorkerInterface to cache
prior to invoking RecvFromPeer callback, instead of after.

broadcaster: put status_ updates inside mutex.
PiperOrigin-RevId: 196192631",collective_rma_distributed.cc,"@@ -122,7 +122,6 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
   // Logic to be executed on the RecvBufferAsync callback.
   auto recv_buf_callback = [this, state, peer_task, to_device, to_alloc_attr,
                             to_device_ctx, to_tensor, done](const Status& s) {
-    std::unique_ptr<State> del_on_exit(state);
     if (s.ok()) {
       // In this generic implementation the bytes come back in the
       // RPC response protobuf rather than via RDMA so we need to copy
@@ -134,6 +133,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
         done(errors::Internal(""RecvBufResponse returned "", num_bytes,
                               "" bytes where to_tensor expected "",
                               to_tensor->TotalBytes()));
+        delete state;
         return;
       }
       if (to_device->tensorflow_gpu_device_info()) {
@@ -144,6 +144,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
         Status status = dev_mgr_->LookupDevice(""CPU:0"", &cpu_dev);
         if (!status.ok()) {
           done(status);
+          delete state;
           return;
         }
         AllocatorAttributes cpu_attr;
@@ -163,6 +164,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
                              // done in another thread.
                              SchedClosure([s, done] { done(s); });
                            });
+        delete state;
         return;
       } else {
         // CPU device
@@ -174,6 +176,7 @@ void CollectiveRemoteAccessDistributed::RecvFromPeer(
       dev_resolver_->ClearTask(peer_task);
     }
 
+    delete state;
     done(s);
   };
 
",0,train
a45373d1764ca99433a4e0b4ac5080a3afce4ba8,tensorflow/tensorflow,ensure all keys have same shape for stackkeys,map_kernels.h,"@@ -118,7 +118,8 @@ class TensorMapLookup : public OpKernel {
     OP_REQUIRES_OK(c, GetInputMap(c, 0, &m));
 
     OP_REQUIRES(c, m->tensors().find(key) != m->tensors().end(),
-                errors::InvalidArgument(""Trying to lookup non-existent key. Could not find "" + key.DeviceSafeDebugString()));
+                errors::InvalidArgument(""Trying to lookup non-existent key. Could"" 
+                                        ""not find "" + key.DeviceSafeDebugString()));
 
     c->set_output(0, m->tensors().find(key)->second);
   }
@@ -189,18 +190,21 @@ class TensorMapStackKeys : public OpKernel {
                 errors::InvalidArgument(""TensorMapStackKeys cannot be called on empty map.""));
 
     auto it = m->tensors().begin();
-    size_t sz = m->tensors().size();
-    TensorShape shape = it->first.shape();
-    shape.InsertDim(0, m->tensors().size());
+    TensorShape output_shape = it->first.shape();
+    output_shape.InsertDim(0, m->tensors().size());
     Tensor* result;
-    OP_REQUIRES_OK(c, c->allocate_output(0, shape, &result));
+    OP_REQUIRES_OK(c, c->allocate_output(0, output_shape, &result));
+
+    //string error_str = ""Key does not match requested dtype. Requested "" + DataTypeString(key_dtype_) + "", but saw "" + DataTypeString(it->first.dtype());
     int i = 0;
-    string error_str = ""Key does not match requested dtype. Requested "" + DataTypeString(key_dtype_) + "", but saw "" + DataTypeString(it->first.dtype());
-    string simple = ""Key does not match requested dtype."";
+    size_t sz = m->tensors().size();
+    TensorShape key_shape = it->first.shape();
     while (it != m->tensors().end() && i < sz) {
       OP_REQUIRES(c, it->first.dtype() == key_dtype_,
-                     errors::InvalidArgument(""Key does not match requested dtype.""));
-      batch_util::CopyElementToSlice(it->first, result, i);
+                  errors::InvalidArgument(""Key does not match requested dtype.""));
+      OP_REQUIRES(c, it->first.shape() == key_shape,
+                 errors::InvalidArgument(""Keys must all have the same shape.""));
+      OP_REQUIRES_OK(c, batch_util::CopyElementToSlice(it->first, result, i));
       i++;
       it++;
     }
",0,train
a45373d1764ca99433a4e0b4ac5080a3afce4ba8,tensorflow/tensorflow,ensure all keys have same shape for stackkeys,map_ops_test.py,"@@ -68,13 +68,10 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     k2 = constant_op.constant(2.0)
     v = constant_op.constant(11.0)
     m = map_ops.tensor_map_insert(m, k, v)
-    simple = ""Trying to lookup non-existent key.""
-    error_str = simple + "" Could not find "" + str(k2)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                ""Trying to lookup non-existent key. *""):
+    with self.assertRaisesRegex(errors.InvalidArgumentError, ""Trying to lookup non-existent key. *""):
       l = map_ops.tensor_map_lookup(m, k2, dtypes.float32)
       self.evaluate(l)
-'''
+
   def testTensorMapErase(self):
     m = map_ops.empty_tensor_map()
     k = constant_op.constant(1.0)
@@ -158,11 +155,11 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   def testStackKeysEmptyMapFails(self):
     m = map_ops.empty_tensor_map()
     with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                ""Empty map has no keys.""):
+                                ""TensorMapStackKeys cannot be called on empty map.""):
       keys = map_ops.tensor_map_stack_keys(m, dtypes.float32)
       self.evaluate(keys)
 
-  def testStackKeysMismatchedDtypeFails(self):
+  def testStackKeysIncorrectDtypeFails(self):
     m = map_ops.empty_tensor_map()
     k = constant_op.constant(""mismatched_key"")
     v = constant_op.constant(2.0)
@@ -174,6 +171,19 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       keys = map_ops.tensor_map_stack_keys(m, dtypes.float32)
       self.evaluate(keys)
 
+  def testStackKeysIncorrectShapeFails(self):
+    m = map_ops.empty_tensor_map()
+    k = constant_op.constant(1.0)
+    k2 = constant_op.constant([1.0, 11.0])
+    v = constant_op.constant(2.0)
+    v2 = constant_op.constant(22.0)
+    m = map_ops.tensor_map_insert(m, k, v)
+    m = map_ops.tensor_map_insert(m, k2, v2)
+    with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                ""Keys must all have the same shape.""):
+      keys = map_ops.tensor_map_stack_keys(m, dtypes.float32)
+      self.evaluate(keys)
+
   def testInsertLookupGrad(self):
     with backprop.GradientTape() as tape:
       m = map_ops.empty_tensor_map()
@@ -437,6 +447,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     s = map_ops.tensor_map_size(m)
     self.assertAllEqual(s, 0)
     self.assertAllEqual(map_ops.tensor_map_has_key(m, k), False)
-'''
+
 if __name__ == ""__main__"":
   test.main()
",0,train
a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops.
Change: 147888410",debug_ops.h,"@@ -241,12 +241,12 @@ class DebugNumericSummaryOp : public OpKernel {
     OP_REQUIRES_OK(context, context->allocate_output(0, shape, &output_tensor));
     output_tensor->vec<double>()(0) = static_cast<double>(is_initialized);
     output_tensor->vec<double>()(1) = static_cast<double>(element_count);
-    output_tensor->vec<double>()(2) = static_cast<double>(negative_inf_count);
-    output_tensor->vec<double>()(3) = static_cast<double>(negative_count);
-    output_tensor->vec<double>()(4) = static_cast<double>(zero_count);
-    output_tensor->vec<double>()(5) = static_cast<double>(positive_count);
-    output_tensor->vec<double>()(6) = static_cast<double>(positive_inf_count);
-    output_tensor->vec<double>()(7) = static_cast<double>(nan_count);
+    output_tensor->vec<double>()(2) = static_cast<double>(nan_count);
+    output_tensor->vec<double>()(3) = static_cast<double>(negative_inf_count);
+    output_tensor->vec<double>()(4) = static_cast<double>(negative_count);
+    output_tensor->vec<double>()(5) = static_cast<double>(zero_count);
+    output_tensor->vec<double>()(6) = static_cast<double>(positive_count);
+    output_tensor->vec<double>()(7) = static_cast<double>(positive_inf_count);
     output_tensor->vec<double>()(8) = min;
     output_tensor->vec<double>()(9) = max;
     output_tensor->vec<double>()(10) = mean;
",0,train
a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops.
Change: 147888410",debug_ops_test.cc,"@@ -254,12 +254,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_full_house) {
       &expected,
       {1.0,              // Is initialized.
        18.0,             // Total element count.
+       4.0,              // nan count.
        2.0,              // -inf count.
        2.0,              // negative number count (excluding -inf).
        3.0,              // zero count.
        2.0,              // positive number count (excluding +inf).
        5.0,              // +inf count.
-       4.0,              // nan count.
        -3.0,             // minimum of non-inf and non-nan elements.
        7.0,              // maximum of non-inf and non-nan elements.
        0.85714285714,    // mean of non-inf and non-nan elements.
@@ -290,12 +290,12 @@ TEST_F(DebugNumericSummaryOpTest, Double_full_house) {
       &expected,
       {1.0,              // Is initialized.
        18.0,             // Total element count.
+       4.0,              // nan count.
        2.0,              // -inf count.
        2.0,              // negative count (excluding -inf).
        3.0,              // zero count.
        2.0,              // positive count (excluding +inf).
        5.0,              // +inf count.
-       4.0,              // nan count.
        -3.0,             // minimum of non-inf and non-nan elements.
        7.0,              // maximum of non-inf and non-nan elements.
        0.85714285714,    // mean of non-inf and non-nan elements.
@@ -315,12 +315,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_only_valid_values) {
       &expected,
       {1.0,              // Is initialized.
        6.0,              // Total element count.
+       0.0,              // nan count.
        0.0,              // -inf count.
        1.0,              // negative count (excluding -inf).
        2.0,              // zero count.
        3.0,              // positive count (excluding +inf).
        0.0,              // +inf count.
-       0.0,              // nan count.
        -1.0,             // minimum of non-inf and non-nan elements.
        7.0,              // maximum of non-inf and non-nan elements.
        2.0,              // mean of non-inf and non-nan elements.
@@ -351,12 +351,12 @@ TEST_F(DebugNumericSummaryOpTest, Float_all_Inf_or_NaN) {
   // NaNs.
   ASSERT_NEAR(1.0, output[0], 1e-8);  // Is initialized.
   ASSERT_NEAR(9.0, output[1], 1e-8);  // Total element count.
-  ASSERT_NEAR(2.0, output[2], 1e-8);  // -inf count.
-  ASSERT_NEAR(0.0, output[3], 1e-8);  // negative count (excluding -inf).
-  ASSERT_NEAR(0.0, output[4], 1e-8);  // zero count.
-  ASSERT_NEAR(0.0, output[5], 1e-8);  // positive count (excluding +inf).
-  ASSERT_NEAR(3.0, output[6], 1e-8);  // +inf count.
-  ASSERT_NEAR(4.0, output[7], 1e-8);  // nan count.
+  ASSERT_NEAR(4.0, output[2], 1e-8);  // nan count.
+  ASSERT_NEAR(2.0, output[3], 1e-8);  // -inf count.
+  ASSERT_NEAR(0.0, output[4], 1e-8);  // negative count (excluding -inf).
+  ASSERT_NEAR(0.0, output[5], 1e-8);  // zero count.
+  ASSERT_NEAR(0.0, output[6], 1e-8);  // positive count (excluding +inf).
+  ASSERT_NEAR(3.0, output[7], 1e-8);  // +inf count.
   // Due to the absence of any non-inf and non-nan values, the output of min,
   // max, mean and var are all degenerate.
   ASSERT_EQ(std::numeric_limits<float>::infinity(), output[8]);
",0,train
a6421c4dda1a83ea975bae545df1de16d38726b0,tensorflow/tensorflow,"Swap NaN count from index 7 to 2 within DebugNumericSummary ops.
Change: 147888410",session_debug_testlib.py,"@@ -1060,7 +1060,7 @@ class SessionDebugTestBase(test_util.TensorFlowTestCase):
       self.assertTrue(dump.loaded_partition_graphs())
 
       self.assertAllClose([[
-          1.0, 18.0, 2.0, 2.0, 3.0, 2.0, 5.0, 4.0, -3.0, 7.0, 0.85714286,
+          1.0, 18.0, 4.0, 2.0, 2.0, 3.0, 2.0, 5.0, -3.0, 7.0, 0.85714286,
           8.97959184
       ]], dump.get_tensors(""numeric_summary/a/read"", 0, ""DebugNumericSummary""))
 
",0,train
b7ae5f28b4b55098ae64ed2bcab21f6e29d5abee,tensorflow/tensorflow,"Simplify MemcpyRewritePattern by using the MemcpyOpAdaptor.

PiperOrigin-RevId: 399226638
Change-Id: Iebccf00cdce622d33e40f8a98704d09d8139dbed",memcpy_pattern.cc,"@@ -33,6 +33,7 @@
 #include ""mlir/Transforms/DialectConversion.h""
 #include ""llvm/ADT/ArrayRef.h""
 #include ""llvm/ADT/StringRef.h""
+#include ""mlir/Dialect/GPU/GPUDialect.h""  // from @llvm-project
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
 #include ""mlir/IR/BlockAndValueMapping.h""  // from @llvm-project
 #include ""mlir/IR/Value.h""  // from @llvm-project
@@ -61,20 +62,16 @@ struct MemcpyRewritePattern
       mlir::gpu::MemcpyOp op, Value chain, Value stream,
       ArrayRef<Value> operands,
       ConversionPatternRewriter& rewriter) const override {
-    if (!all_of(operands, [](Value operand) {
-          return operand.getType().isa<tfrt::gpu::BufferType>();
-        }))
+    mlir::gpu::MemcpyOpAdaptor adaptor =
+        mlir::gpu::MemcpyOpAdaptor(operands, op->getAttrDictionary());
+    if (!adaptor.src().getType().isa<tfrt::gpu::BufferType>() ||
+        !adaptor.dst().getType().isa<tfrt::gpu::BufferType>()) {
       return rewriter.notifyMatchFailure(op, ""expected buffer operands"");
-
-    BlockAndValueMapping mapping;
-    for (auto pair : llvm::zip_first(op->getOperands(), operands))
-      mapping.map(std::get<0>(pair), std::get<1>(pair));
-
+    }
     rewriter.eraseOp(op);
-
     return rewriter
-        .create<tfrt::gpu::MemCopyOp>(op.getLoc(), mapping.lookup(op.dst()),
-                                      mapping.lookup(op.src()), stream, chain)
+        .create<tfrt::gpu::MemCopyOp>(op.getLoc(), adaptor.dst(), adaptor.src(),
+                                      stream, chain)
         .getResult();
   }
 };
",0,train
16765079bb77aa2ff24d96bd6781baf80f1c9ca8,tensorflow/tensorflow,"[XLIR] Log and skip instances where memref.global op is absent for a constant.

PiperOrigin-RevId: 411604942
Change-Id: I9eb23ef6896fd3d9ca37715c8d35be4315bea6be",kernel_ops_pattern.cc,"@@ -291,12 +291,16 @@ static void Rewrite(Operation* op, mlir::PatternRewriter& rewriter,
                       rewriter.getStringAttr(gpu_module_data));
 
   // Annotate memref.global ops with the gpu.module symbol, and annotate the
-  // gpu.module op with memref.global symbols which requiring initialization.
+  // gpu.module op with memref.global symbols which require initialization.
   SmallVector<mlir::Attribute, 4> const_attrs;
   for (const auto& constant : constants) {
     auto global_op = mlir::SymbolTable::lookupNearestSymbolFrom(
         op, rewriter.getStringAttr(constant.symbol_name));
-    assert(global_op);
+    if (!global_op) {
+      LOG(WARNING) << ""memref.global op not found for constant. Possibly ""
+                   << ""unused (spurious) constant."";
+      continue;
+    }
     global_op->setAttr(tfrt::gpu::getGpuModuleAttrName(),
                        mlir::SymbolRefAttr::get(gpu_module));
     if (!constant.content.empty())
",0,train
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",direct_session.cc,"@@ -717,7 +717,8 @@ Status DirectSession::Run(const RunOptions& run_options,
   // Receive outputs.
   if (outputs) {
     std::vector<Tensor> sorted_outputs;
-    const Status s = call_frame.ConsumeRetvals(&sorted_outputs);
+    const Status s = call_frame.ConsumeRetvals(
+        &sorted_outputs, /* allow_dead_tensors = */ false);
     if (errors::IsInternal(s)) {
       return errors::InvalidArgument(s.error_message());
     } else if (!s.ok()) {
",0,test
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",function.cc,"@@ -746,6 +746,8 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
     rets_alloc_attrs.push_back(ret_alloc_attrs);
   }
 
+  bool allow_dead_tensors = opts.allow_dead_tensors;
+
   // The ProcFLR sends the arguments to the function from the source_device to
   // the target_device. So here we receive those arguments. Similarly, when the
   // computation is done and stored in *rets, we send the return values back
@@ -756,7 +758,7 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
       device_context, args_alloc_attrs, rendezvous, remote_args,
       [frame, remote_args, item, source_device, target_device,
        target_incarnation, rendezvous, device_context, rets, done, exec_args,
-       rets_alloc_attrs](const Status& status) {
+       rets_alloc_attrs, allow_dead_tensors](const Status& status) {
         Status s = status;
         if (s.ok()) {
           s = frame->SetArgs(*remote_args);
@@ -769,13 +771,13 @@ void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
           return;
         }
         item->exec->RunAsync(
-            *exec_args,
-            [frame, rets, done, source_device, target_device,
-             target_incarnation, rendezvous, device_context, remote_args,
-             exec_args, rets_alloc_attrs](const Status& status) {
+            *exec_args, [frame, rets, done, source_device, target_device,
+                         target_incarnation, rendezvous, device_context,
+                         remote_args, exec_args, rets_alloc_attrs,
+                         allow_dead_tensors](const Status& status) {
               Status s = status;
               if (s.ok()) {
-                s = frame->ConsumeRetvals(rets);
+                s = frame->ConsumeRetvals(rets, allow_dead_tensors);
               }
               delete frame;
               if (!s.ok()) {
@@ -859,14 +861,15 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
     return;
   }
 
+  bool allow_dead_tensors = opts.allow_dead_tensors;
   item->exec->RunAsync(
       // Executor args
       *exec_args,
       // Done callback.
-      [frame, rets, done, exec_args](const Status& status) {
+      [frame, rets, done, exec_args, allow_dead_tensors](const Status& status) {
         Status s = status;
         if (s.ok()) {
-          s = frame->ConsumeRetvals(rets);
+          s = frame->ConsumeRetvals(rets, allow_dead_tensors);
         }
         delete frame;
         delete exec_args;
",0,test
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",function.cc,"@@ -865,12 +865,15 @@ Status FunctionCallFrame::GetRetvals(std::vector<Tensor>* rets) const {
   return Status::OK();
 }
 
-Status FunctionCallFrame::ConsumeRetvals(std::vector<Tensor>* rets) {
+Status FunctionCallFrame::ConsumeRetvals(std::vector<Tensor>* rets,
+                                         bool allow_dead_tensors) {
   rets->clear();
   rets->reserve(rets_.size());
   for (size_t i = 0; i < rets_.size(); ++i) {
     if (rets_[i].has_val) {
       rets->emplace_back(std::move(rets_[i].val));
+    } else if (allow_dead_tensors) {
+      rets->emplace_back();
     } else {
       return errors::Internal(""Retval["", i, ""] does not have value"");
     }
",0,test
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",function.h,"@@ -261,7 +261,10 @@ class FunctionCallFrame : public CallFrameInterface {
   // Caller methods.
   Status SetArgs(gtl::ArraySlice<Tensor> args);
   Status GetRetvals(std::vector<Tensor>* rets) const;
-  Status ConsumeRetvals(std::vector<Tensor>* rets);
+
+  // Moves the return values from the frame to rets. If allow_dead_tensors is
+  // false it will fail if any of the retvals do not have a value.
+  Status ConsumeRetvals(std::vector<Tensor>* rets, bool allow_dead_tensors);
 
   size_t num_args() const override { return arg_types_.size(); }
   size_t num_retvals() const override { return ret_types_.size(); }
@@ -510,6 +513,9 @@ class FunctionLibraryRuntime {
     // If true, we create a new IntraProcessRendezvous, else use the existing
     // one.
     bool create_rendezvous = false;
+
+    // If True, allow returning dead tensors.
+    bool allow_dead_tensors = false;
   };
   typedef std::function<void(const Status&)> DoneCallback;
   virtual void Run(const Options& opts, Handle handle,
",0,test
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",partitioned_function_ops.cc,"@@ -330,6 +330,7 @@ class PartitionedCallOp : public AsyncOpKernel {
     // using device-specific threadpools when available.
     opts.runner = ctx->runner();
     opts.source_device = local_device_name_;
+    opts.allow_dead_tensors = true;
     // TODO(akshayka): Accommodate the multiple-worker scenario by adding the
     // constructed rendezvous to a rendezvous manager.
     Rendezvous* rendez = new IntraProcessRendezvous(lib->device_mgr());
",0,test
f85d825500357603afb7a02d2c88ad306ee43006,tensorflow/tensorflow,"Allow differentiating tfe.defun functions which contain conds.

PiperOrigin-RevId: 205732423",function_test.py,"@@ -213,6 +213,19 @@ class FunctionTest(test.TestCase):
     self.assertEqual(fn_op.output_shapes, None)
     self.assertAllEqual(fn_op(x, x), None)
 
+  @test_util.run_in_graph_and_eager_modes()
+  def testDefunCondGradient(self):
+
+    @function.defun
+    def f(x):
+      return control_flow_ops.cond(x > 0.5, lambda: 2 * x, lambda: 3 * x)
+
+    with backprop.GradientTape() as t:
+      x = constant_op.constant(1.0)
+      t.watch(x)
+      y = f(x)
+    self.assertAllEqual(self.evaluate(t.gradient(y, x)), 2.0)
+
   def testDefunCapturedInt32(self):
     x = constant_op.constant(1, dtype=dtypes.int32)
 
",0,test
c3f713e1cc1d1c14ce9e19a792e4179ca3fc92bf,tensorflow/tensorflow,Address review comment on new PR #23109,check_ops.py,"@@ -236,7 +236,6 @@ def _make_assert_msg_data(sym, x, y, summarize, test_op):
       data.append('Corresponding y values:')
       data.append(y_vals.numpy().reshape((-1,))[:num_vals])
     
-  if summarize > 0:
     # reshape((-1,)) is the fastest way to get a flat array view.
     x_np = x.numpy().reshape((-1,))
     y_np = y.numpy().reshape((-1,))
",0,train
7e5cf28779087a2ca36b79e8d1b02083d77cb8ff,tensorflow/tensorflow,"Replace unneeded TODO with clarification comment.

PiperOrigin-RevId: 263128148",collective_ops_test.py,"@@ -141,8 +141,8 @@ class CollectiveOpTest(test.TestCase):
           with ops.device(device):
             device_collectives = []
             for j in range(num_vars):
-              # TODO(ayushd): figure out why identity is necessary to get the
-              # right device on the input here with TF2_BEHAVIOR=1.
+              # NOTE(ayushd): we need the `identity` here to ensure that the
+              # input to `all_reduce` has an explicit device string.
               input_tensor = array_ops.identity(device_tensors[j])
               collective_op = collective_ops.all_reduce(
                   input_tensor, group_size, group_key, instances[j],
",0,test
62f0dee7a386259a245f3d7e7b481715f3d018c2,tensorflow/tensorflow,"[TFR] Improve error message on undefined default value

PiperOrigin-RevId: 388371834
Change-Id: I8678bf9fb500b72f096f8294d9a47650f77da02d",tfr_gen.py,"@@ -1232,7 +1232,11 @@ class TFRGen(transformer.CodeGenerator):
     """"""emit mlir constant statement from default value of the ArgDef proto.""""""
     name = self._ssa_name('cst')
     cst_ty = _get_type_from_proto(None, attr_def)
-    cst_val = _get_val_from_proto(cst_ty, attr_def.default_value)
+    try:
+      cst_val = _get_val_from_proto(cst_ty, attr_def.default_value)
+    except AttributeError:
+      raise AttributeError(
+          f'attribute ""{attr_def.name}"" does not have default_value')
     if cst_ty == TFRTypes.ATTR:
       self._emit_with_loc('\n{} = tfr.constant {} -> {}'.format(
           name, cst_val, cst_ty))
",0,train
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,cholesky_op_test.py,"@@ -15,7 +15,6 @@
 """"""Tests for tensorflow.ops.tf.Cholesky.""""""
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import constant_op
@@ -76,7 +75,7 @@ class CholeskyOpTest(xla_test.XLATestCase):
 
       # Generate random positive-definite matrices.
       matrices = np.random.rand(10, 5, 5).astype(dtype)
-      for i in xrange(10):
+      for i in range(10):
         matrices[i] = np.dot(matrices[i].T, matrices[i])
       self._verifyCholesky(matrices, atol=1e-4)
 
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,clustering_test.py,"@@ -15,7 +15,6 @@
 """"""Tests for the behavior of the auto-compilation pass.""""""
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import constant_op
@@ -52,7 +51,7 @@ class ClusteringTest(xla_test.XLATestCase):
         input2 = constant_op.constant(val2, name=""const2"")
       with self.test_scope():
         output = math_ops.add(input1, input2)
-      for _ in xrange(10):
+      for _ in range(10):
         result = self.evaluate(output)
         self.assertAllClose(result, expected, rtol=1e-3)
 
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,conv3d_test.py,"@@ -15,7 +15,6 @@
 """"""Tests for 3D convolutions using the XLA JIT.""""""
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import constant_op
@@ -96,11 +95,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase):
       #   kernel_depth * ceil(kernel_height/2) * kernel_width or
       #   kernel_depth * kernel_height * ceil(kernel_width/2)
 
-      for n in xrange(x_shape[0]):
-        for k in xrange(f_shape[3]):
-          for w in xrange(y_shape[3]):
-            for h in xrange(y_shape[2]):
-              for d in xrange(y_shape[1]):
+      for n in range(x_shape[0]):
+        for k in range(f_shape[3]):
+          for w in range(y_shape[3]):
+            for h in range(y_shape[2]):
+              for d in range(y_shape[1]):
                 d_in = d > 0 and d < y_shape[1] - 1
                 h_in = h > 0 and h < y_shape[2] - 1
                 w_in = w > 0 and w < y_shape[3] - 1
@@ -133,11 +132,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase):
           x, f, y_shape, strides=strides, padding=""SAME"")
       value = self.evaluate(output)
 
-      for n in xrange(x_shape[0]):
-        for k in xrange(f_shape[3]):
-          for w in xrange(y_shape[3]):
-            for h in xrange(y_shape[2]):
-              for d in xrange(y_shape[1]):
+      for n in range(x_shape[0]):
+        for k in range(f_shape[3]):
+          for w in range(y_shape[3]):
+            for h in range(y_shape[2]):
+              for d in range(y_shape[1]):
                 # We add a case for locations divisible by the stride.
                 d_in = d % strides[1] == 0 and 0 < d < y_shape[1] - 1
                 h_in = h % strides[2] == 0 and 0 < h < y_shape[2] - 1
@@ -176,11 +175,11 @@ class Conv3DTransposeTest(xla_test.XLATestCase):
       # The amount of padding added
       pad = 1
 
-      for n in xrange(x_shape[0]):
-        for k in xrange(f_shape[3]):
-          for w in xrange(y_shape[3]):
-            for h in xrange(y_shape[2]):
-              for d in xrange(y_shape[1]):
+      for n in range(x_shape[0]):
+        for k in range(f_shape[3]):
+          for w in range(y_shape[3]):
+            for h in range(y_shape[2]):
+              for d in range(y_shape[1]):
                 # We add a case for locations divisible by the stride.
                 d_in = d % strides[1] == 0 and pad < d < y_shape[1] - 1 - pad
                 h_in = h % strides[2] == 0 and pad < h < y_shape[2] - 1 - pad
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,depthwise_conv_op_test.py,"@@ -15,7 +15,6 @@
 """"""Functional tests for depthwise convolutional operations.""""""
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import constant_op
@@ -35,7 +34,7 @@ def ReferenceDepthwiseConv2D(input_tensor, filter_tensor, strides, padding,
   convs = []
   in_channels = filter_tensor.shape[2]
   # Use a custom implementation of depthwise conv2d using slicing.
-  for channel in xrange(in_channels):
+  for channel in range(in_channels):
     # Slice the input along channel
     if data_format == ""NCHW"":
       input_slice = input_tensor[:, channel:channel+1, :, :]
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,fifo_queue_test.py,"@@ -16,8 +16,6 @@
 
 import time
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes as dtypes_lib
 from tensorflow.python.ops import data_flow_ops
@@ -86,7 +84,7 @@ class FIFOQueueTest(xla_test.XLATestCase):
 
       # Dequeue every element using a single thread.
       results = []
-      for _ in xrange(len(elems)):
+      for _ in range(len(elems)):
         results.append(dequeued_t.eval())
       self.assertItemsEqual(elems, results)
 
@@ -124,7 +122,7 @@ class FIFOQueueTest(xla_test.XLATestCase):
       for enqueue_op in enqueue_ops:
         enqueue_op.run()
 
-      for i in xrange(len(elems)):
+      for i in range(len(elems)):
         vals = self.evaluate(dequeued_t)
         self.assertEqual([elems[i]], vals)
 
@@ -145,7 +143,7 @@ class FIFOQueueTest(xla_test.XLATestCase):
       results = []
 
       def dequeue():
-        for _ in xrange(len(elems)):
+        for _ in range(len(elems)):
           results.append(sess.run(dequeued_t))
 
       enqueue_thread = self.checkedThread(target=enqueue)
@@ -168,7 +166,7 @@ class FIFOQueueTest(xla_test.XLATestCase):
       for enqueue_op in enqueue_ops:
         enqueue_op.run()
 
-      for i in xrange(len(elems)):
+      for i in range(len(elems)):
         x_val, y_val = sess.run(dequeued_t)
         x, y = elems[i]
         self.assertEqual([x], x_val)
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,image_ops_test.py,"@@ -21,8 +21,6 @@ import os
 from absl.testing import parameterized
 import numpy as np
 
-from six.moves import xrange  # pylint: disable=redefined-builtin
-
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -227,7 +225,7 @@ class AdjustHueTest(xla_test.XLATestCase):
     x_v = x_np.reshape([-1, 3])
     y_v = np.ndarray(x_v.shape, dtype=x_v.dtype)
     channel_count = x_v.shape[0]
-    for i in xrange(channel_count):
+    for i in range(channel_count):
       r = x_v[i][0]
       g = x_v[i][1]
       b = x_v[i][2]
@@ -347,7 +345,7 @@ class AdjustSaturationTest(xla_test.XLATestCase):
     x_v = x_np.reshape([-1, 3])
     y_v = np.ndarray(x_v.shape, dtype=x_v.dtype)
     channel_count = x_v.shape[0]
-    for i in xrange(channel_count):
+    for i in range(channel_count):
       r = x_v[i][0]
       g = x_v[i][1]
       b = x_v[i][2]
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,listdiff_op_test.py,"@@ -15,7 +15,6 @@
 """"""Tests for XLA listdiff operator.""""""
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes
@@ -61,7 +60,7 @@ class ListDiffTest(xla_test.XLATestCase):
     int_low = -7
     int_high = 8
     max_size = 50
-    for _ in xrange(num_random_tests):
+    for _ in range(num_random_tests):
       x_size = np.random.randint(max_size + 1)
       x = np.random.randint(int_low, int_high, size=x_size)
       y_size = np.random.randint(max_size + 1)
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,unary_ops_test.py,"@@ -18,7 +18,6 @@ import unittest
 
 import numpy as np
 import six
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes
@@ -80,7 +79,7 @@ class UnaryOpsTest(xla_test.XLATestCase):
   def ListsAreClose(self, result, expected, rtol, atol):
     """"""Tests closeness of two lists of floats.""""""
     self.assertEqual(len(result), len(expected))
-    for i in xrange(len(result)):
+    for i in range(len(result)):
       self.assertAllClose(result[i], expected[i], rtol, atol)
 
   def AssertCloseAndSorted(self, result, expected, rtol, atol):
",0,test
5f0d7ee44eb5a62603e72e9f0c91551ce9e4c254,tensorflow/tensorflow,Remove xrange from tensorflow/compilers,xla_shape.py,"@@ -16,8 +16,6 @@
 
 import numpy as _np  # Avoids becoming a part of public Tensorflow API.
 
-from six.moves import xrange
-
 from tensorflow.compiler.xla import xla_data_pb2
 from tensorflow.compiler.xla.python_api import types
 
@@ -117,7 +115,7 @@ def _CreateShapeFromNumpy(ndarray):  # pylint: disable=invalid-name
   else:
     # Row-major layout. This corresponds to a ""dimension order is
     # major-to-minor"" layout int XLA.
-    layout = list(reversed(xrange(ndarray.ndim)))
+    layout = list(reversed(range(ndarray.ndim)))
 
   return Shape(element_type, dimensions, layout)
 
",0,test
fca2509e3b3d6252fa34f6e35d8a359c0e5cbf64,tensorflow/tensorflow,"Format tf.function's error message when input and signature does not match

This fix tries to address the issue raised in 30576 where the error message
is hard to interpret:
```
ValueError: Python inputs incompatible with input_signature: inputs ((<tf.Tensor 'random_normal:0' shape=(1, 123, 1) dtype=float32>, <tf.Tensor 'random_normal_1:0' shape=(1, 123, 2) dtype=float32>, <tf.Tensor 'random_normal_2:0' shape=(1, 123, 3) dtype=float32>, <tf.Tensor 'random_normal_3:0' shape=(1, 123, 4) dtype=float32>, <tf.Tensor 'random_normal_4:0' shape=(1, 123, 5) dtype=float32>, <tf.Tensor 'random_normal_5:0' shape=(1, 123, 6) dtype=float32>, <tf.Tensor 'random_normal_6:0' shape=(1, 123, 7) dtype=float32>, <tf.Tensor 'random_normal_7:0' shape=(1, 123, 8) dtype=float32>, <tf.Tensor 'random_normal_8:0' shape=(1, 123, 1) dtype=float32>)), input_signature ((TensorSpec(shape=(?, ?, 1), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 2), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 3), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 4), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 5), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 6), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 7), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 8), dtype=tf.float32, name=None), TensorSpec(shape=(?, ?, 9), dtype=tf.float32, name=None)))
```

This fix formats the error message:
```
ValueError: Python inputs incompatible with input_signature:
  inputs: (
    Tensor(""random_normal:0"", shape=(1, 123, 1), dtype=float32),
    Tensor(""random_normal_1:0"", shape=(1, 123, 2), dtype=float32),
    Tensor(""random_normal_2:0"", shape=(1, 123, 3), dtype=float32),
    Tensor(""random_normal_3:0"", shape=(1, 123, 4), dtype=float32),
    Tensor(""random_normal_4:0"", shape=(1, 123, 5), dtype=float32),
    Tensor(""random_normal_5:0"", shape=(1, 123, 6), dtype=float32),
    Tensor(""random_normal_6:0"", shape=(1, 123, 7), dtype=float32),
    Tensor(""random_normal_7:0"", shape=(1, 123, 8), dtype=float32),
    Tensor(""random_normal_8:0"", shape=(1, 123, 1), dtype=float32))
  input_signature: (
    TensorSpec(shape=(?, ?, 1), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 2), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 3), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 4), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 5), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 6), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 7), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 8), dtype=tf.float32, name=None),
    TensorSpec(shape=(?, ?, 9), dtype=tf.float32, name=None))
```

This fix fixes 30576.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",function.py,"@@ -1548,12 +1548,17 @@ def _convert_inputs_to_signature(inputs, input_signature, flat_input_signature):
                          ""Inputs (%s), input_signature(%s)."" %
                          (str(inputs), str(input_signature)))
 
+  def format_error_message(inputs, input_signature):
+      return (""  inputs: (\n    "" +
+      "",\n    "".join([str(i) for i in inputs]) +
+      "")\n  input_signature: (\n    "" +
+      "",\n    "".join([str(i) for i in input_signature]) +
+      "")"")
   if any(not spec.is_compatible_with(other) for spec, other in zip(
       flat_input_signature,
       flatten_inputs)):
-    raise ValueError(""Python inputs incompatible with input_signature: ""
-                     ""inputs (%s), input_signature (%s)"" %
-                     (str(inputs), str(input_signature)))
+    raise ValueError(""Python inputs incompatible with input_signature:\n%s"" %
+                     format_error_message(inputs, input_signature))
 
   if need_packing:
     inputs = nest.pack_sequence_as(
",0,train
b2933c618260edc039fb8a7e2dce4d2e185f0892,tensorflow/tensorflow,"[XLA:GPU] Allow multi-output fusion of element-wise instructions, in addition to loop fusions.

PiperOrigin-RevId: 207253181",multi_output_fusion.cc,"@@ -115,15 +115,23 @@ bool IsInputFusibleReduction(HloInstruction* instr) {
 // will be broadcasted and have not been observed to cause data locality issues.
 // TODO(b/111977086): Improve reduce emitters to remove this limitation.
 bool ReduceFriendlyInputLayouts(HloInstruction* instr) {
+  std::vector<HloInstruction*> params;
+  if (instr->opcode() == HloOpcode::kFusion) {
+    params = instr->fused_parameters();
+  } else {
+    for (HloInstruction* operand : instr->operands()) {
+      params.push_back(operand);
+    }
+  }
   int64 max_rank = 0;
   const Layout* max_rank_layout;
-  for (HloInstruction* param : instr->fused_parameters()) {
+  for (HloInstruction* param : params) {
     if (ShapeUtil::Rank(param->shape()) > max_rank) {
       max_rank = ShapeUtil::Rank(param->shape());
       max_rank_layout = &param->shape().layout();
     }
   }
-  return c_all_of(instr->fused_parameters(), [&](HloInstruction* param) {
+  return c_all_of(params, [&](HloInstruction* param) {
     return (ShapeUtil::Rank(param->shape()) < max_rank) ||
            (LayoutUtil::Equal(param->shape().layout(), *max_rank_layout));
   });
@@ -221,7 +229,7 @@ bool GpuMultiOutputFusion::DoProducerConsumerMultiOutputFusion() {
       const bool is_loop_fusion =
           producer->opcode() == HloOpcode::kFusion &&
           producer->fusion_kind() == HloInstruction::FusionKind::kLoop;
-      if (!is_loop_fusion) {
+      if (!producer->IsElementwise() && !is_loop_fusion) {
         VLOG(3) << producer->name() << "" is not a loop fusion."";
         continue;
       }
",0,test
b2933c618260edc039fb8a7e2dce4d2e185f0892,tensorflow/tensorflow,"[XLA:GPU] Allow multi-output fusion of element-wise instructions, in addition to loop fusions.

PiperOrigin-RevId: 207253181",multi_output_fusion_test.cc,"@@ -256,6 +256,26 @@ TEST_F(MultiOutputFusionTest, MultiOutputFusionTwoLoops) {
               op::Tuple(op::Multiply(), op::Divide()));
 }
 
+TEST_F(MultiOutputFusionTest, ProducerConsumerFusionElementwiseAndReduce) {
+  auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
+    ENTRY reduce {
+      p0 = f32[2,2,2]{2,1,0} parameter(0)
+      c0 = f32[] constant(0)
+      exp = f32[2,2,2]{2,1,0} exponential(p0)
+      reduce = f32[2,2]{1,0} reduce(exp, c0), dimensions={2}, to_apply=scalar_add_computation
+      ROOT root = (f32[2,2]{1,0}, f32[2,2,2]{2,1,0}) tuple(reduce, exp)
+    })""))
+                    .ValueOrDie();
+  ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie());
+  SCOPED_TRACE(module->ToString());
+  const HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(root, op::Tuple(op::GetTupleElement(), op::GetTupleElement()));
+  const HloInstruction* fusion = root->operand(0)->operand(0);
+  ASSERT_TRUE(fusion->IsMultiOutputFusion());
+  EXPECT_THAT(fusion->fused_expression_root(),
+              op::Tuple(op::Reduce(), op::Exp()));
+}
+
 TEST_F(MultiOutputFusionTest, ProducerConsumerFusionLoopFusionAndReduce) {
   auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
     fused_add {
",0,test
21b91300e9e18dbfa2d1a503721ed3d0a08f37e2,tensorflow/tensorflow,"boosted_trees: infer the output shapes of Quantiles Op from the input shapes.

PiperOrigin-RevId: 188750079",quantile_ops.cc,"@@ -272,6 +272,20 @@ REGISTER_OP(""Quantiles"")
     .Input(""sparse_indices: num_sparse_features * int64"")
     .Output(""dense_quantiles: num_dense_features * int32"")
     .Output(""sparse_quantiles: num_sparse_features * int32"")
+    .SetShapeFn([](InferenceContext* c) {
+      int num_dense_features;
+      TF_RETURN_IF_ERROR(c->GetAttr(""num_dense_features"", &num_dense_features));
+      int num_sparse_features;
+      TF_RETURN_IF_ERROR(
+          c->GetAttr(""num_sparse_features"", &num_sparse_features));
+      // Set output shapes (dense_quantiles and sparse_quantiles) by the
+      // relevant inputs (dense_values and sparse_values). Note that the output
+      // has an additional dimension for dimension_ids.
+      for (int i = 0; i < num_dense_features + num_sparse_features; ++i) {
+        c->set_output(i, c->MakeShape({c->Dim(c->input(i), 0), 2}));
+      }
+      return Status::OK();
+    })
     .Doc(R""doc(
 Computes quantile for each a given list of dense and sparse feature values using
 the given buckets.
",0,train
571d0114eda553e2d1b5c9c71f77c2211b5914e3,tensorflow/tensorflow,"Internal Cleanup.

PiperOrigin-RevId: 227929845",optimizer_v2_test.py,"@@ -27,7 +27,6 @@ import numpy as np
 from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
-from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -262,23 +261,6 @@ class OptimizerTest(test.TestCase):
       self.evaluate(sgd.iterations.initializer)
       self.assertEqual(0, self.evaluate(sgd.iterations))
 
-  @test_util.run_in_graph_and_eager_modes
-  def testSerializationWithinDefun(self):
-    with self.cached_session():
-      sgd = gradient_descent.SGD(3.0)
-      var0 = resource_variable_ops.ResourceVariable([1.0, 2.0],
-                                                    dtype=dtypes.float32)
-      loss = lambda: 3 * var0
-      sgd.minimize(loss, [var0])
-
-      def serialize():
-        config = sgd.get_config()
-        gradient_descent.SGD.from_config(config)
-
-      compiled_serialize = function.defun(serialize)
-      with self.assertRaisesRegexp(RuntimeError, 'inside Tensorflow graph'):
-        compiled_serialize()
-
   @test_util.run_in_graph_and_eager_modes
   def testConfig(self):
     with self.cached_session():
",0,test
571d0114eda553e2d1b5c9c71f77c2211b5914e3,tensorflow/tensorflow,"Internal Cleanup.

PiperOrigin-RevId: 227929845",optimizers.py,"@@ -575,7 +575,7 @@ class Adamax(Optimizer):
 
   def get_updates(self, loss, params):
     grads = self.get_gradients(loss, params)
-    self.updates = [state_ops.assign_add(self.iterations, 1)]
+    self.updates = []
 
     lr = self.lr
     if self.initial_decay > 0:
@@ -583,7 +583,8 @@ class Adamax(Optimizer):
           1. / (1. + self.decay * math_ops.cast(self.iterations,
                                                 K.dtype(self.decay))))
 
-    t = math_ops.cast(self.iterations, K.floatx()) + 1
+    with ops.control_dependencies([state_ops.assign_add(self.iterations, 1)]):
+      t = math_ops.cast(self.iterations, K.floatx())
     lr_t = lr / (1. - math_ops.pow(self.beta_1, t))
 
     shapes = [K.int_shape(p) for p in params]
",0,test
d27dd98f3f48e860f0cdfebb32871ba994a8e87f,tensorflow/tensorflow,"Raise the TypeError at the line of code that causes the issue.

PiperOrigin-RevId: 226068866",tpu_estimator.py,"@@ -1672,7 +1672,7 @@ class _OutfeedHostCall(object):
               'Exception while calling %s: %s. It is likely the tensors '
               '(%s[1]) do not match the '
               'function\'s arguments', name, e, name)
-          raise e
+          raise
     return ret
 
   def record(self, host_calls):
@@ -1805,7 +1805,7 @@ class _OutfeedHostCall(object):
                 'Exception while calling %s: %s. It is likely the tensors '
                 '(%s[1]) do not match the '
                 'function\'s arguments', name, e, name)
-            raise e
+            raise
         else:
           ret[name] = self._host_fns[name](*dequeue_ops)
 
",0,train
c4b0364a37b36155673bc108ff56ed506ff96f1d,tensorflow/tensorflow,"Remove overrides of ConversionTarget::isDynamicallyLegal in favor of callbacks

It's just confusing to have two ways of doing this. This is in preparation of
removing the virtual method in mlir core, see https://reviews.llvm.org/D106786

PiperOrigin-RevId: 387409346
Change-Id: I2bf6a3db0a768fefeba76e824d53f2e06b373932",legalize_tfl.cc,"@@ -2949,13 +2949,14 @@ static bool isIllegalType(Type type) {
   return false;
 }
 
-class TosaConversionTarget : public ConversionTarget {
- public:
-  using ConversionTarget::ConversionTarget;
+void LegalizeTFL::runOnFunction() {
+  QuantTypeConverter converter;
+  ConversionTarget target(getContext());
 
- protected:
+  target.addIllegalDialect<TFL::TensorFlowLiteDialect>();
+  target.addIllegalDialect<quant::QuantizationDialect>();
   // Operations are legal if they don't contain any illegal type.
-  bool isDynamicallyLegal(Operation* op) const override {
+  target.markUnknownOpDynamicallyLegal([](Operation* op) {
     if (auto constantOp = dyn_cast<ConstantOp>(op)) {
       return constantOp.getType().isa<NoneType>();
     }
@@ -2974,16 +2975,7 @@ class TosaConversionTarget : public ConversionTarget {
       if (type && isIllegalType(type)) return false;
     }
     return true;
-  }
-};
-
-void LegalizeTFL::runOnFunction() {
-  QuantTypeConverter converter;
-  TosaConversionTarget target(getContext());
-
-  target.addIllegalDialect<TFL::TensorFlowLiteDialect>();
-  target.addIllegalDialect<quant::QuantizationDialect>();
-  target.markUnknownOpDynamicallyLegal();
+  });
 
   auto* ctx = &getContext();
   auto func = getFunction();
",0,train
c4b0364a37b36155673bc108ff56ed506ff96f1d,tensorflow/tensorflow,"Remove overrides of ConversionTarget::isDynamicallyLegal in favor of callbacks

It's just confusing to have two ways of doing this. This is in preparation of
removing the virtual method in mlir core, see https://reviews.llvm.org/D106786

PiperOrigin-RevId: 387409346
Change-Id: I2bf6a3db0a768fefeba76e824d53f2e06b373932",legalize_tf_types.cc,"@@ -98,17 +98,14 @@ class TfTypeConversionTarget : public ConversionTarget {
  public:
   explicit TfTypeConversionTarget(MLIRContext &ctx, TfTypeConverter &converter)
       : ConversionTarget(ctx), converter_(converter) {
-    markUnknownOpDynamicallyLegal();
-  }
-
- protected:
-  bool isDynamicallyLegal(Operation *op) const override {
-    // The FuncOp type can contain types that the op's operand and result types
-    // do not contain.
-    if (auto func = dyn_cast<FuncOp>(op)) {
-      if (!converter_.isSignatureLegal(func.getType())) return false;
-    }
-    return converter_.isLegal(op);
+    markUnknownOpDynamicallyLegal([this](Operation *op) {
+      // The FuncOp type can contain types that the op's operand and result
+      // types do not contain.
+      if (auto func = dyn_cast<FuncOp>(op)) {
+        if (!converter_.isSignatureLegal(func.getType())) return false;
+      }
+      return converter_.isLegal(op);
+    });
   }
 
  private:
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,amdgpu_compiler.cc,"@@ -73,7 +73,7 @@ Status AMDGPUCompiler::OptimizeHloConvolutionCanonicalization(
   HloPassPipeline pipeline(""conv_canonicalization"");
   pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                             /*allow_mixed_precision=*/false);
-  pipeline.AddPass<CudnnConvRewriter>();
+  pipeline.AddPass<GpuConvRewriter>();
   pipeline.AddPass<GpuConvPaddingLegalization>();
 
   pipeline.AddPass<HloConstantFolding>();
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter.cc,"@@ -735,7 +735,7 @@ StatusOr<bool> RunOnComputation(HloComputation* computation) {
 }
 }  // namespace
 
-StatusOr<bool> CudnnConvRewriter::Run(HloModule* module) {
+StatusOr<bool> GpuConvRewriter::Run(HloModule* module) {
   bool changed = false;
   for (HloComputation* computation : module->MakeNonfusionComputations()) {
     TF_ASSIGN_OR_RETURN(bool result, RunOnComputation(computation));
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter.h,"@@ -32,7 +32,7 @@ namespace gpu {
 // Note that this pattern is necessary but not sufficient to map convolutions
 // to CuDNN.  More patterns will be matched in cudnn_fused_conv_rewriter.
 
-class CudnnConvRewriter : public HloModulePass {
+class GpuConvRewriter : public HloModulePass {
  public:
   absl::string_view name() const override { return ""cudnn-conv-rewriter""; }
 
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,gpu_conv_rewriter_test.cc,"@@ -85,7 +85,7 @@ class GpuConvRewriterTest : public HloTestBase {
 
  protected:
   bool RunPass(HloModule* module) {
-    return CudnnConvRewriter().Run(module).ValueOrDie();
+    return GpuConvRewriter().Run(module).ValueOrDie();
   }
 
   // A convolution window with stride 1 and zero padding. The size fields are
@@ -724,7 +724,7 @@ TEST_F(GpuConvRewriterTest, TestForwardInt8Convolution) {
     })"");
   TF_ASSERT_OK_AND_ASSIGN(auto m, ParseAndReturnVerifiedModule(module_str));
 
-  ASSERT_FALSE(CudnnConvRewriter().Run(m.get()).ok());
+  ASSERT_FALSE(GpuConvRewriter().Run(m.get()).ok());
 }
 }  // anonymous namespace
 }  // namespace gpu
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,ir_emission_utils.h,"@@ -113,7 +113,7 @@ bool IsCustomCallToDnnBatchNorm(const HloInstruction& hlo);
 // memory used by cudnn.  Callers shouldn't inspect scratch_memory, as its value
 // is not well-defined.
 //
-// CudnnConvRewriter lowers kConvolution HLOs to these custom calls.
+// GpuConvRewriter lowers kConvolution HLOs to these custom calls.
 // When it does so, it chooses algorithm -1 and 0 bytes of scratch space.  Later
 // on in the pipeline, CudnnConvAlgorithmChooser chooses an explicit
 // algorithm for each conv and sets the amount of scratch space needed.
",0,train
a3f6850276217b8b172f303204423c332db5fbf7,tensorflow/tensorflow,Renaming from CudnnConvRewriter to GpuConvRewriter,nvptx_compiler.cc,"@@ -112,7 +112,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization(
   pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                             /*allow_mixed_precision=*/false);
   pipeline.AddPass<CusolverRewriter>();
-  pipeline.AddPass<CudnnConvRewriter>();
+  pipeline.AddPass<GpuConvRewriter>();
   pipeline.AddPass<CudnnFusedConvRewriter>();
   pipeline.AddPass<GpuConvPaddingLegalization>();
   pipeline.AddPass<CudnnPadForConvolutions>(IsVoltaOrLater(*stream_exec));
@@ -121,7 +121,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization(
   // fixes.
   pipeline.AddPass<TupleSimplifier>();
 
-  // tf2xla bridge, DepthwiseConvolutionConverter and CudnnConvRewriter
+  // tf2xla bridge, DepthwiseConvolutionConverter and GpuConvRewriter
   // introduces reshapes and transposes that can be eliminated using
   // AlgebraicSimplifier
   {
@@ -134,7 +134,7 @@ Status NVPTXCompiler::OptimizeHloConvolutionCanonicalization(
     pass.AddPass<AlgebraicSimplifier>(options);
   }
 
-  // CudnnConvRewriter, GpuConvPaddingLegalization and
+  // GpuConvRewriter, GpuConvPaddingLegalization and
   // CudnnConvPadForTensorCores may add instructions which can be simplified
   // by constant folding.
   pipeline.AddPass<HloConstantFolding>();
@@ -170,7 +170,7 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment(
   // Choose the fastest algorithm for each conv.
   //
   // We pick the algorithm before fusion so we can generate better HLO. After
-  // CudnnConvRewriter, our convolutions are CustomCalls which return a
+  // GpuConvRewriter, our convolutions are CustomCalls which return a
   // tuple (conv_result, scratch_memory), and the each conv uses 0 bytes of
   // scratch:
   //
",0,train
2469ba8003194f92829f4119718f9ce2efd9eae9,tensorflow/tensorflow,"Update docstring for sequence_feature_column

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",sequence_feature_column.py,"@@ -371,6 +371,12 @@ def sequence_numeric_column(
     default_value: A single value compatible with `dtype` that is used for
       padding the sparse data into a dense `Tensor`.
     dtype: The type of values.
+    normalizer_fn: If not `None`, a function that can be used to normalize the
+      value of the tensor after `default_value` is applied for parsing.
+      Normalizer function takes the input `Tensor` as its argument, and returns
+      the output `Tensor`. (e.g. lambda x: (x - 3.0) / 4.2). Please note that
+      even though the most common use case of this function is normalization, it
+      can be used for any kind of Tensorflow transformations.
 
   Returns:
     A `_SequenceNumericColumn`.
",0,train
d4c975c2893c35a58300cf395d326aafc21e751b,tensorflow/tensorflow,Enable row_vectorization for small row.,ir_emitter_unnested.cc,"@@ -174,6 +174,26 @@ void AnnotateThunkLaunchDimensions(const LaunchDimensions& launch_dims,
       {llvm::ConstantAsMetadata::get(ir_kernel),
        llvm::MDString::get(llvm_context, ""reqntidx""),
        llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
+  if (launch_dims.thread_counts_per_block().y > 1) {
+    threads_per_block_ir_value = llvm::ConstantInt::get(
+        llvm::IntegerType::get(llvm_context, /*NumBits=*/32),
+        launch_dims.thread_counts_per_block().y);
+    nvvm_annotations_node->addOperand(llvm::MDNode::get(
+        llvm_context,
+        {llvm::ConstantAsMetadata::get(ir_kernel),
+         llvm::MDString::get(llvm_context, ""reqntidy""),
+         llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
+  }
+  if (launch_dims.thread_counts_per_block().z > 1) {
+    threads_per_block_ir_value = llvm::ConstantInt::get(
+        llvm::IntegerType::get(llvm_context, /*NumBits=*/32),
+        launch_dims.thread_counts_per_block().z);
+    nvvm_annotations_node->addOperand(llvm::MDNode::get(
+        llvm_context,
+        {llvm::ConstantAsMetadata::get(ir_kernel),
+         llvm::MDString::get(llvm_context, ""reqntidz""),
+         llvm::ConstantAsMetadata::get(threads_per_block_ir_value)}));
+  }
 }
 
 bool BinarySearchDenseElementsAttr(mlir::DenseIntElementsAttr elements,
",0,test
d4c975c2893c35a58300cf395d326aafc21e751b,tensorflow/tensorflow,Enable row_vectorization for small row.,launch_dimensions.cc,"@@ -70,9 +70,8 @@ int64_t ThreadsPerBlockRowVectorized(const Shape& shape,
       (shape.dimensions().back() % 256) != 0 &&
       // Do not trigger the row vectorized codepath if this create too
       // small block size as this hurt performance.
-      (threads_per_block_row_vectorized >= 128 &&
-       threads_per_block_row_vectorized <=
-           gpu_device_info.threads_per_block_limit)) {
+      threads_per_block_row_vectorized <=
+          gpu_device_info.threads_per_block_limit) {
     return threads_per_block_row_vectorized;
   }
   return -1;
@@ -101,12 +100,20 @@ StatusOr<LaunchDimensions> CalculateLaunchDimensions(
   // TODO(jlebar): Investigate this further, and tune this heuristic so we can
   // run faster on the few benchmarks where smaller block size helps.
   int64_t threads_per_block = ThreadsPerBlockLimit(gpu_device_info);
+  int64_t threads_per_block_y = 1;
   int64_t threads_per_block_row_vectorized =
       ThreadsPerBlockRowVectorized(shape, gpu_device_info, dim_config);
   if (threads_per_block_row_vectorized > 0) {
+    CHECK(dim_config.row_vectorized);
     threads_per_block = threads_per_block_row_vectorized;
-    VLOG(2) << ""Update # of threads per block to ("" << threads_per_block
+    if (threads_per_block < 128 && num_elements > 128) {
+      // This case happens for small row size.
+      threads_per_block_y =
+          CeilOfRatio((int64_t)128, threads_per_block);
+    VLOG(2) << ""Update # of threads per block to (.x="" << threads_per_block
+            << "", .y="" << threads_per_block_y
             << "") to be row_vectorized."";
+    }
   } else {
     CHECK(!dim_config.row_vectorized);
     // We unroll kernels to make use of vectorized loads/stores. This means we
@@ -122,27 +129,27 @@ StatusOr<LaunchDimensions> CalculateLaunchDimensions(
     }
   }
 
-  int64_t block_count = CeilOfRatio(num_elements, threads_per_block);
+  int64_t block_count = CeilOfRatio(num_elements,
+                                  threads_per_block * threads_per_block_y);
   if (dim_config.few_waves && !dim_config.row_vectorized) {
     int64_t capped_threads_per_block =
         std::min<int64_t>(threads_per_block, 128);
     int64_t capped_block_count =
         gpu_device_info.core_count *
-        (gpu_device_info.threads_per_core_limit / capped_threads_per_block);
+        (gpu_device_info.threads_per_core_limit /
+         (capped_threads_per_block * threads_per_block_y));
     if (capped_block_count < block_count) {
       threads_per_block = capped_threads_per_block;
       block_count = capped_block_count;
+      VLOG(2) << ""Update the # of blocks to "" << block_count
+              << "" and the # of threads per blocks to ""
+              << threads_per_block << "" as the few waves mode is enabled."";
     }
   } else if (dim_config.few_waves && dim_config.row_vectorized) {
-    int64_t capped_threads_per_block =
-        std::min<int64_t>(threads_per_block, 128);
-    if (dim_config.row_vectorized) {
-      // Keep the threads_per_block found for row_vectorized.
-      capped_threads_per_block = threads_per_block;
-    }
     int64_t min_block_count =
         gpu_device_info.core_count *
-        (gpu_device_info.threads_per_core_limit / capped_threads_per_block);
+        (gpu_device_info.threads_per_core_limit /
+         (threads_per_block * threads_per_block_y));
     int64_t capped_block_count = block_count;
     // This multiple of 32 was tuned to not cause regression on multiple
     // benchmarks.  It isn't a value that is optimal for all
@@ -154,7 +161,7 @@ StatusOr<LaunchDimensions> CalculateLaunchDimensions(
     // Do not increase the number of blocks. This can happens for
     // small num_elements.
     if (capped_block_count < block_count) {
-      threads_per_block = capped_threads_per_block;
+      VLOG(2) << ""Update # of blocks to block_count as few_waves is enabled."";
       block_count = capped_block_count;
     }
   }
@@ -167,11 +174,11 @@ StatusOr<LaunchDimensions> CalculateLaunchDimensions(
   }
 
   VLOG(2) << absl::StrFormat(
-      ""Initialized the block count to ceil(# of elements / threads per ""
-      ""block) = ceil(%d/%d) = %d"",
-      num_elements, threads_per_block, block_count);
-
-  return LaunchDimensions(block_count, threads_per_block);
+      ""Initialized the block count to %d, the block size .x=%d and .y=%d""
+      "" for %d elements in the tensor."",
+      block_count, threads_per_block, threads_per_block_y, num_elements);
+  return LaunchDimensions({block_count, 1, 1},
+                          {threads_per_block, threads_per_block_y, 1});
 }
 
 }  // namespace gpu
",0,test
2929f873bb5111316befe9e4804d6b4a8ad999cb,tensorflow/tensorflow,Update ctc_ops.py,ctc_ops.py,"@@ -313,8 +313,8 @@ def ctc_greedy_decoder(inputs,
 
   Notes:
 
-  - Unlike `ctc_beam_search_decoder`, `ctc_greedy_decoder` omits blanks up-to
-    the special treatment under `merge_repeated`.
+  - Unlike `ctc_beam_search_decoder`, `ctc_greedy_decoder` considers blanks
+    as regular elements when computing the probability of a sequence.
   - Default `blank_index` is `(num_classes - 1)`, unless overriden.
 
   If `merge_repeated` is `True`, merge repeated classes in output.
@@ -374,7 +374,14 @@ def ctc_beam_search_decoder(inputs,
                             top_paths=1,
                             merge_repeated=True):
   """"""Performs beam search decoding on the logits given in input.
-
+  
+  **Note** Although in general greedy search is a special case of beam-search
+  with `top_paths=1` and `beam_width=1`, `ctc_beam_search_decoder` differs
+  from `ctc_gready_decoder` in the treatment of blanks when computing the
+  probability of a sequence:
+    - `ctc_beam_search_decoder` treats blanks as sequence termination
+    - `ctc_gready_decoder` treats blanks as regular elements
+  
   If `merge_repeated` is `True`, merge repeated classes in the output beams.
   This means that if consecutive entries in a beam are the same,
   only the first of these is emitted.  That is, when the sequence is
@@ -433,9 +440,12 @@ def ctc_beam_search_decoder_v2(inputs,
                                top_paths=1):
   """"""Performs beam search decoding on the logits given in input.
 
-  **Note** The `ctc_greedy_decoder` is a special case of the
-  `ctc_beam_search_decoder` with `top_paths=1` and `beam_width=1` (but
-  that decoder is faster for this special case).
+  **Note** Although in general greedy search is a special case of beam-search
+  with `top_paths=1` and `beam_width=1`, `ctc_beam_search_decoder` differs
+  from `ctc_gready_decoder` in the treatment of blanks when computing the
+  probability of a sequence:
+    - `ctc_beam_search_decoder` treats blanks as sequence termination
+    - `ctc_gready_decoder` treats blanks as regular elements
 
   Args:
     inputs: 3-D `float` `Tensor`, size `[max_time, batch_size, num_classes]`.
",0,test
2a8b52fc0c5f1fc257ad9c042126b00edfeca705,tensorflow/tensorflow,"Don't use hex floats.

Hex float literals are in C11 and C++17, but not in C++11, so use plain float notation.

PiperOrigin-RevId: 197933744",hlo_evaluator_test.cc,"@@ -262,13 +262,13 @@ TEST_P(HloEvaluatorTest, DoesCosR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{1, -1}, {-1, 1}});
   TestUnaryOp(HloOpcode::kCos, std::move(expected), std::move(operand),
-              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
+              use_bfloat16_ ? 0.031250 : 9.5367431640625E-7);
 }
 TEST_P(HloEvaluatorTest, DoesSinR2) {
   auto operand = Literal::CreateR2<float>({{0, M_PI}, {-M_PI, 2 * M_PI}});
   auto expected = Literal::CreateR2<float>({{0, 0}, {0, 0}});
   TestUnaryOp(HloOpcode::kSin, std::move(expected), std::move(operand),
-              use_bfloat16_ ? 0x1.0P-5 : 0x1.0P-20);
+              use_bfloat16_ ? 0.031250 : 9.5367431640625E-7);
 }
 TEST_P(HloEvaluatorTest, DoesNotR2) {
   auto operand =
@@ -333,7 +333,7 @@ TEST_P(HloEvaluatorTest, DoesReshape) {
   result->EachCell<NativeT>(
       [&](tensorflow::gtl::ArraySlice<int64> indices, NativeT value) {
         std::vector<int64> rindexes = Permute(permutation, indices);
-        EXPECT_NEAR(value, literal_clone->Get<NativeT>(rindexes), 0x1.0P-5);
+        EXPECT_NEAR(value, literal_clone->Get<NativeT>(rindexes), 0.031250);
       });
 }
 
@@ -567,7 +567,7 @@ TEST_P(HloEvaluatorTest, NegativePadding2D) {
   (*expected_array)(0, 4) = 2.718f;
   auto expected = Literal::CreateR2FromArray2D<float>(*expected_array);
 
-  EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0x1.0P-5)));
+  EXPECT_TRUE(LiteralTestUtil::Near(*expected, *result, ErrorSpec(0.031250)));
 }
 
 TEST_P(HloEvaluatorTest, NegativeAndInteriorPadding2D) {
",0,train
2a8b52fc0c5f1fc257ad9c042126b00edfeca705,tensorflow/tensorflow,"Don't use hex floats.

Hex float literals are in C11 and C++17, but not in C++11, so use plain float notation.

PiperOrigin-RevId: 197933744",convert_test.cc,"@@ -249,10 +249,10 @@ XLA_TEST_F(ConvertTest, ConvertR1F32ToR1S64) {
                          -1.99f,
                          -2.0f,
                          -2.01f,
-                         0x1.FFFFFEp+62F,
-                         0x1.FFFFFCp+62F,
-                         -0x1.FFFFFEp+62F,
-                         -0x1.FFFFFCp+62F};
+                         9223371487098961920.f,
+                         9223370937343148032.f,
+                         -9223371487098961920.f,
+                         -9223370937343148032.f};
   std::unique_ptr<Literal> arg_literal = Literal::CreateR1<float>({arg});
   auto arg_param = builder.Parameter(0, arg_literal->shape(), ""arg_param"");
   std::unique_ptr<GlobalData> arg_data =
",0,train
01e1696cfca77dfe2438f55a43bf342c0c913510,tensorflow/tensorflow,"Update GraphDef version to 557.

PiperOrigin-RevId: 337643578
Change-Id: I9d8be717c6c4eb9148725b2dc0f4da4f7fff0f47",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 556  // Updated: 2020/10/16
+#define TF_GRAPH_DEF_VERSION 557  // Updated: 2020/10/17
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
9e3fa2c884063dbcd772b055f6104a7eae1d84ff,tensorflow/tensorflow,"Modify registration code so that the registered savers are compatible with V1 SavedModel loading APIs.

PiperOrigin-RevId: 434835675",registration_saving_test.py,"@@ -20,6 +20,7 @@ import tempfile
 from absl.testing import parameterized
 
 from google.protobuf import wrappers_pb2
+from tensorflow.python.client import session
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import test
@@ -30,6 +31,7 @@ from tensorflow.python.ops import io_ops
 from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.saved_model import load
+from tensorflow.python.saved_model import loader
 from tensorflow.python.saved_model import registration
 from tensorflow.python.saved_model import save
 from tensorflow.python.training.tracking import tracking
@@ -46,6 +48,12 @@ class Part(resource_variable_ops.ResourceVariable):
   def _deserialize_from_proto(cls, **kwargs):
     return cls([0, 0])
 
+  def _export_to_saved_model_graph(self, object_map, tensor_map, **kwargs):
+    p = Part(array_ops.zeros(self.shape, self.dtype))
+    object_map[self] = p
+    tensor_map[self.handle] = p.handle
+    return [self.handle]
+
 
 @registration.register_serializable()
 class Stack(tracking.AutoTrackable):
@@ -247,6 +255,25 @@ class SingleCycleTest(test.TestCase):
     util.Checkpoint(s2=restore_s).read(ckpt_path).expect_partial()
     self.assertAllEqual(expected_value_s2, restore_s.value())
 
+  def test_compatible_with_v1_savedmodel(self):
+    p1 = Part([1, 4])
+    p2 = Part([2, 5])
+    p3 = Part([3, 6])
+    s = Stack([p1, p2, p3])
+    save_path = os.path.join(self.get_temp_dir(), ""savedmodel"")
+
+    @def_function.function(input_signature=[])
+    def serve():
+      return {""value"": s.value()}
+
+    exported_value = serve()[""value""]
+
+    save.save(s, save_path, signatures=serve)
+    with ops.Graph().as_default(), session.Session() as sess:
+      metagraph = loader.load(sess, [""serve""], save_path)
+      value_output = metagraph.signature_def[""serving_default""].outputs[""value""]
+      self.assertAllEqual(exported_value, sess.run(value_output.name))
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
9e3fa2c884063dbcd772b055f6104a7eae1d84ff,tensorflow/tensorflow,"Modify registration code so that the registered savers are compatible with V1 SavedModel loading APIs.

PiperOrigin-RevId: 434835675",functional_saver.py,"@@ -135,35 +135,39 @@ def registered_saver_filename(filename_tensor, saver_name):
 
 def _get_mapped_registered_save_fn(fn, trackables, call_with_mapped_captures):
   """"""Converts the function to a python or tf.function with a single file arg.""""""
+
+  def save_fn(file_prefix):
+    return fn(trackables=trackables, file_prefix=file_prefix)
   if call_with_mapped_captures is None:
-    def mapped_fn(file_prefix):
-      return fn(trackables=trackables, file_prefix=file_prefix)
-    return mapped_fn
+    return save_fn
   else:
-    tf_fn = def_function.function(fn, autograph=False)
+    tf_fn = def_function.function(save_fn, autograph=False)
     concrete = tf_fn.get_concrete_function(
-        trackables=trackables,
         file_prefix=tensor_spec.TensorSpec(shape=(), dtype=dtypes.string))
-    def mapped_fn(file_prefix):
+
+    def save_fn_with_replaced_captures(file_prefix):
       return call_with_mapped_captures(concrete, [file_prefix])
-    return mapped_fn
+
+    return save_fn_with_replaced_captures
 
 
 def _get_mapped_registered_restore_fn(fn, trackables,
                                       call_with_mapped_captures):
   """"""Converts the function to a python or tf.function with a single file arg.""""""
+
+  def restore_fn(merged_prefix):
+    return fn(trackables=trackables, merged_prefix=merged_prefix)
   if call_with_mapped_captures is None:
-    def mapped_fn(merged_prefix):
-      return fn(trackables=trackables, merged_prefix=merged_prefix)
-    return mapped_fn
+    return restore_fn
   else:
-    tf_fn = def_function.function(fn, autograph=False)
+    tf_fn = def_function.function(restore_fn, autograph=False)
     concrete = tf_fn.get_concrete_function(
-        trackables=trackables,
         merged_prefix=tensor_spec.TensorSpec(shape=(), dtype=dtypes.string))
-    def mapped_fn(merged_prefix):
+
+    def restore_fn_with_replaced_captures(merged_prefix):
       return call_with_mapped_captures(concrete, [merged_prefix])
-    return mapped_fn
+
+    return restore_fn_with_replaced_captures
 
 
 class MultiDeviceSaver(object):
",0,train
ae11a063040ee1d13cc71eaa74b2ab7c6f19873f,tensorflow/tensorflow,"iOS Metal delegate: support no biases in transposed_conv.

PiperOrigin-RevId: 273561634",compute_task_descriptor.cc,"@@ -51,7 +51,7 @@ std::vector<uint8_t> GetByteBufferConverted(
     for (const float value : input_vector) {
       const HalfBits converted = fp16_ieee_from_fp32_value(value);
       const uint8_t* bytes = reinterpret_cast<const uint8_t*>(&converted);
-      result.insert(result.end(), bytes, bytes + sizeof(*bytes));
+      result.insert(result.end(), bytes, bytes + sizeof(HalfBits));
     }
     return result;
   }
",0,train
ae11a063040ee1d13cc71eaa74b2ab7c6f19873f,tensorflow/tensorflow,"iOS Metal delegate: support no biases in transposed_conv.

PiperOrigin-RevId: 273561634",transpose_conv.cc,"@@ -1047,11 +1047,8 @@ std::vector<ComputeTaskDescriptorPtr> ConvolutionTransposed3x3(
   auto filters = options.storage_precision == RuntimeOptions::Precision::FP32
                      ? GetByteBuffer(filters_reordered)
                      : VectorFloatToHalf(filters_reordered);
-  auto resized_bias = params.bias.data;
-  resized_bias.resize(params.weights.shape.o, 0.0f);
-  auto biases = options.storage_precision == RuntimeOptions::Precision::FP32
-                    ? GetByteBuffer(resized_bias)
-                    : VectorFloatToHalf(resized_bias);
+  auto biases = GetByteBufferConvertedResized(
+      params.bias.data, options.storage_precision, params.weights.shape.o);
   border_desc->immutable_buffers = {
       {""device FilterStripe* const filters"", filters},
       {""constant FLT4* const biases"", biases},
@@ -1139,8 +1136,8 @@ std::vector<ComputeTaskDescriptorPtr> ConvolutionTransposed3x3(
       }};
 
   desc->immutable_buffers = {
-      {""device FilterStripe* const filters"", GetByteBuffer(filters)},
-      {""constant FLT4* const biases"", GetByteBuffer(biases)},
+      {""device FilterStripe* const filters"", filters},
+      {""constant FLT4* const biases"", biases},
   };
 
   desc->uniform_buffers = {
",0,train
795cd91aad486e28098b07d6e3651f243b2bde64,tensorflow/tensorflow,"Fix monitored_session docstring.

PiperOrigin-RevId: 257040451",monitored_session.py,"@@ -762,17 +762,24 @@ class _MonitoredSession(object):
         computations with access to a raw session.  The returned value of the
         `step_fn` will be returned from `run_step_fn`, unless a stop is
         requested.  In that case, the next `should_stop` call will return True.
-        Example usage:  ```python
-           with tf.Graph().as_default(): c =
-             tf.compat.v1.placeholder(dtypes.float32) v = tf.add(c, 4.0) w =
-             tf.add(c, 0.5)
-             def step_fn(step_context):
-               a = step_context.session.run(fetches=v, feed_dict={c: 0.5})
-               if a <= 4.5: step_context.request_stop()
-               return step_context.run_with_hooks(fetches=w, feed_dict={c: 0.1})
-             with tf.MonitoredSession() as session:
-               while not session.should_stop(): a = session.run_step_fn(step_fn)
-                 ```  Hooks interact with the `run_with_hooks()` call inside the
+        Example usage:
+            ```python
+            with tf.Graph().as_default():
+              c = tf.compat.v1.placeholder(dtypes.float32)
+              v = tf.add(c, 4.0)
+              w = tf.add(c, 0.5)
+              def step_fn(step_context):
+                a = step_context.session.run(fetches=v, feed_dict={c: 0.5})
+                if a <= 4.5:
+                  step_context.request_stop()
+                  return step_context.run_with_hooks(fetches=w,
+                                                     feed_dict={c: 0.1})
+
+              with tf.MonitoredSession() as session:
+                while not session.should_stop():
+                  a = session.run_step_fn(step_fn)
+            ```
+            Hooks interact with the `run_with_hooks()` call inside the
                  `step_fn` as they do with a `MonitoredSession.run` call.
 
     Returns:
",0,train
cfa374cefe132be886c26a374c51454177c68868,tensorflow/tensorflow,Fix the segfault in convert_nodes.cc,convert_nodes.cc,"@@ -119,26 +119,29 @@ static std::vector<std::pair<int, int>> CreateSamePadding(
 class TRT_ShapedWeights {
  public:
   TRT_ShapedWeights(tensorflow::DataType type, const void* values,
-                    nvinfer1::Dims shape, bool owned_values = false)
+                    nvinfer1::Dims shape,
+                    const std::vector<char>* owned_values = nullptr)
       : shape_(shape),
         type_(type),
         values_(values),
-        owned_values_(owned_values),
+        owned_values_(owned_values ? *owned_values : std::vector<char>({})),
         dummy_flag_(false) {
     // Note: this->shape.type[] is not used
   }
 
   explicit TRT_ShapedWeights(tensorflow::DataType type)
-      : type_(type),
+      : shape_(),
+        type_(type),
         values_(nullptr),
-        owned_values_(false),
+        owned_values_(),
         dummy_flag_(true) {}
 
-  ~TRT_ShapedWeights() {
-    if (values_ && owned_values_) delete static_cast<const char*>(values_);
-  }
-
-  TRT_ShapedWeights(const TRT_ShapedWeights&) = default;
+  TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
+      : shape_(rhs.shape_),
+        type_(rhs.type_),
+        values_(rhs.values_),
+        owned_values_(rhs.owned_values_),
+        dummy_flag_(rhs.dummy_flag_) {}
 
   int64_t count() const {
     int64_t c = 1;
@@ -152,7 +155,18 @@ class TRT_ShapedWeights {
     if (dummy_flag_) return nvinfer1::Weights{trt_type, nullptr, 0};
 
     // Note: this->shape.type[] is not used
-    return nvinfer1::Weights{trt_type, values_, GetShapeSize(shape_)};
+    return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)};
+  }
+
+  const void* GetValues() const {
+    if (values_) return values_;
+    if (owned_values_.size()) return owned_values_.data();
+    return nullptr;
+  }
+
+  void SetValues(const void* values) {
+    values_ = values;
+    owned_values_.clear();
   }
 
   size_t size_bytes() const {
@@ -165,51 +179,55 @@ class TRT_ShapedWeights {
 
   nvinfer1::Dims shape_;
   tensorflow::DataType type_;
+
+ private:
   const void* values_;
-  bool owned_values_;
+  std::vector<char> owned_values_;
   bool dummy_flag_;
 };
 
 class TRT_TensorOrWeights {
  public:
   explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor)
-      : _tensor_(tensor), _variant_(TRT_NODE_TENSOR) {}
-  TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
-      : _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {}
+      : _tensor_(tensor), _weights_(DT_FLOAT), _variant_(TRT_NODE_TENSOR) {}
+  explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
+      : _tensor_(nullptr), _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {}
+  TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
+      : _tensor_(rhs._tensor_),
+        _weights_(rhs._weights_),
+        _variant_(rhs._variant_) {}
   ~TRT_TensorOrWeights() {}
 
   bool is_tensor() const { return _variant_ == TRT_NODE_TENSOR; }
   bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; }
 
   nvinfer1::ITensor* tensor() {
-    CHECK_EQ(this->is_tensor(), true);
+    CHECK_EQ(is_tensor(), true);
     return _tensor_;
   }
-  nvinfer1::ITensor const* tensor() const {
-    CHECK_EQ(this->is_tensor(), true);
+  const nvinfer1::ITensor* tensor() const {
+    CHECK_EQ(is_tensor(), true);
     return _tensor_;
   }
   TRT_ShapedWeights& weights() {
-    CHECK_EQ(this->is_weights(), true);
+    CHECK_EQ(is_weights(), true);
     return _weights_;
   }
   const TRT_ShapedWeights& weights() const {
-    CHECK_EQ(this->is_weights(), true);
+    CHECK_EQ(is_weights(), true);
     return _weights_;
   }
   nvinfer1::Dims shape() const {
-    if (this->is_tensor()) {
-      return this->tensor()->getDimensions();
+    if (is_tensor()) {
+      return tensor()->getDimensions();
     } else {
-      return this->weights().shape_;
+      return weights().shape_;
     }
   }
 
  private:
-  union {
-    nvinfer1::ITensor* _tensor_;
-    TRT_ShapedWeights _weights_;
-  };
+  nvinfer1::ITensor* _tensor_;
+  TRT_ShapedWeights _weights_;
   enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } _variant_;
 };
 
@@ -307,7 +325,7 @@ tensorflow::DataType TFAttrs::get<tensorflow::DataType>(string key) const {
 }
 
 template <typename T>
-void Reorder4(nvinfer1::DimsNCHW shape, T const* idata,
+void Reorder4(nvinfer1::DimsNCHW shape, const T* idata,
               nvinfer1::DimsNCHW istrides, T* odata,
               nvinfer1::DimsNCHW ostrides) {
   for (int n = 0; n < shape.n(); ++n) {
@@ -339,9 +357,10 @@ void ReorderRSCKToKCRS(const TRT_ShapedWeights& iweights,
   nvinfer1::DimsNCHW ostrides = {c * r * s, r * s, s, 1};
   switch (iweights.type_) {
     case tensorflow::DataType::DT_FLOAT:
-      Reorder4(
-          {k, c, r, s}, static_cast<float const*>(iweights.values_), istrides,
-          static_cast<float*>(const_cast<void*>(oweights->values_)), ostrides);
+      Reorder4({k, c, r, s}, static_cast<float const*>(iweights.GetValues()),
+               istrides,
+               static_cast<float*>(const_cast<void*>(oweights->GetValues())),
+               ostrides);
       break;
     default:
       LOG(FATAL) << ""!!!!!!!!!!!!!!!!!!!!!!!!broke!!!!!!!!!!!!"";
@@ -399,7 +418,7 @@ class Converter {
     TRT_ShapedWeights weights(type, nullptr, shape);
     // TODO(jie): check weights size_bytes. 0 means type error
     _temp_bufs.push_back(std::vector<uint8_t>(weights.size_bytes()));
-    weights.values_ = _temp_bufs.back().data();
+    weights.SetValues(_temp_bufs.back().data());
     return weights;
   }
 
@@ -579,8 +598,8 @@ tensorflow::Status UnaryCompute(const TRT_ShapedWeights& iweights,
   CHECK_EQ(iweights.type_, oweights->type_);
   switch (iweights.type_) {
     case tensorflow::DataType::DT_FLOAT: {
-      auto inp = static_cast<float const*>(iweights.values_);
-      auto oup = static_cast<float*>(const_cast<void*>(oweights->values_));
+      auto inp = static_cast<float const*>(iweights.GetValues());
+      auto oup = static_cast<float*>(const_cast<void*>(oweights->GetValues()));
       std::transform(inp, inp + iweights.count(), oup, unary_op.unary<float>());
       break;
     }
@@ -603,9 +622,9 @@ tensorflow::Status BinaryCompute(const TRT_ShapedWeights& iweights_l,
 
   switch (iweights_l.type_) {
     case tensorflow::DataType::DT_FLOAT: {
-      auto inp_l = static_cast<float const*>(iweights_l.values_);
-      auto inp_r = static_cast<float const*>(iweights_r.values_);
-      auto oup = static_cast<float*>(const_cast<void*>(oweights->values_));
+      auto inp_l = static_cast<const float*>(iweights_l.GetValues());
+      auto inp_r = static_cast<const float*>(iweights_r.GetValues());
+      auto oup = static_cast<float*>(const_cast<void*>(oweights->GetValues()));
 
       if (iweights_l.count() != iweights_r.count()) {
         // We only supports broadcast of RankZero
@@ -1117,7 +1136,7 @@ tensorflow::Status ConvertConst(Converter& ctx,
 
   // Get trt type & shape
   TFAttrs attrs(node_def);
-  tensorflow::DataType dtype = attrs.get<tensorflow::DataType>(""dtype"");
+  const tensorflow::DataType dtype = attrs.get<tensorflow::DataType>(""dtype"");
 
   // Create shaped weights as output
   tensorflow::Tensor tensor;
@@ -1148,11 +1167,18 @@ tensorflow::Status ConvertConst(Converter& ctx,
   } else if (!weights_tensor.tensor_content().empty()) {
     VLOG(2) << ""TENSOR!!!"" << node_def.name();
     const auto& content = weights_tensor.tensor_content();
-    char* buf = new char[content.size() + 1];
-    buf[content.size()] = 0;
-    port::CopyToArray(content, buf);
-    weights = TRT_ShapedWeights(dtype, buf, GetTensorShape(tensor),
-                                /*owned_values=*/true);
+
+    std::vector<char> values;
+    if (content.size() > 0) {
+      const int dtype_size = tensorflow::DataTypeSize(dtype);
+      CHECK_EQ(0, content.size() % dtype_size)
+          << ""Tensor content size ("" << content.size()
+          << "") is not a multiple of "" << dtype_size;
+      values.resize(content.size());
+      port::CopyToArray(content, values.data());
+    }
+    weights =
+        TRT_ShapedWeights(dtype, nullptr, GetTensorShape(tensor), &values);
   } else {
     return tensorflow::errors::Unimplemented(
         ""Not supported constant type, at "" + node_def.name());
@@ -1242,7 +1268,7 @@ tensorflow::Status ConvertReduce(Converter& ctx,
   if (index_type != tensorflow::DataType::DT_INT32)
     return tensorflow::errors::Unimplemented(""Tidx supports only DT_INT32"");
   auto index_list_data =
-      static_cast<int*>(const_cast<void*>(index_list.values_));
+      static_cast<int*>(const_cast<void*>(index_list.GetValues()));
 
   // Hack warning: have to fall back to pool layer since reduce is not in public
   // TRT yet.
@@ -1340,7 +1366,7 @@ tensorflow::Status ConvertPad(Converter& ctx,
   if (padding_type != tensorflow::DataType::DT_INT32)
     return tensorflow::errors::Unimplemented(
         ""Tpaddings supports only DT_INT32"");
-  auto pad_data = static_cast<int*>(const_cast<void*>(pads.values_));
+  auto pad_data = static_cast<int*>(const_cast<void*>(pads.GetValues()));
 
   std::vector<int32_t> pad_index;
   for (int i = 0; i < nb_dims; i++) {
",0,train
2174f5e1dfd24a55aaced21990e3c53148035dc6,tensorflow/tensorflow,"Fix a bug introduced in cl/130951050. The correct CL description should be: make sure that no control edges by an outer control dependency context are added to nodes inside a while loop.
Change: 131231314",control_flow_ops.py,"@@ -1158,8 +1158,17 @@ class ControlFlowContext(object):
 
   def _MaybeRemoveExternalControlEdges(self, op):
     """"""Remove any external control dependency on this op.""""""
-    internal_control_inputs = [x for x in op.control_inputs
-                               if _GetOutputContext(x) == self]
+    while_ctxt = self.GetWhileContext()
+    # A control input of `op` is internal if it is in the same while
+    # loop context as the enclosing while loop context of self.
+    if while_ctxt is None:
+      internal_control_inputs = op.control_inputs
+    else:
+      internal_control_inputs = []
+      for x in op.control_inputs:
+        ctxt = _GetOutputContext(x)
+        if ctxt is not None and ctxt.GetWhileContext() == while_ctxt:
+          internal_control_inputs.append(x)
     if len(internal_control_inputs) != len(op.control_inputs):
       del op.control_inputs[:]
       op._add_control_inputs(internal_control_inputs)
",0,test
59b88e9bfbfe0f2042b18387b082c015b90c1158,tensorflow/tensorflow,"[ROCm] Fix for a test regression on the ROCm platform - 200207 - 2

The following commit introduces a test regression on the ROCm platform
https://github.com/tensorflow/tensorflow/commit/7a931a2349591f4e2250ac2d3b6c3ca66538b740

That commit adds an explicit check for GPU device in the profiler output (if a GPU is present in the list of physical devices).

Since ROCm platform does not yet support device tracing, this test now fails on the ROCm platform

The ""fix"" (until ROCm adds support for device tracing) is to disable that check on the ROCm platform",profiler_test.py,"@@ -47,7 +47,8 @@ class ProfilerTest(test_util.TensorFlowTestCase):
     profile_pb.ParseFromString(profile_result)
     devices = frozenset(device.name for device in profile_pb.devices.values())
     self.assertIn('/host:CPU', devices)
-    if config.list_physical_devices('GPU'):
+    if not test_util.IsBuiltWithROCm() and config.list_physical_devices('GPU'):
+      # device tracing is not yet supported on the ROCm platform
       self.assertIn('/device:GPU:0', devices)
     events = frozenset(event.name for event in profile_pb.trace_events)
     self.assertIn('three_times_five', events)
",0,train
13bc7a5343dbbd27a9244f4756adcf98ead326b8,tensorflow/tensorflow,"Move StatType to open-source MetadataMatcher.

PiperOrigin-RevId: 284796115
Change-Id: Icb541c6e652914cea8a5144a0faa4f6410ac99b5",tf_op_utils.h,"@@ -54,6 +54,66 @@ inline bool IsUnknownOp(absl::string_view tf_op_type) {
 inline bool IsDatasetOp(absl::string_view tf_op_type) {
   return tf_op_type == kDatasetOp;
 }
+
+constexpr size_t kNumStatType = 27;
+
+enum class StatType {
+  kUnknown = 0,
+  // TraceMe arguments.
+  kStepId,
+  kParentStepId,
+  kFunctionStepId,
+  kDeviceOrdinal,
+  kChipOrdinal,
+  kNodeOrdinal,
+  kModelId,
+  kQueueAddr,
+  kRequestId,
+  kRunId,
+  kCorrelationId,
+  kGraphType,
+  kStepNum,
+  kIterNum,
+  kIndexOnHost,
+  kBytesReserved,
+  kBytesAllocated,
+  kBytesAvailable,
+  kFragmentation,
+  kKernelDetails,
+  // Stats added when processing traces.
+  kGroupId,
+  kStepName,
+  kLevel0,
+  kTfOp,
+  kHloOp,
+  kHloModule,
+};
+
+constexpr std::array<absl::string_view, kNumStatType> kStatTypeStrMap({
+    ""unknown"",         ""id"",
+    ""parent_step_id"",  ""function_step_id"",
+    ""device_ordinal"",  ""chip_ordinal"",
+    ""node_ordinal"",    ""model_id"",
+    ""queue_addr"",      ""request_id"",
+    ""run_id"",          ""correlation_id"",
+    ""graph_type"",      ""step_num"",
+    ""iter_num"",        ""index_on_host"",
+    ""bytes_reserved"",  ""bytes_allocated"",
+    ""bytes_available"", ""fragmentation"",
+    ""kernel_details"",  ""group_id"",
+    ""step_name"",       ""level 0"",
+    ""tf_op"",           ""hlo_op"",
+    ""hlo_module"",
+});
+
+inline absl::string_view GetStatTypeStr(StatType stat_type) {
+  return kStatTypeStrMap.at(static_cast<std::size_t>(stat_type));
+}
+
+inline bool IsStatType(StatType stat_type, absl::string_view stat_name) {
+  return kStatTypeStrMap.at(static_cast<std::size_t>(stat_type)) == stat_name;
+}
+
 }  // namespace profiler
 }  // namespace tensorflow
 
",0,train
0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),__init__.py,"@@ -47,7 +47,7 @@ _allowed_symbols = [
     'VariableClippingOptimizer',
     'MultitaskOptimizerWrapper',
     'clip_gradients_by_global_norm',
-    'ElasticAverageOptimizer', 
+    'ElasticAverageOptimizer',
     'ElasticAverageCustomGetter'
 ]
 
",0,train
0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),elastic_average_optimizer.py,"@@ -79,9 +79,9 @@ class ElasticAverageCustomGetter(object):
     if trainable:
       with ops.device(self._worker_device):
         local_var = getter(name, trainable=True,
-                           collections=[ops.GraphKeys.LOCAL_VARIABLES], 
+                           collections=[ops.GraphKeys.LOCAL_VARIABLES],
                            *args, **kwargs)
-        
+
       global_center_variable = variable_scope.variable(
         name='%s/%s' %
              (GLOBAL_VARIABLE_NAME,
@@ -96,7 +96,7 @@ class ElasticAverageCustomGetter(object):
           initial_value=local_var.initialized_value(),
           trainable=False,
           collections=[ops.GraphKeys.LOCAL_VARIABLES])
-        
+
       self._local_map[local_var] = local_center_variable
       self._global_map[local_var] = global_center_variable
       return local_var
@@ -173,7 +173,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer):
                         colocate_gradients_with_ops=False,
                         grad_loss=None):
     """"""Compute gradients of `loss` for the variables in `var_list`.
-    
+
     Add rho*elastic_difference to loss to control the exploration
     This is the first part of `minimize()`.  It returns a list
     of (gradient, variable) pairs where ""gradient"" is the gradient
@@ -204,7 +204,7 @@ class ElasticAverageOptimizer(optimizer.Optimizer):
     """"""
     if not var_list:
       var_list = variables.trainable_variables()
-      
+
     elastic_difference = [math_ops.subtract(v, lv) for v, lv in zip(
       variables.trainable_variables(),
       [self._local_map[var] for var in var_list])]
",0,train
0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),setup.py,"@@ -70,7 +70,7 @@ setup(
         'Topic :: Scientific/Engineering :: Mathematics',
         'Topic :: Scientific/Engineering :: Artificial Intelligence',
         'Topic :: Software Development',
-        'Topic :: Software Development :: Libraries',  
+        'Topic :: Software Development :: Libraries',
         'Topic :: Software Development :: Libraries :: Python Modules',
     ],
     license='Apache 2.0',
",0,train
0f2f5b978524f3306e415d18701ea64bd2c688b3,tensorflow/tensorflow,Fix PEP8 (#15378),fully_connected_reader.py,"@@ -62,7 +62,7 @@ def decode(serialized_example):
 
   # Convert label from a scalar uint8 tensor to an int32 scalar.
   label = tf.cast(features['label'], tf.int32)
-  
+
   return image, label
 
 def augment(image, label):
@@ -172,7 +172,7 @@ def run_training():
           step += 1
       except tf.errors.OutOfRangeError:
         print('Done training for %d epochs, %d steps.' % (FLAGS.num_epochs, step))
-      
+
 def main(_):
   run_training()
 
",0,train
0b537e5b7d4eca61b058d2415f8f93b253506a1a,tensorflow/tensorflow,Don't dump the whole literal into VLOG(1),xla_device_context.cc,"@@ -131,7 +131,7 @@ void XlaDeviceContext::CopyCPUTensorToDevice(const Tensor* cpu_tensor,
         xla::ShapeUtil::MakeShape(shape.element_type(),
                                   xla::AsInt64Slice(shape.dimensions())));
 
-    VLOG(1) << ""Transfer to device as literal: "" << literal.ToString() << "" ""
+    VLOG(2) << ""Transfer to device as literal: "" << literal.ToString() << "" ""
             << xla_tensor->shaped_buffer().ToString();
     if (UseMultipleStreams() &&
         !transfer_manager_->CanShapedBufferBeAccessedNow(
@@ -214,7 +214,7 @@ void XlaDeviceContext::CopyDeviceTensorToCPU(const Tensor* device_tensor,
       device_to_host_stream_.get(), xla_tensor->shaped_buffer(), literal,
       [ref, xla_tensor, done](xla::Status status) {
         done([&]() -> Status {
-          VLOG(1) << ""Transfer from device as literal: ""
+          VLOG(2) << ""Transfer from device as literal: ""
                   << xla_tensor->shaped_buffer().ToString();
           return status;
         }());
",0,test
b0641138b866a5ffdc511f4ab055735513c57c92,tensorflow/tensorflow,"convert_to_tensor calls eager_convert_to_tensor in eager mode

Temporary hack to make most composite ops work.

PiperOrigin-RevId: 165205218",ops.py,"@@ -961,6 +961,8 @@ def convert_to_tensor(value, dtype=None, name=None, preferred_dtype=None):
     RuntimeError: If a registered conversion function returns an invalid value.
 
   """"""
+  if context.in_eager_mode():
+    return convert_to_eager_tensor(value, dtype=dtype)
   return internal_convert_to_tensor(
       value=value,
       dtype=dtype,
@@ -1005,6 +1007,8 @@ def internal_convert_to_tensor(value,
     RuntimeError: If a registered conversion function returns an invalid value.
 
   """"""
+  if context.in_eager_mode():
+    return convert_to_eager_tensor(value, dtype=dtype)
   error_prefix = """" if name is None else ""%s: "" % name
   if dtype is not None:
     dtype = dtypes.as_dtype(dtype)
",0,train
b0641138b866a5ffdc511f4ab055735513c57c92,tensorflow/tensorflow,"convert_to_tensor calls eager_convert_to_tensor in eager mode

Temporary hack to make most composite ops work.

PiperOrigin-RevId: 165205218",ops_test.py,"@@ -25,6 +25,7 @@ from tensorflow.core.framework import attr_value_pb2
 from tensorflow.core.framework import types_pb2
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.client import session
+from tensorflow.python.eager import context
 from tensorflow.python.framework import common_shapes
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import device as pydev
@@ -290,6 +291,12 @@ class OperationTest(test_util.TensorFlowTestCase):
       self.assertAllEqual((4, 1), tensor.get_shape().as_list())
       self.assertAllEqual(values, tensor.eval())
 
+  def testConvertToTensorEager(self):
+    with context.eager_mode():
+      t = ops.EagerTensor(1)
+      converted = ops.convert_to_tensor(t)
+      self.assertTrue(isinstance(converted, ops.EagerTensor))
+
   def testConvertToTensorNestedTuple(self):
     with self.test_session():
       values = ((2,), (3,), (5,), (7,))
",0,train
94bf823c136dce0f7846176f9be6129b990b1c1c,tensorflow/tensorflow,"Set all tf.VarIsInitializedOp to true in resource op lifting pass.

Currently the resource op lifting pass is only used for the TPU bridge and it is assumed for all TPU computations variables are all initialized.

PiperOrigin-RevId: 337593565
Change-Id: I8a5d687eb8b0cafc3de8d6d5ead6ec690d809679",resource_op_lifting.cc,"@@ -159,6 +159,26 @@ Type GetResourceSubtype(Value value) {
   return nullptr;
 }
 
+// Replaces all `tf.VarIsInitializedOp` in a block with a constant true.
+// TODO(b/171039585): Replace this with proper analysis of
+// `tf.VarIsInitializedOp` in regards to resource writes and control flow.
+void SetAllVarIsInitializedToTrue(Block* block) {
+  auto builder = OpBuilder::atBlockBegin(block);
+  TF::ConstOp const_true = nullptr;
+  for (auto op :
+       llvm::make_early_inc_range(block->getOps<TF::VarIsInitializedOp>())) {
+    builder.setInsertionPoint(op);
+    if (!const_true)
+      const_true = builder.create<TF::ConstOp>(
+          op.getLoc(),
+          DenseIntElementsAttr::get(
+              RankedTensorType::get(/*shape=*/{}, builder.getI1Type()), true));
+
+    op.is_initialized().replaceAllUsesWith(const_true);
+    op.erase();
+  }
+}
+
 // Performs store-load forwarding. This effectively removes
 // 1) Any resource loads after a store to that same resource is done
 // 2) Any resource stores except the last one.
@@ -767,8 +787,6 @@ LogicalResult LiftArgRetResourcesForFunction(
     FuncOp func_op,
     const llvm::SmallDenseMap<int64_t, Type>& resource_data_types,
     llvm::function_ref<void(int64_t, Value)> handle_updated_arg_value) {
-  ForwardStoreToLoad(&func_op.front());
-
   RegionResourceHoister hoister(func_op);
   if (failed(hoister.Analyze())) return failure();
 
@@ -1167,7 +1185,7 @@ void UpdatePartitionedCallOpWithNewCallee(
 }
 
 LogicalResult HoistForControlFlow(
-    Block*, ModuleOp,
+    Block*, ModuleOp, bool,
     llvm::SmallDenseMap<llvm::StringRef, PartitionedCallLiftingInfo>*);
 
 // A templated routine for handling both PartitionedCallOp and
@@ -1176,14 +1194,15 @@ LogicalResult HoistForControlFlow(
 // flow, then performs lifting on the callee.
 template <typename CallOpType>
 LogicalResult HandlePartitionedCallOp(
-    CallOpType call_op, FuncOp callee, ModuleOp module,
+    CallOpType call_op, FuncOp callee, ModuleOp module, bool vars_initialized,
     llvm::SmallDenseMap<llvm::StringRef, PartitionedCallLiftingInfo>*
         lifted_callees) {
   auto emplace_res = lifted_callees->try_emplace(callee.getName(),
                                                  PartitionedCallLiftingInfo());
   if (emplace_res.second) {
     // Unseen callee. Perform resource lifting on it.
-    if (failed(HoistForControlFlow(&callee.front(), module, lifted_callees)))
+    if (failed(HoistForControlFlow(&callee.front(), module, vars_initialized,
+                                   lifted_callees)))
       return failure();
 
     if (failed(HandlePartitionedCallOpCallee(
@@ -1198,26 +1217,28 @@ LogicalResult HandlePartitionedCallOp(
 // Hoists resource loads/stores from control flow ops in `block` outside the
 // body/cond/branch/callee functions.
 LogicalResult HoistForControlFlow(
-    Block* block, ModuleOp module,
+    Block* block, ModuleOp module, bool vars_initialized,
     llvm::SmallDenseMap<llvm::StringRef, PartitionedCallLiftingInfo>*
         lifted_partitioned_call_callees) {
+  if (vars_initialized) SetAllVarIsInitializedToTrue(block);
+
   for (Operation& op : llvm::make_early_inc_range(*block)) {
     if (auto while_op = llvm::dyn_cast<TF::WhileOp>(&op)) {
       auto body = while_op.body_function();
       auto cond = while_op.cond_function();
       // Recursively handle the nested control flow.
-      HoistForControlFlow(&body.front(), module,
+      HoistForControlFlow(&body.front(), module, vars_initialized,
                           lifted_partitioned_call_callees);
-      HoistForControlFlow(&cond.front(), module,
+      HoistForControlFlow(&cond.front(), module, vars_initialized,
                           lifted_partitioned_call_callees);
       if (failed(HandleWhileLoop(while_op, body, cond))) return failure();
     } else if (auto if_op = llvm::dyn_cast<TF::IfOp>(&op)) {
       auto then_branch = if_op.then_function();
       auto else_branch = if_op.else_function();
       // Recursively handle the nested control flow.
-      HoistForControlFlow(&then_branch.front(), module,
+      HoistForControlFlow(&then_branch.front(), module, vars_initialized,
                           lifted_partitioned_call_callees);
-      HoistForControlFlow(&else_branch.front(), module,
+      HoistForControlFlow(&else_branch.front(), module, vars_initialized,
                           lifted_partitioned_call_callees);
       if (failed(HandleCaseOrIfOp(if_op, {then_branch, else_branch})))
         return failure();
@@ -1226,7 +1247,7 @@ LogicalResult HoistForControlFlow(
       case_op.get_branch_functions(branch_functions);
       for (FuncOp func : branch_functions) {
         // Recursively handle the nested control flow.
-        HoistForControlFlow(&func.front(), module,
+        HoistForControlFlow(&func.front(), module, vars_initialized,
                             lifted_partitioned_call_callees);
       }
       if (failed(HandleCaseOrIfOp(case_op, branch_functions))) return failure();
@@ -1237,6 +1258,7 @@ LogicalResult HoistForControlFlow(
             ""resource lifting does not support call with nested references."");
       }
       if (failed(HandlePartitionedCallOp(call_op, callee, module,
+                                         vars_initialized,
                                          lifted_partitioned_call_callees))) {
         // Nested control flow handling is done in HandlePartitionedCallOp().
         return failure();
@@ -1244,12 +1266,13 @@ LogicalResult HoistForControlFlow(
     } else if (auto call_op =
                    llvm::dyn_cast<TF::StatefulPartitionedCallOp>(&op)) {
       if (failed(HandlePartitionedCallOp(call_op, call_op.func(), module,
+                                         vars_initialized,
                                          lifted_partitioned_call_callees))) {
         return failure();
       }
     } else if (isa<TF::IfRegionOp, TF::CaseRegionOp, TF::WhileRegionOp>(op)) {
       for (Region& region : op.getRegions())
-        HoistForControlFlow(&region.front(), module,
+        HoistForControlFlow(&region.front(), module, vars_initialized,
                             lifted_partitioned_call_callees);
       LogicalResult result = RegionResourceHoister::ReplaceOpWithNewOp(&op);
       if (failed(result)) return failure();
@@ -1277,7 +1300,8 @@ void ResourceOpLiftingPass::runOnOperation() {
   auto walk_result = module.walk([&](FuncOp func_op) {
     return func_op.walk([&](tf_device::ClusterOp cluster) {
       LogicalResult result = HoistForControlFlow(
-          &cluster.GetBody(), module, &lifted_partitioned_call_callees);
+          &cluster.GetBody(), module, /*vars_initialized=*/true,
+          &lifted_partitioned_call_callees);
       if (failed(result)) return WalkResult::interrupt();
       result = RegionResourceHoister::ReplaceOpWithNewOp(cluster);
       if (failed(result)) return WalkResult::interrupt();
@@ -1340,9 +1364,9 @@ LogicalResult ResourceLiftingForFunctionalControlFlow(FuncOp function) {
 
   llvm::SmallDenseMap<llvm::StringRef, PartitionedCallLiftingInfo>
       lifted_partitioned_call_callees;
-  if (failed(HoistForControlFlow(&function.front(),
-                                 cast<ModuleOp>(function.getParentOp()),
-                                 &lifted_partitioned_call_callees)))
+  if (failed(HoistForControlFlow(
+          &function.front(), cast<ModuleOp>(function.getParentOp()),
+          /*vars_initialized=*/false, &lifted_partitioned_call_callees)))
     return failure();
 
   // Clean up and canonicalize to remove dead local variables as some local
",0,train
82aa53ec3664f5bbd48fa498901d16ef151164ff,tensorflow/tensorflow,"Adjust setup to fix Tensorboard entrypoint (run_main -> main).

PiperOrigin-RevId: 182119760",setup.py,"@@ -79,13 +79,13 @@ CONSOLE_SCRIPTS = [
     # is now declared by the tensorboard pip package. If we remove the
     # TensorBoard command, pip will inappropriately remove it during install,
     # even though the command is not removed, just moved to a different wheel.
-    'tensorboard = tensorboard.main:run_main',
+    'tensorboard = tensorboard.main:main',
 ]
 # pylint: enable=line-too-long
 
 # remove the tensorboard console script if building tf_nightly
 if 'tf_nightly' in project_name:
-  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main')
+  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:main')
 
 TEST_PACKAGES = [
     'scipy >= 0.15.1',
",0,test
e58e4c754fa6145af2a411b940d8f7347a071b6f,tensorflow/tensorflow,"Minor adjustments to an error message.

PiperOrigin-RevId: 190972253",tpu_system_metadata.py,"@@ -72,9 +72,9 @@ def _query_tpu_system_metadata(master_address, run_config,
               tpu_core_count += 1
           break
     except errors.DeadlineExceededError:
-      msg = ('Fail to connect Tensorflow master. It could be the TPU worker is '
-             'not ready (still under scheduling) or Tensorflow '
-             'master address is correct: got (%s).' %
+      msg = ('Failed to connect to the Tensorflow master. The TPU worker may '
+             'not be ready (still scheduling) or the Tensorflow master address '
+             'is incorrect: got (%s).' %
              (master_address))
 
       # TODO(xiejw): For local or grpc master we might not need retry logic
",0,train
20199e91b3503881ce9a4253d64fa783f731230f,tensorflow/tensorflow,"Don't prematurely return streams

PiperOrigin-RevId: 173214110",local_client.cc,"@@ -175,10 +175,15 @@ StatusOr<std::unique_ptr<ScopedShapedBuffer>> LocalExecutable::Run(
   TF_RETURN_IF_ERROR(ValidateExecutionOptions(arguments, options, *backend_));
 
   ExecutableRunOptions actual_options = options;
+
+  Backend::StreamPtr stream;
   if (options.stream() == nullptr) {
+    // NB!  The lifetime of `stream` needs to match the lifetime of
+    // `actual_options` (otherwise we will end up using a returned stream in
+    // ExecuteOnStreamWrapper), which is why it isn't declared in the inner ""if""
+    // scope.
     TF_ASSIGN_OR_RETURN(
-        Backend::StreamPtr stream,
-        BorrowStreamForDevice(options.device_ordinal(), backend_));
+        stream, BorrowStreamForDevice(options.device_ordinal(), backend_));
     actual_options.set_stream(stream.get());
   }
   if (options.allocator() == nullptr) {
",0,train
bb405194390f1a60682c07915fa11e60fc027ec0,tensorflow/tensorflow,"[TF 2.0 API Docs] tf.image.adjust_saturation

Updated adjust_saturation by adding a usage example to its docstring in image_ops_impl.py. Also documented that InvalidArgumentError is raised for an input with an incorrect shape. The related issue is linked here: https://github.com/tensorflow/tensorflow/issues/29332",image_ops_impl.py,"@@ -2041,6 +2041,16 @@ def adjust_saturation(image, saturation_factor, name=None):
 
   Returns:
     Adjusted image(s), same shape and DType as `image`.
+    
+  Usage Example:
+    ```python
+    >> import tensorflow as tf
+    >> x = tf.random.normal(shape=(256, 256, 3))
+    >> tf.image.adjust_saturation(x, 0.5)
+    ```
+    
+  Raises:
+    InvalidArgumentError: input must have 3 channels
   """"""
   with ops.name_scope(name, 'adjust_saturation', [image]) as name:
     image = ops.convert_to_tensor(image, name='image')
",0,train
edd1f81155294073649dee0b3ec4f6b0e235f328,tensorflow/tensorflow,"[TF:XLA] Update resource_operation_safety_analysis file comment.

Read->read and write->write dependencies are not considered a safety problem, but the comment still stated they would not be clustered.

PiperOrigin-RevId: 258474807",resource_operation_safety_analysis.h,"@@ -25,11 +25,10 @@ namespace tensorflow {
 // execution and all the resource writes to the end.  This means it cannot
 // enforce arbitrary ordering dependencies (via control or data edges) between
 // resource operations.  Since all resource reads happen before all resource
-// writes, edges constraining resource reads to happen before resource writes
-// are fine, but all other kinds of edges are problematic.  This analysis
-// returns the set of pairs of resource operations that cannot be put in the
-// same cluster because XLA cannot respect the dependencies between them in the
-// TensorFlow program.
+// writes, edges constraining resource writes to happen before resource reads
+// are problematic.  This analysis returns the set of pairs of resource
+// operations that cannot be put in the same cluster because XLA cannot respect
+// the dependencies between them in the TensorFlow program.
 //
 // The restrictions are not transitive: it is fine to put A and C in the same
 // cluster even if the returned set contains (A,B) and (B,C).
@@ -41,19 +40,15 @@ namespace tensorflow {
 //
 // For instance if we auto-cluster all operations in this TensorFlow graph:
 //
-//         ReadVariablepOp0  ->  ReadVariableOp1
+//         AssignVariablepOp0  ->  AssignVariableOp1
 //                                      |
 //                                      v
-//                              AssignVariableOp0  ->  AssignVariableOp1
+//                              ReadVariableOp0  ->  ReadVariableOp1
 //
-// we will lose the ReadVariablepOp0 -> ReadVariableOp1 and the
-// AssignVariableOp0 -> AssignVariableOp1 dependencies.  I.e. it is possible for
-// XlaLaunchOp to issue ReadVariableOp1 before ReadVariablepOp0 since it reads
-// all the resource variables when the cluster starts executing without any
-// particular ordering between them; same holds for the AssignVariableOp0 ->
-// AssignVariableOp1 edge.  The ReadVariableOp1 -> AssignVariableOp0 edge will
-// be respected by XlaLaunchOp though because all reads happen before all
-// writes.
+// we will lose the AssignVariablepOp1 -> ReadVariableOp0. The ReadVariableOp0
+// -> ReadVariableOp1 and AssignVariableOp0 -> AssignVariableOp1 edges will be
+// respected by XlaLaunchOp though because all reads happen before all writes
+// with that limited clustering..
 //
 //
 // NB!  The result computed by this analysis assumes that we don't auto-cluster
",0,train
5de6f68848b8bc431e18a53fa03700820bcee57f,tensorflow/tensorflow,"Forward declare condition_variable

Necessary to enable friendship with mutex",mutex.h,"@@ -31,6 +31,8 @@ namespace tensorflow {
 
 enum LinkerInitialized { LINKER_INITIALIZED };
 
+class condition_variable;
+
 // Mimic std::mutex + C++17's shared_mutex, adding a LinkerInitialized
 // constructor interface.  This type is as fast as mutex, but is also a shared
 // lock.
",0,train
59cf62cc475651e75fc8d2948daf2444cc0e8c15,tensorflow/tensorflow,"Change estimator dep to >1.13rc0.

PiperOrigin-RevId: 231246464",setup.py,"@@ -58,7 +58,7 @@ REQUIRED_PACKAGES = [
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
     'tensorboard >= 1.12.0, < 1.13.0',
-    'tensorflow_estimator >= 1.13.0, < 1.14.0',
+    'tensorflow_estimator >= 1.13.0rc0, < 1.14.0rc0',
     'termcolor >= 1.1.0',
 ]
 
",0,train
569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() a tf.__internal__ API.

PiperOrigin-RevId: 352613683
Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",recurrent_v2.py,"@@ -37,7 +37,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_cudnn_rnn_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import sysconfig
@@ -419,19 +418,6 @@ class GRU(recurrent.DropoutRNNCellMixin, recurrent.GRU):
     if _use_new_code():
       self._defun_wrapper = _DefunWrapper(time_major, go_backwards, 'gru')
 
-  def build(self, input_shape):
-    super(GRU, self).build(input_shape)
-
-    if not all(isinstance(v, resource_variable_ops.ResourceVariable)
-               for v in self.weights):
-      # Non-resource variables, such as DistributedVariables and
-      # AutoCastVariables, do not work properly with the implementation
-      # selector, which is used when cuDNN is used. However, by chance, such
-      # variables happen to work in LSTM, so this check is only needed for GRU.
-      # TODO(b/136512020): Make non-resource variables work with the
-      # implementation selector.
-      self._could_use_gpu_kernel = False
-
   def call(self, inputs, mask=None, training=None, initial_state=None):
     # The input should be dense, padded with zeros. If a ragged input is fed
     # into the layer, it is padded and the row lengths are used for masking.
",0,train
569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() a tf.__internal__ API.

PiperOrigin-RevId: 352613683
Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",tracking_util_test.py,"@@ -39,7 +39,6 @@ from tensorflow.python.keras.optimizer_v2 import adam
 from tensorflow.python.module import module
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_ops
 from tensorflow.python.ops import template
 from tensorflow.python.ops import variable_scope
@@ -273,7 +272,7 @@ class CheckpointingTests(keras_parameterized.TestCase):
       # Optimizer slot variables are created when the original variable is
       # restored.
       self.assertAllEqual([1.5], self.evaluate(on_create_m_bias_slot))
-      dummy_var = resource_variable_ops.ResourceVariable([1.])
+      dummy_var = variables_lib.Variable([1.])
       on_create_optimizer.minimize(loss=dummy_var.read_value,
                                    var_list=[dummy_var])
       status.assert_existing_objects_matched()
@@ -459,8 +458,8 @@ class CheckpointingTests(keras_parameterized.TestCase):
 
       def __init__(self):
         super(Model, self).__init__()
-        self.w = resource_variable_ops.ResourceVariable(0.0)
-        self.b = resource_variable_ops.ResourceVariable(0.0)
+        self.w = variables_lib.Variable(0.0)
+        self.b = variables_lib.Variable(0.0)
         self.vars = [self.w, self.b]
 
       def call(self, x):
@@ -874,8 +873,7 @@ class CheckpointCompatibilityTests(keras_parameterized.TestCase):
         self._check_sentinels(root)
         # Check that there is no error when keys are missing from the name-based
         # checkpoint.
-        root.not_in_name_checkpoint = resource_variable_ops.ResourceVariable(
-            [1.])
+        root.not_in_name_checkpoint = variables_lib.Variable([1.])
         status = object_saver.restore(save_path)
         with self.assertRaises(AssertionError):
           status.assert_existing_objects_matched()
",0,train
569095ba3d5a57a95595d7db685b4bb748ca7337,tensorflow/tensorflow,"Make is_resource_variable() a tf.__internal__ API.

PiperOrigin-RevId: 352613683
Change-Id: I92b67dc0d6d93dccf096690ff84c99cbd1221295",resource_variable_ops.py,"@@ -55,6 +55,7 @@ from tensorflow.python.types import core
 from tensorflow.python.util import _pywrap_utils
 from tensorflow.python.util import compat
 from tensorflow.python.util.deprecation import deprecated
+from tensorflow.python.util.tf_export import tf_export
 
 acd.register_read_only_resource_op(""ReadVariableOp"")
 acd.register_read_only_resource_op(""VariableShape"")
@@ -2211,6 +2212,7 @@ ops.register_proto_function(
     from_proto=_from_proto_fn)
 
 
+@tf_export(""__internal__.ops.is_resource_variable"", v1=[])
 def is_resource_variable(var):
   """"""""Returns True if `var` is to be considered a ResourceVariable.""""""
   return isinstance(var, BaseResourceVariable) or hasattr(
",0,train
137c954eed6e5800d1ab6dda74c73049791fdac7,tensorflow/tensorflow,"[MLIR][KernelGen] Register MLIR context flags in `tf_to_kernel`

This change adds support for MLIR context flags like `--mlir-disable-threading`
to the `tf_to_kernel` tool.

PiperOrigin-RevId: 369658258
Change-Id: I73e06caf8c8e72916d9746b1f9828dbf62b67fc1",tf_to_kernel.cc,"@@ -173,6 +173,7 @@ int main(int argc, char** argv) {
   llvm::InitializeNativeTarget();
   llvm::InitializeNativeTargetAsmPrinter();
   mlir::registerPassManagerCLOptions();
+  mlir::registerMLIRContextCLOptions();
   llvm::cl::ParseCommandLineOptions(argc, argv, ""TF op kernel generator\n"");
 
   auto status = tensorflow::kernel_gen::Run(
",0,test
bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode.

Previously, zero-copy data transfer was only enabled for LOCAL target
workers:
https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141

Now that we're supporting the hybrid mode, we should use the local read
protocol whenever there's a local worker.

PiperOrigin-RevId: 395266341
Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client.cc,"@@ -50,6 +50,16 @@ limitations under the License.
 namespace tensorflow {
 namespace data {
 
+StatusOr<std::unique_ptr<DataServiceWorkerClient>>
+CreateDataServiceWorkerClient(const std::string& address,
+                              const std::string& protocol,
+                              const std::string& transfer_protocol) {
+  auto client = absl::make_unique<DataServiceWorkerClient>(address, protocol,
+                                                           transfer_protocol);
+  TF_RETURN_IF_ERROR(client->Initialize());
+  return client;
+}
+
 Status DataServiceWorkerClient::GetElement(const GetElementRequest& req,
                                            GetElementResult& result) {
   TF_RETURN_IF_ERROR(EnsureInitialized());
@@ -62,22 +72,20 @@ Status DataServiceWorkerClient::EnsureInitialized() {
     return Status::OK();
   }
   TF_RETURN_IF_ERROR(DataTransferClient::Build(
-      transfer_protocol_, {protocol_, address_}, &client_));
+      GetDataTransferProtocol(), {protocol_, address_}, &client_));
   return Status::OK();
 }
 
-void DataServiceWorkerClient::TryCancel() { client_->TryCancel(); }
-
-StatusOr<std::unique_ptr<DataServiceWorkerClient>>
-CreateDataServiceWorkerClient(const std::string& address,
-                              const std::string& protocol,
-                              const std::string& transfer_protocol) {
-  auto client = absl::make_unique<DataServiceWorkerClient>(address, protocol,
-                                                           transfer_protocol);
-  TF_RETURN_IF_ERROR(client->Initialize());
-  return client;
+std::string DataServiceWorkerClient::GetDataTransferProtocol() const {
+  if (transfer_protocol_ == kGrpcTransferProtocol &&
+      LocalWorkers::Get(address_) != nullptr) {
+    return kLocalTransferProtocol;
+  }
+  return transfer_protocol_;
 }
 
+void DataServiceWorkerClient::TryCancel() { client_->TryCancel(); }
+
 class GrpcDataTransferClient : public DataTransferClient {
  public:
   GrpcDataTransferClient(std::shared_ptr<grpc::ChannelCredentials> credentials,
@@ -217,8 +225,8 @@ class LocalDataTransferClient : public DataTransferClient {
         LocalWorkers::Get(worker_address_);
     if (!worker) {
       return errors::Cancelled(absl::Substitute(
-          ""Worker at address $0 is no longer available; cancel request for ""
-          ""task $1."",
+          ""Local worker at address $0 is no longer available; cancel request ""
+          ""for task $1."",
           worker_address_, req.task_id()));
     }
     return worker;
",0,train
bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode.

Previously, zero-copy data transfer was only enabled for LOCAL target
workers:
https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141

Now that we're supporting the hybrid mode, we should use the local read
protocol whenever there's a local worker.

PiperOrigin-RevId: 395266341
Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client.h,"@@ -52,6 +52,10 @@ class DataServiceWorkerClient : public DataServiceClientBase {
   Status EnsureInitialized() override;
 
  private:
+  // Returns the data transfer protocol, preferring to use the local transfer
+  // protocol if a local tf.data worker exists.
+  std::string GetDataTransferProtocol() const;
+
   const std::string transfer_protocol_;
   mutex mu_;
   // Initialization is guarded by `mu_`, but using the stub does not require
",0,train
bb7f4079afbcb11bb360846849278253207ea8cc,tensorflow/tensorflow,"[tf.data service] Enable zero-copy data transfer for AUTO mode.

Previously, zero-copy data transfer was only enabled for LOCAL target
workers:
https://github.com/tensorflow/tensorflow/blob/fdfd1e09894e082e13314dffc9d36990524ac3f1/tensorflow/core/kernels/data/experimental/data_service_dataset_op.cc#L1141

Now that we're supporting the hybrid mode, we should use the local read
protocol whenever there's a local worker.

PiperOrigin-RevId: 395266341
Change-Id: I37b881de55f6e7c858c70e1c98a4fad540f5568b",worker_client_test.cc,"@@ -142,7 +142,7 @@ TEST_F(WorkerClientTest, LocalRead) {
   LocalWorkers::Remove(GetWorkerAddress());
   EXPECT_THAT(GetElement(*client, task_id),
               StatusIs(error::CANCELLED,
-                       MatchesRegex(""Worker.*is no longer available.*"")));
+                       MatchesRegex(""Local worker.*is no longer available.*"")));
 }
 
 TEST_F(WorkerClientTest, LocalReadEmptyDataset) {
@@ -161,7 +161,7 @@ TEST_F(WorkerClientTest, LocalReadEmptyDataset) {
   LocalWorkers::Remove(GetWorkerAddress());
   EXPECT_THAT(GetElement(*client, task_id),
               StatusIs(error::CANCELLED,
-                       MatchesRegex(""Worker.*is no longer available.*"")));
+                       MatchesRegex(""Local worker.*is no longer available.*"")));
 }
 
 TEST_F(WorkerClientTest, GrpcRead) {
@@ -178,12 +178,12 @@ TEST_F(WorkerClientTest, GrpcRead) {
     EXPECT_FALSE(result.end_of_sequence);
   }
 
-  // Remove the local worker from `LocalWorkers`. Since the client reads from
-  // gRPC, this will not cause the request to fail.
+  // Remove the local worker from `LocalWorkers`. Since the client reads from a
+  // local server, this should cause the request to fail.
   LocalWorkers::Remove(GetWorkerAddress());
-  TF_ASSERT_OK_AND_ASSIGN(GetElementResult result,
-                          GetElement(*client, task_id));
-  EXPECT_TRUE(result.end_of_sequence);
+  EXPECT_THAT(GetElement(*client, task_id),
+              StatusIs(error::CANCELLED,
+                       MatchesRegex(""Local worker.*is no longer available.*"")));
 }
 
 TEST_F(WorkerClientTest, LocalServerShutsDown) {
@@ -198,7 +198,7 @@ TEST_F(WorkerClientTest, LocalServerShutsDown) {
   test_cluster_->StopWorkers();
   EXPECT_THAT(GetElement(*client, task_id),
               StatusIs(error::CANCELLED,
-                       MatchesRegex(""Worker.*is no longer available.*"")));
+                       MatchesRegex(""Local worker.*is no longer available.*"")));
 }
 
 TEST_F(WorkerClientTest, CancelClient) {
",0,train
3cb4deb6ddd20162ac4aa40db842de318d94f77b,tensorflow/tensorflow,"Make cuda_py_test create a gpu and cpu target.

PiperOrigin-RevId: 217838326",pip_smoke_test.py,"@@ -146,7 +146,7 @@ def main():
 
   missing_dependencies = []
   # File extensions and endings to ignore
-  ignore_extensions = [""_test"", ""_test.py""]
+  ignore_extensions = [""_test"", ""_test.py"", ""_test_gpu"", ""_test_gpu.py""]
 
   ignored_files = 0
   blacklisted_files = len(BLACKLIST)
",0,train
9361cd6fe21c78fea9260935d5121c9c9cd76f93,tensorflow/tensorflow,"Update docstring for keras.layers.ReLU.

PiperOrigin-RevId: 290215220
Change-Id: Iff1321a25f7ee3c7a25c9725de8bbfcb7b65434c",advanced_activations.py,"@@ -276,22 +276,42 @@ class ReLU(Layer):
   With default values, it returns element-wise `max(x, 0)`.
 
   Otherwise, it follows:
-  `f(x) = max_value` for `x >= max_value`,
-  `f(x) = x` for `threshold <= x < max_value`,
-  `f(x) = negative_slope * (x - threshold)` otherwise.
+  $$f(x) = max_value if x >= max_value$$
+  $$f(x) = x if threshold <= x < max_value$$
+  $$f(x) = negative_slope * (x - threshold) otherwise$$
+
+  Usage:
+
+  >>> layer = tf.keras.layers.ReLU()
+  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
+  >>> list(output.numpy())
+  [0.0, 0.0, 0.0, 2.0]
+  >>> layer = tf.keras.layers.ReLU(max_value=1.0)
+  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
+  >>> list(output.numpy())
+  [0.0, 0.0, 0.0, 1.0]
+  >>> layer = tf.keras.layers.ReLU(negative_slope=1.0)
+  >>> output = layer([-3.0, -1.0, 0.0, 2.0])
+  >>> list(output.numpy())
+  [-3.0, -1.0, 0.0, 2.0]
+  >>> layer = tf.keras.layers.ReLU(threshold=1.5)
+  >>> output = layer([-3.0, -1.0, 1.0, 2.0])
+  >>> list(output.numpy())
+  [0.0, 0.0, 0.0, 2.0]
 
   Input shape:
     Arbitrary. Use the keyword argument `input_shape`
-    (tuple of integers, does not include the samples axis)
+    (tuple of integers, does not include the batch axis)
     when using this layer as the first layer in a model.
 
   Output shape:
     Same shape as the input.
 
   Arguments:
-    max_value: Float >= 0. Maximum activation value.
-    negative_slope: Float >= 0. Negative slope coefficient.
-    threshold: Float. Threshold value for thresholded activation.
+    max_value: Float >= 0. Maximum activation value. Default to None, which
+      means unlimited.
+    negative_slope: Float >= 0. Negative slope coefficient. Default to 0.
+    threshold: Float. Threshold value for thresholded activation. Default to 0.
   """"""
 
   def __init__(self, max_value=None, negative_slope=0, threshold=0, **kwargs):
",0,train
579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure.

Specifically, the arg type sometimes doesn't equal the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if the two shapes aren't equal.

The verification failure was discovered in https://github.com/google/jax/discussions/6645.

PiperOrigin-RevId: 372179297
Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",mhlo_to_lhlo_with_xla.cc,"@@ -38,7 +38,6 @@ limitations under the License.
 #include ""mlir/IR/Operation.h""  // from @llvm-project
 #include ""mlir/IR/PatternMatch.h""  // from @llvm-project
 #include ""mlir/IR/SymbolTable.h""  // from @llvm-project
-#include ""mlir/IR/Verifier.h""  // from @llvm-project
 #include ""mlir/Pass/Pass.h""  // from @llvm-project
 #include ""mlir/Pass/PassOptions.h""  // from @llvm-project
 #include ""mlir/Translation.h""  // from @llvm-project
@@ -98,10 +97,6 @@ StatusOr<std::unique_ptr<HloModule>> HloModuleFromProto(
   return HloModule::CreateFromProto(module_proto, module_config);
 }
 
-bool AllocationShouldLowerToTypedArg(const BufferAllocation* alloc) {
-  return alloc->is_entry_computation_parameter() && !alloc->maybe_live_out();
-}
-
 }  // namespace
 
 // Convert the MLIR `module` from HLO dialect to LHLO dialect using XLA for the
@@ -1518,42 +1513,39 @@ StatusOr<Value> LhloDialectEmitter::GetOrCreateArrayView(
   TF_ASSIGN_OR_RETURN(BufferAllocation::Slice slice,
                       assignment_.GetUniqueSlice(instr, shape_index));
   Value alloc = allocations_[slice.allocation()];
+  if (alloc.getType() == out_type && slice.offset() == 0) {
+    return cached_value = alloc;
+  }
+
+  auto out_memref_type = out_type.dyn_cast<MemRefType>();
+  if (!out_memref_type)
+    return tensorflow::errors::Internal(
+        ""Expected memref type when creating a view for leaf type of a ""
+        ""tuple."");
+
+  Value byte_shift =
+      builder_.create<ConstantIndexOp>(alloc.getLoc(), slice.offset());
+
+  xla::Shape physical_shape =
+      xla::ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
+          static_shape);
+  TF_ASSIGN_OR_RETURN(
+      Type physical_out_type,
+      xla::ConvertShapeToType<MemRefType>(physical_shape, builder_));
 
   // TODO(timshen): revisit location handling.
   Location loc = builder_.getUnknownLoc();
 
-  Value result;
-  if (AllocationShouldLowerToTypedArg(slice.allocation())) {
-    TF_RET_CHECK(slice.offset() == 0);
-    TF_RET_CHECK(slice.size() == slice.allocation()->size());
-    result = alloc;
-  } else {
-    Value byte_shift =
-        builder_.create<ConstantIndexOp>(alloc.getLoc(), slice.offset());
-
-    xla::Shape physical_shape =
-        xla::ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
-            static_shape);
-    TF_ASSIGN_OR_RETURN(
-        Type physical_out_type,
-        xla::ConvertShapeToType<MemRefType>(physical_shape, builder_));
-
-    // ViewOp only takes memrefs without affine maps (layouts). Let ViewOp
-    // produce the physical shape (where dimensions are ordered in major to
-    // minor) first, then follow up with a MemRefReinterpretCast to cast the
-    // resulting memref to the original layout.
-    result = builder_.create<memref::ViewOp>(loc, physical_out_type, alloc,
-                                             byte_shift,
-                                             /*sizes=*/ValueRange{});
-  }
-  if (result.getType() != out_type) {
+  // ViewOp only takes memrefs without affine maps (layouts). Let ViewOp produce
+  // the physical shape (where dimensions are ordered in major to minor) first,
+  // then follow up with a MemRefReinterpretCast to cast the resulting memref to
+  // the original layout.
+  Value result =
+      builder_.create<memref::ViewOp>(loc, physical_out_type, alloc, byte_shift,
+                                      /*sizes=*/ValueRange{});
+  if (physical_out_type != out_type) {
     int64_t out_offset;
     SmallVector<int64_t, 4> out_strides;
-    auto out_memref_type = out_type.dyn_cast<MemRefType>();
-    if (!out_memref_type)
-      return tensorflow::errors::Internal(
-          ""Expected memref type when creating a view for leaf type of a ""
-          ""tuple."");
     if (failed(getStridesAndOffset(out_memref_type, out_strides, out_offset)))
       return tensorflow::errors::Internal(
           ""Failed to get strides and offset from the output type."");
@@ -1696,7 +1688,7 @@ Status LhloDialectEmitter::Initialize() {
 
     NamedAttrList arg_attr_list;
     mlir::Type arg_type;
-    if (AllocationShouldLowerToTypedArg(alloc)) {
+    if (alloc->is_entry_computation_parameter() && !alloc->maybe_live_out()) {
       xla::Shape buffer_shape = xla::ShapeUtil::GetSubshape(
           computation_.parameter_instruction(alloc->parameter_number())
               ->shape(),
@@ -1790,9 +1782,7 @@ Status HloToLhloModule(const BufferAssignment& assignment,
   if (!schedule)
     return xla::Unimplemented(""Missing sequential order for the computation"");
   const std::vector<HloInstruction*>& ordering = schedule->instructions();
-  TF_RETURN_IF_ERROR(computation->AcceptOrdered(&emitter, ordering));
-  TF_RET_CHECK(succeeded(mlir::verify(module)));
-  return Status::OK();
+  return computation->AcceptOrdered(&emitter, ordering);
 }
 
 OwningModuleRef HloTextToLhloTranslateFunction(llvm::StringRef input,
",0,train
579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure.

Specifically, the arg type sometimes doesn't equal the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if the two shapes aren't equal.

The verification failure was discovered in https://github.com/google/jax/discussions/6645.

PiperOrigin-RevId: 372179297
Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",gpu_compiler.cc,"@@ -169,6 +169,7 @@ Status GpuCompiler::OptimizeHloModule(
     pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                               /*allow_mixed_precision=*/false);
     pipeline.AddPass<AllToAllDecomposer>();
+    pipeline.AddPass<RealImagExpander>();
 
     pipeline.AddPass<OperandUpcaster>();
     pipeline.AddPass<ResultCaster>();
@@ -268,7 +269,6 @@ Status GpuCompiler::OptimizeHloModule(
       pass.AddPass<ReshapeMover>();
       pass.AddPass<HloConstantFolding>();
       pass.AddPass<ConditionalSimplifier>();
-      pipeline.AddPass<RealImagExpander>();
     }
 
     pipeline.AddPass<TransposeFolding>(
",0,train
579b142eb191b50deed006b264c4180e8575cd6a,tensorflow/tensorflow,"[MLIR] Add verification for XLA HLO -> LMHLO, and fix a verification failure.

Specifically, the arg type sometimes doesn't equal the type of its use, even if the arg is an entry computation parameter. This is because program_shape in the computation may not match the operand shape. Add a MemrefReinterpretCast if the two shapes aren't equal.

The verification failure was discovered in https://github.com/google/jax/discussions/6645.

PiperOrigin-RevId: 372179297
Change-Id: I0865ff030217410cb5913e6386a53d7a68d3b7dd",ir_emission_utils.cc,"@@ -850,49 +850,52 @@ StatusOr<BufferAllocation::Slice> GetAllocationSliceForMlir(
 
   int64 size = GetMemRefSizeInBytes(v.getType().cast<mlir::MemRefType>());
 
+  if (auto arg = v.dyn_cast<mlir::BlockArgument>()) {
+    return BufferAllocation::Slice(
+        &allocations[GetAllocationIndex(arg, constant_name)], 0, size);
+  }
+
   // We match the following patterns here:
-  //  base := ViewOp(arg) | get_global_memref (global_memref) | arg
+  //  base := ViewOp(arg) | get_global_memref (global_memref)
   //  root := base | MemRefReinterpretCastOp(base)
 
-  if (auto cast = mlir::dyn_cast_or_null<mlir::memref::ReinterpretCastOp>(
-          v.getDefiningOp())) {
-    v = cast.getViewSource();
-  }
-  if (auto view =
-          mlir::dyn_cast_or_null<mlir::memref::ViewOp>(v.getDefiningOp())) {
-    TF_RET_CHECK(view.source().isa<mlir::BlockArgument>());
-
-    return BufferAllocation::Slice(
-        &allocations[GetAllocationIndex(
-            view.source().cast<mlir::BlockArgument>(), constant_name)],
-        mlir::cast<mlir::ConstantOp>(view.byte_shift().getDefiningOp())
-            .value()
-            .cast<mlir::IntegerAttr>()
-            .getValue()
-            .getSExtValue(),
-        size);
-  }
-  if (auto get_global = mlir::dyn_cast_or_null<mlir::memref::GetGlobalOp>(
-          v.getDefiningOp())) {
-    auto module = get_global->getParentOfType<mlir::ModuleOp>();
-    if (constant_name) {
-      *constant_name = get_global.name().str();
+  if (mlir::Operation* op = v.getDefiningOp()) {
+    if (auto cast = mlir::dyn_cast<mlir::memref::ReinterpretCastOp>(op)) {
+      mlir::Value source = cast.getViewSource();
+      op = source.getDefiningOp();
+      if (!op) {
+        return Unimplemented(""MemRefReinterpretCastOp has to wrap an op"");
+      }
     }
-    auto global = mlir::cast<mlir::memref::GlobalOp>(
-        module.lookupSymbol(get_global.name()));
-    int64_t index =
-        global->getAttrOfType<mlir::IntegerAttr>(""lmhlo.alloc"").getInt();
-    return BufferAllocation::Slice(&allocations[index], 0,
-                                   allocations[index].size());
-  }
-  if (auto arg = v.dyn_cast<mlir::BlockArgument>()) {
-    return BufferAllocation::Slice(
-        &allocations[GetAllocationIndex(arg, constant_name)], 0, size);
+    if (auto view = mlir::dyn_cast<mlir::memref::ViewOp>(op)) {
+      return BufferAllocation::Slice(
+          &allocations[GetAllocationIndex(
+              view.source().cast<mlir::BlockArgument>(), constant_name)],
+          mlir::cast<mlir::ConstantOp>(view.byte_shift().getDefiningOp())
+              .value()
+              .cast<mlir::IntegerAttr>()
+              .getValue()
+              .getSExtValue(),
+          size);
+    } else if (auto get_global =
+                   mlir::dyn_cast<mlir::memref::GetGlobalOp>(op)) {
+      auto module = get_global->getParentOfType<mlir::ModuleOp>();
+      if (constant_name) {
+        *constant_name = get_global.name().str();
+      }
+      auto global = mlir::cast<mlir::memref::GlobalOp>(
+          module.lookupSymbol(get_global.name()));
+      int64_t index =
+          global->getAttrOfType<mlir::IntegerAttr>(""lmhlo.alloc"").getInt();
+      return BufferAllocation::Slice(&allocations[index], 0,
+                                     allocations[index].size());
+    }
+    return Unimplemented(""MemRefReinterpretCastOp has to wrap a ViewOp"");
   }
 
   return Unimplemented(
       ""Operand has to be in the form of ViewOp(arg) or ""
-      ""StaticMemRefCastOp(ViewOp(arg)) or arg"");
+      ""StaticMemRefCastOp(ViewOp(arg))"");
 }
 
 bool CanEmitFusedDynamicUpdateSliceInPlaceForGpu(
",0,train
dd3b812879671d633ddbb644a48f6fc44faae0bc,tensorflow/tensorflow,"Raise a better error message when a list element is not convertible to Tensor.

Previously, we hit an erroneous assertion when converting a list
argument to a list of tensors. This change makes it clearer what
caused the error when one or more of the arguments is an object that
is not convertible to `tf.Tensor`.

Fixes #2385.
Change: 122600354",op_def_library.py,"@@ -408,25 +408,36 @@ class OpDefLibrary(object):
             values = ops.convert_n_to_tensor(
                 values, name=input_arg.name, dtype=dtype if dtype else None,
                 as_ref=input_arg.is_ref)
+            if input_arg.number_attr and len(
+                set(v.dtype.base_dtype for v in values)) > 1:
+              raise TypeError()  # All types should match.
           except (TypeError, ValueError):
-            assert dtype is not None, ""Should not fail if dtype is None""
-            assert input_arg.number_attr, ""Should be number_attr case""
             # What types does the conversion function think values have?
-            values = ops.convert_n_to_tensor(values, as_ref=input_arg.is_ref)
-            observed = "", "".join(v.dtype.base_dtype.name for v in values)
+            observed_types = []
+            for value in values:
+              try:
+                converted_value = ops.convert_to_tensor(
+                    value, as_ref=input_arg.is_ref)
+                observed_types.append(converted_value.dtype.base_dtype.name)
+              except (TypeError, ValueError):
+                observed_types.append(""<NOT CONVERTIBLE TO TENSOR>"")
+            observed = "", "".join(observed_types)
 
             prefix = (
                 ""Tensors in list passed to '%s' of '%s' Op have types [%s]"" %
                 (input_name, op_type_name, observed))
-            if input_arg.type != types_pb2.DT_INVALID:
-              raise TypeError(""%s that do not match expected type %s."" %
-                              (prefix, dtype.name))
-            elif input_arg.type_attr in attrs:
-              raise TypeError(""%s that do not match type %s inferred from ""
-                              ""earlier arguments."" %
-                              (prefix, dtype.name))
+            if input_arg.number_attr:
+              if input_arg.type != types_pb2.DT_INVALID:
+                raise TypeError(""%s that do not match expected type %s."" %
+                                (prefix, dtype.name))
+              elif input_arg.type_attr in attrs:
+                raise TypeError(""%s that do not match type %s inferred from ""
+                                ""earlier arguments."" %
+                                (prefix, dtype.name))
+              else:
+                raise TypeError(""%s that don't all match."" % prefix)
             else:
-              raise TypeError(""%s that don't all match."" % prefix)
+              raise TypeError(""%s that are invalid."" % prefix)
 
           types = [x.dtype for x in values]
           inputs.extend(values)
",0,train
dd3b812879671d633ddbb644a48f6fc44faae0bc,tensorflow/tensorflow,"Raise a better error message when a list element is not convertible to Tensor.

Previously, we hit an erroneous assertion when converting a list
argument to a list of tensors. This change makes it clearer what
caused the error when one or more of the arguments is an object that
is not convertible to `tf.Tensor`.

Fixes #2385.
Change: 122600354",op_def_library_test.py,"@@ -400,6 +400,12 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase):
                           ""Expected list for 'a' ""
                           ""argument to 'TypeList' Op, not "")
 
+    with self.assertRaises(TypeError) as cm:
+      self._lib.apply_op(""TypeList"", a=[self.Tensor(dtypes.int32), None])
+    self.assertStartsWith(str(cm.exception),
+                          ""Tensors in list passed to 'a' of 'TypeList' Op ""
+                          ""have types [int32, <NOT CONVERTIBLE TO TENSOR>]"")
+
   def testTypeListTwice(self):
     self._add_op(""name: 'TypeListTwice' ""
                  ""input_arg { name: 'a' type_list_attr: 'T' } ""
@@ -957,6 +963,16 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase):
       attr { key: 'N' value { i: 2 } }
       """""", op.node_def)
 
+    op = self._lib.apply_op(""NPolymorphicIn"",
+                            a=[self.Tensor(dtypes.float32, name=""y""),
+                               self.Tensor(dtypes.float32_ref, name=""z"")],
+                            name=""r"")
+    self.assertProtoEquals(""""""
+      name: 'r' op: 'NPolymorphicIn' input: 'y' input: 'z'
+      attr { key: 'T' value { type: DT_FLOAT } }
+      attr { key: 'N' value { i: 2 } }
+      """""", op.node_def)
+
     with self.assertRaises(ValueError) as cm:
       self._lib.apply_op(""NPolymorphicIn"", a=[99])
     self.assertEqual(str(cm.exception),
@@ -966,8 +982,8 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase):
     with self.assertRaises(TypeError) as cm:
       self._lib.apply_op(""NPolymorphicIn"", a=[38, ""bar""])
     self.assertEqual(str(cm.exception),
-                     ""All tensors passed to 'a' of 'NPolymorphicIn' ""
-                     ""Op must have the same type."")
+                     ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op ""
+                     ""have types [int32, string] that don't all match."")
 
     with self.assertRaises(TypeError) as cm:
       self._lib.apply_op(""NPolymorphicIn"",
@@ -976,6 +992,13 @@ class OpDefLibraryTest(test_util.TensorFlowTestCase):
                      ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op ""
                      ""have types [int32, string] that don't all match."")
 
+    with self.assertRaises(TypeError) as cm:
+      self._lib.apply_op(""NPolymorphicIn"", a=[38, None])
+    self.assertEqual(str(cm.exception),
+                     ""Tensors in list passed to 'a' of 'NPolymorphicIn' Op ""
+                     ""have types [int32, <NOT CONVERTIBLE TO TENSOR>] that ""
+                     ""don't all match."")
+
     with self.assertRaises(TypeError) as cm:
       self._lib.apply_op(""NPolymorphicIn"",
                         a=[""abcd"", self.Tensor(dtypes.int32)])
",0,train
8b4d11790b87efa4165d15612c60cc41c21f6bf8,tensorflow/tensorflow,"[XLA:CPU] Enable transpose folding of LLVM gemv

Naive LLVM doesn't care, tiled LLVM gemm gets promoted to Eigen for transposed inputs.

PiperOrigin-RevId: 241754291",dot_op_emitter.cc,"@@ -1007,11 +1007,8 @@ bool DotImplementationCanHandleTranspose(
       GetDotImplementationStrategy(dot_instr.parent()->parent()->config(),
                                    DotInfo(dot_instr), target_machine_features);
 
-  // TODO(sanjoy): This is not quite right, it should be `impl_strategy ==
-  // kEigen || impl_strategy == kTiledLlvmIrGemv || impl_strategy ==
-  // kNaiveLlvmIr` but I'll fix this in a later CL in the interest of keeping
-  // the CL adding this comment NFC.
-  return impl_strategy == DotImplementationStrategy::kTiledLlvmIrGemm ||
+  return impl_strategy == DotImplementationStrategy::kNaiveLlvmIr ||
+         impl_strategy == DotImplementationStrategy::kTiledLlvmIrGemv ||
          impl_strategy == DotImplementationStrategy::kEigen;
 }
 
",0,train
5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata.

PiperOrigin-RevId: 353164453
Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",map_and_batch_dataset_op.cc,"@@ -33,6 +33,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/random/random.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/cpu_info.h""
+#include ""tensorflow/core/platform/env_time.h""
 #include ""tensorflow/core/platform/status.h""
 #include ""tensorflow/core/platform/stringprintf.h""
 #include ""tensorflow/core/platform/tracing.h""
@@ -254,7 +255,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
       }
       profiler::TraceMe traceme([&] {
         return profiler::TraceMeEncode(""MapAndBatchConsume"",
-                                       {{""element_id"", result->id}});
+                                       {{""element_id"", result->uid}});
       });
       return ProcessResult(ctx, result, out_tensors, end_of_sequence);
     }
@@ -328,14 +329,14 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
     // BatchResult encapsulates the output batch, as well as ancillary
     // metadata required to execute the fused map-and-batch operation.
     struct BatchResult {
-      explicit BatchResult(int64 batch_size, int64 id)
+      explicit BatchResult(int64 batch_size)
           : end_of_input(false),
             num_elements(0),
             output_allocated(false),
             status(Status::OK()),
             status_offset(-1),
             num_calls(batch_size),
-            id(id) {}
+            uid(tensorflow::EnvTime::NowNanos()) {}
 
       // UpdateStatus updates the batch's aggregate Status.
       //
@@ -362,7 +363,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
       int64 status_offset TF_GUARDED_BY(mu);
       // Counts the number of outstanding calls for this batch.
       int64 num_calls TF_GUARDED_BY(&Iterator::mu_);
-      const int64 id;
+      const uint64 uid = -1;
     };
 
     void CallCompleted(const std::shared_ptr<IteratorContext>& ctx,
@@ -387,7 +388,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
         TF_LOCKS_EXCLUDED(*mu_) {
       profiler::TraceMe traceme([&] {
         return profiler::TraceMeEncode(""MapAndBatchProduce"",
-                                       {{""element_id"", result->id}});
+                                       {{""element_id"", result->uid}});
       });
       // Get the next input element.
       std::vector<Tensor> input_element;
@@ -606,8 +607,6 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
                 (batch_results_.size() == max_batch_results_ &&
                  call_counter_ % dataset()->batch_size_ == 0));
       };
-      // Counts the total number of batches to use as an id of BatchResult.
-      int64 num_total_batches = 1;
       while (true) {
         {
           mutex_lock l(*mu_);
@@ -632,8 +631,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
 
           while (!busy()) {
             if (call_counter_ % dataset()->batch_size_ == 0) {
-              batch_results_.push_back(std::make_shared<BatchResult>(
-                  dataset()->batch_size_, num_total_batches++));
+              batch_results_.push_back(
+                  std::make_shared<BatchResult>(dataset()->batch_size_));
             }
             int64 offset = call_counter_++ % dataset()->batch_size_;
             new_calls.emplace_back(batch_results_.back(), offset);
@@ -659,7 +658,7 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
     Status ReadBatchResult(IteratorContext* ctx, IteratorStateReader* reader,
                            size_t index) TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
       batch_results_.push_back(
-          std::make_shared<BatchResult>(dataset()->batch_size_, -1));
+          std::make_shared<BatchResult>(dataset()->batch_size_));
       std::shared_ptr<BatchResult> result = batch_results_.back();
       string prefix = strings::StrCat(kBatchResults, ""_"", index);
       mutex_lock l(result->mu);
",0,train
5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata.

PiperOrigin-RevId: 353164453
Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",parallel_map_dataset_op.cc,"@@ -257,7 +257,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase {
       RecordStart(ctx);
       profiler::TraceMe traceme([&] {
         return profiler::TraceMeEncode(""ParallelMapConsume"",
-                                       {{""element_id"", result->id}});
+                                       {{""element_id"", result->uid}});
       });
       return ProcessResult(ctx, result, out_tensors, end_of_sequence);
     }
@@ -371,14 +371,13 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase {
 
    private:
     struct InvocationResult {
-      InvocationResult() = default;
-      explicit InvocationResult(int64 id) : id(id) {}
+      InvocationResult() : uid(tensorflow::EnvTime::NowNanos()) {}
 
       Notification notification;
       Status status;
       std::vector<Tensor> return_values;
       bool end_of_input = false;
-      int64 id = -1;
+      const int64 uid;
     };
 
     void CancelThreads(bool wait) TF_LOCKS_EXCLUDED(mu_) {
@@ -420,7 +419,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase {
         TF_LOCKS_EXCLUDED(*mu_) {
       profiler::TraceMe traceme([&] {
         return profiler::TraceMeEncode(""ParallelMapProduce"",
-                                       {{""element_id"", result->id}});
+                                       {{""element_id"", result->uid}});
       });
       // Get the next input element.
       std::vector<Tensor> input_element;
@@ -514,8 +513,6 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase {
         return num_calls_ >= num_parallel_calls ||
                invocation_results_.size() >= num_parallel_calls;
       };
-      // Counts the total number of calls to use as an id of InvocationResult.
-      int64 num_total_calls = 0;
       while (true) {
         {
           mutex_lock l(*mu_);
@@ -528,8 +525,7 @@ class ParallelMapDatasetOp::Dataset : public DatasetBase {
             return;
           }
           while (!busy()) {
-            invocation_results_.push_back(
-                std::make_shared<InvocationResult>(num_total_calls++));
+            invocation_results_.push_back(std::make_shared<InvocationResult>());
             new_calls.push_back(invocation_results_.back());
             num_calls_++;
           }
",0,train
5c545e646ce54934e92ea59a24ff9bcfe52991df,tensorflow/tensorflow,"[tf.data] Minor changes to xprof metadata.

PiperOrigin-RevId: 353164453
Change-Id: If38813e6d7cd8ab843859d0d3b0bc7294a2e3a95",prefetch_dataset_op.cc,"@@ -338,12 +338,14 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     // A buffer element comprises a status and (if that status is
     // OK) a vector of tensors, representing an element of the input dataset.
     struct BufferElement {
+      BufferElement() : uid(tensorflow::EnvTime::NowNanos()) {}
+
       // The producer sets `status` if getting the input element fails.
       Status status;
       // The buffered data element.
       std::vector<Tensor> value;
       int64 created_us;
-      int64 id;
+      const uint64 uid;
     };
 
     int64 buffer_limit() const TF_EXCLUSIVE_LOCKS_REQUIRED(*mu_) {
@@ -380,7 +382,7 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
       // (if we successfully got an element) the output values.
       Status s = buffer_.front().status;
       if (s.ok()) {
-        int64 buffer_element_id = buffer_.front().id;
+        int64 buffer_element_id = buffer_.front().uid;
         profiler::TraceMe traceme(
             [&] {
               return profiler::TraceMeEncode(
@@ -479,8 +481,8 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
         {
           profiler::TraceMe traceme(
               [&] {
-                return profiler::TraceMeEncode(""PrefetchProduce"",
-                                               {{""element_id"", num_produced}});
+                return profiler::TraceMeEncode(
+                    ""PrefetchProduce"", {{""element_id"", buffer_element.uid}});
               },
               profiler::kInfo);
           buffer_element.status = input_impl_->GetNext(
@@ -498,7 +500,6 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
           mutex_lock l(*mu_);
           RecordBufferEnqueue(ctx.get(), buffer_element.value);
           buffer_element.created_us = EnvTime::NowMicros();
-          buffer_element.id = num_produced;
           buffer_.push_back(std::move(buffer_element));
           cond_var_->notify_all();
         }
",0,train
9e45772d16bbcb3adb3c5faa298969e183cdc89e,tensorflow/tensorflow,Update metric_ops.py (#16712),metric_ops.py,"@@ -739,7 +739,7 @@ def _streaming_confusion_matrix_at_thresholds(predictions,
   else:
     for include in includes:
       if include not in all_includes:
-        raise ValueError('Invaild key: %s.' % include)
+        raise ValueError('Invalid key: %s.' % include)
 
   predictions, labels, weights = metrics_impl._remove_squeezable_dimensions(  # pylint: disable=protected-access
       predictions, labels, weights)
",0,train
65e1e1ebbc86a3c4740168445dae4c2075ea2932,tensorflow/tensorflow,"Small changes to API generation to help creation of virtual pip.

- Underscore some imports like _print_function so we don't have to
  delete them. Deleting them doesn't work well since they get added
  to our __all__ list before being deleted which makes ""import *"" fail.
- Give a more unique name to some generated target in the api genrule.
  Doing this since I want multiple genrules that generate the same version
  of the API. (a compat_v1 and root_v1 for example).

PiperOrigin-RevId: 221524470",create_python_api.py,"@@ -45,10 +45,10 @@ _GENERATED_FILE_HEADER = """"""# This file is MACHINE GENERATED! Do not edit.
 \""\""\""%s
 \""\""\""
 
-from __future__ import print_function
+from __future__ import print_function as _print_function
 
 """"""
-_GENERATED_FILE_FOOTER = '\n\ndel print_function\n'
+_GENERATED_FILE_FOOTER = '\n\ndel _print_function\n'
 
 
 class SymbolExposedTwiceError(Exception):
",0,train
6c6f5f144c8a780edbc9cc44d957b3cda363ee86,tensorflow/tensorflow,"Fix index out of bounds bug in GetNameFromURI()
Change: 133402965",file_system.cc,"@@ -76,8 +76,8 @@ string GetNameFromURI(const string& name) {
   // If the URI confirmed to scheme://filename, skip the two '/'s and return
   // filename. Otherwise return the original 'name', and leave it up to the
   // implementations to handle the full URI.
-  if (filename[0] == '/' && filename[1] == '/') {
-    return filename.substr(2).ToString();
+  if (filename.Consume(""//"")) {
+    return filename.ToString();
   }
   return name;
 }
",0,train
6c6f5f144c8a780edbc9cc44d957b3cda363ee86,tensorflow/tensorflow,"Fix index out of bounds bug in GetNameFromURI()
Change: 133402965",file_system_test.cc,"@@ -0,0 +1,31 @@
+/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/platform/file_system.h""
+
+#include ""tensorflow/core/platform/test.h""
+
+namespace tensorflow {
+namespace {
+
+TEST(FileSystemTest, GetNameFromURI) {
+  EXPECT_EQ(""foo"", GetNameFromURI(""file://foo""));
+  EXPECT_EQ(""file:/"", GetNameFromURI(""file:/""));
+  EXPECT_EQ(""file:"", GetNameFromURI(""file:""));
+  EXPECT_EQ(""bar"", GetNameFromURI(""bar""));
+}
+
+}  // namespace
+}  // namespace tensorflow
",0,train
43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray

This makes the array->scalar conversion explicit at conversion site.

PiperOrigin-RevId: 249792188",tf_session_helper.cc,"@@ -147,7 +147,8 @@ void TF_Run_wrapper_helper(TF_DeprecatedSession* session, const char* handle,
       Set_TF_Status_from_Status(out_status, s);
       return;
     }
-    py_outputs_safe.emplace_back(make_safe(py_array));
+    py_outputs_safe.emplace_back(
+        make_safe(PyArray_Return(reinterpret_cast<PyArrayObject*>(py_array))));
   }
 
   // 6. If we reach this point, we have successfully built a list of objects
@@ -274,7 +275,8 @@ void RunCallableHelper(tensorflow::Session* session, int64_t handle,
       Set_TF_Status_from_Status(out_status, s);
       return;
     }
-    py_outputs_safe.push_back(make_safe(py_array));
+    py_outputs_safe.push_back(
+        make_safe(PyArray_Return(reinterpret_cast<PyArrayObject*>(py_array))));
   }
 
   // If we reach this point, we have successfully built a list of objects
@@ -423,7 +425,8 @@ void TF_SessionRun_wrapper_helper(TF_Session* session, const char* handle,
       Set_TF_Status_from_Status(out_status, s);
       return;
     }
-    py_outputs_safe.emplace_back(make_safe(py_array));
+    py_outputs_safe.emplace_back(
+        make_safe(PyArray_Return(reinterpret_cast<PyArrayObject*>(py_array))));
   }
 
   // If we reach this point, we have successfully built a list of objects so we
@@ -672,7 +675,7 @@ PyObject* TF_TryEvaluateConstant_wrapper(TF_Graph* graph, TF_Output output,
   Status s = TF_TensorToPyArray(std::move(safe_result_tensor), &out);
   Set_TF_Status_from_Status(status, s);
   if (!s.ok()) Py_RETURN_NONE;
-  return out;
+  return PyArray_Return(reinterpret_cast<PyArrayObject*>(out));
 }
 
 }  // namespace tensorflow
",0,train
43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray

This makes the array->scalar conversion explicit at conversion site.

PiperOrigin-RevId: 249792188",pywrap_tensor.cc,"@@ -653,7 +653,7 @@ static PyObject* EagerTensor_numpy(EagerTensor* self) {
             dims.size(), dims.data(), data, t->dtype(), [copy] { delete copy; },
             &ret)
             .ok()) {
-      return ret;
+      return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
     }
   }
 
@@ -662,7 +662,7 @@ static PyObject* EagerTensor_numpy(EagerTensor* self) {
     Py_XDECREF(ret);
     return nullptr;
   } else {
-    return ret;
+    return PyArray_Return(reinterpret_cast<PyArrayObject*>(ret));
   }
 }
 
",0,train
43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray

This makes the array->scalar conversion explicit at conversion site.

PiperOrigin-RevId: 249792188",ndarray_tensor.cc,"@@ -407,9 +407,7 @@ Status TF_TensorToPyArray(Safe_TF_TensorPtr tensor, PyObject** out_ndarray) {
                PyArray_NBYTES(py_array));
   }
 
-  // PyArray_Return turns rank 0 arrays into numpy scalars
-  *out_ndarray = PyArray_Return(
-      reinterpret_cast<PyArrayObject*>(safe_out_array.release()));
+  *out_ndarray = safe_out_array.release();
   return Status::OK();
 }
 
",0,train
43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray

This makes the array->scalar conversion explicit at conversion site.

PiperOrigin-RevId: 249792188",ndarray_tensor_bridge.cc,"@@ -218,7 +218,7 @@ Status ArrayFromMemory(int dim_size, npy_intp* dims, void* data, DataType dtype,
     Py_DECREF(releaser);
     return errors::Unknown(""Python array refused to use memory."");
   }
-  *result = PyArray_Return(np_array);
+  *result = reinterpret_cast<PyObject*>(np_array);
   return Status::OK();
 }
 
",0,train
43c7131efcbf8aca957c293498bfd1a300ee4c76,tensorflow/tensorflow,"Removed PyArray_Return from TF_TensorToPyArray and ConvertTensorToNdarray

This makes the array->scalar conversion explicit at conversion site.

PiperOrigin-RevId: 249792188",py_func.cc,"@@ -92,6 +92,7 @@ Status MakeArgTuple(const PyCall* call, PyObject** tuple) {
         Py_DECREF(lst);
         return s;
       }
+      arg = PyArray_Return(reinterpret_cast<PyArrayObject*>(arg));
     }
     PyList_SetItem(lst, i, arg);
   }
@@ -467,7 +468,7 @@ Status ConvertTensorToNdarray(const Tensor& t, PyObject** ret) {
     StringPiece p = t.tensor_data();
     memcpy(PyArray_DATA(np_array), p.data(), p.size());
   }
-  *ret = PyArray_Return(np_array);
+  *ret = reinterpret_cast<PyObject*>(np_array);
   return Status::OK();
 }
 
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",xla_test.py,"@@ -217,6 +217,8 @@ class XLACompileContextTest(test.TestCase, parameterized.TestCase):
 class XlaCompileTest(test.TestCase):
 
   @test_util.run_v2_only
+  @test_util.disable_tfrt(
+      'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.')
   def test_xla_compile_eager(self):
     """"""Tests that xla.compile raises proper exception when used eagerly.""""""
 
@@ -225,6 +227,8 @@ class XlaCompileTest(test.TestCase):
 
     self.assertEqual(self.evaluate(xla.compile(computation, [1, 2])[0]), 3)
 
+  @test_util.disable_tfrt(
+      'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.')
   def test_xla_compile_in_function(self):
     """"""Tests that xla.compile works in tf.function.""""""
 
@@ -238,6 +242,8 @@ class XlaCompileTest(test.TestCase):
 
     self.assertEqual(self.evaluate(func_wrapper(1))[0], 2)
 
+  @test_util.disable_tfrt(
+      'Legacy XLA test. It depends on EncapsulateXlaComputationsPass.')
   def test_xla_compile_write_variable_in_function(self):
     """"""Tests that xla.compile works with variable in tf.function.""""""
     a = variable_scope.get_variable(
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",replicate_test.py,"@@ -248,16 +248,22 @@ class EagerClusterReplicateTest(test_base.DatasetTestBase,
   def __init__(self, methodName=""runTest""):  # pylint: disable=invalid-name
     super(EagerClusterReplicateTest, self).__init__(methodName)
     self._job_name = ""remove_device""
-    self._cached_server1 = server_lib.Server.create_local_server()
-    self._cached_server2 = server_lib.Server.create_local_server()
-    self._cached_server1_target = self._cached_server1.target[len(""grpc://""):]
-    self._cached_server2_target = self._cached_server2.target[len(""grpc://""):]
     self._device0 = ""/job:%s/replica:0/task:0/device:CPU:0"" % self._job_name
     self._device1 = ""/job:%s/replica:0/task:1/device:CPU:0"" % self._job_name
     self._device2 = ""/job:%s/replica:0/task:2/device:CPU:0"" % self._job_name
 
   def setUp(self):
     super(EagerClusterReplicateTest, self).setUp()
+
+    if context.context().use_tfrt:
+      self.skipTest(""b/171412104: This test requires distributed support."")
+
+    # TODO(b/171412104): Move create server to __init__ once tfrt support it.
+    self._cached_server1 = server_lib.Server.create_local_server()
+    self._cached_server2 = server_lib.Server.create_local_server()
+    self._cached_server1_target = self._cached_server1.target[len(""grpc://""):]
+    self._cached_server2_target = self._cached_server2.target[len(""grpc://""):]
+
     # Start the local server.
     local_port = pywrap_tfe.TF_PickUnusedPortOrDie()
     context.set_server_def(
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",snapshot_test.py,"@@ -30,6 +30,7 @@ from tensorflow.python.data.experimental.ops import snapshot
 from tensorflow.python.data.kernel_tests import test_base
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.data.ops import readers as core_readers
+from tensorflow.python.eager import context
 from tensorflow.python.framework import combinations
 from tensorflow.python.framework import errors
 from tensorflow.python.ops import gen_array_ops
@@ -371,6 +372,8 @@ class SnapshotDatasetTest(reader_dataset_ops_test_base.TFRecordDatasetTestBase,
 
   @combinations.generate(test_base.default_test_combinations())
   def testReadOptimizableUsingFlatMap(self):
+    if context.context().use_tfrt:
+      self.skipTest(""b/177260096: Flaky test."")
     dataset = dataset_ops.Dataset.range(100)
     # Will be optimized into ShuffleAndRepeat.
     dataset = dataset.shuffle(10)
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",check_numerics_callback_test.py,"@@ -250,14 +250,12 @@ class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase):
     # Check that the correct line for op creation is printed.
     self.assertTrue(re.search(r""Stack trace of op's creation"", message))
     self.assertIn(""return math_ops.log(-x)"", message)
-    if context.executing_eagerly():
-      # The code path for raising error is slightly different under graph mode.
-      self.assertTrue(message.endswith(""\n""))
 
   @test_util.run_in_graph_and_eager_modes
   @test_util.disable_xla(
       ""There is a small inconsistency in the step at which overflow happens: ""
       ""128 (without XLA) and 127 (with XLA)."")
+  @test_util.disable_tfrt(""b/177261532: TFRT cannot detect overflow yet."")
   def testOverflowInTfFunction(self):
     """"""Test catching Infinity caused by overflow in a tf.function with while.""""""
     check_numerics_callback.enable_check_numerics()
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",cond_v2_test.py,"@@ -1461,6 +1461,7 @@ class CondV2ContainerTest(test.TestCase):
       self.assertEqual(compat.as_bytes(""""), container(q5.queue_ref))
 
 
+@test_util.disable_tfrt(""b/171412104: This test requires distributed support."")
 class CondV2ColocationGroupAndDeviceTest(test.TestCase, parameterized.TestCase):
 
   def setUp(self):
",0,train
b8cd771a055ef8a124245f0a8423c47dea19d6c5,tensorflow/tensorflow,"Disable a few failing tests in tensorflow.v2_tfrt and fix tensorflow/python/ops/ragged:ragged_map_fn_op_test.

PiperOrigin-RevId: 351449157
Change-Id: Ie6486b9c6e13e2f10143c403350aaca84d1b3441",collective_ops_test.py,"@@ -444,6 +444,8 @@ class CollectiveOpTest(test.TestCase):
     self.assertAllClose(results_[1], expected_output_, rtol=1e-5, atol=1e-5)
 
   @test_util.run_v2_only
+  @test_util.disable_tfrt(
+      'b/177270918: TFRT has dead lock when executing collective ops.')
   def testCollectiveGroupSizeMismatch(self):
     cpus = config.list_physical_devices('CPU')
     self.assertEqual(len(cpus), 1)
",0,train
281d056c9e29727c93a20e2170fd233aab076147,tensorflow/tensorflow,"Automated rollback of change 150082087
Change: 150140770",op_kernel.cc,"@@ -1098,7 +1098,8 @@ Status ValidateKernelRegistrations(const OpRegistryInterface& op_registry) {
     const OpRegistrationData* op_reg_data;
     const Status status = op_registry.LookUp(kernel_def.op(), &op_reg_data);
     if (!status.ok()) {
-      LOG(FATAL) << ""OpKernel ('"" << ProtoShortDebugString(kernel_def)
+      // TODO(josh11b): Make this a hard error.
+      LOG(ERROR) << ""OpKernel ('"" << ProtoShortDebugString(kernel_def)
                  << ""') for unknown op: "" << kernel_def.op();
       continue;
     }
",0,train
b7552cff4e9bb4f9d0b5a9f80c8a607e8db82901,tensorflow/tensorflow,Update prefetching_ops.py,prefetching_ops.py,"@@ -76,7 +76,6 @@ def copy_to_device(target_device, source_device=""/cpu:0""):
     return _CopyToDeviceDataset(
         dataset, target_device=target_device,
         source_device=source_device).with_options(options)
-        source_device=source_device)
 
   return _apply_fn
 
",0,train
a7e6b483d3b14be2f2cb419693d16d0639be4822,tensorflow/tensorflow,"Use a fallback graphdef based conversion when saved model schema version is zero

PiperOrigin-RevId: 321067895
Change-Id: I604657fdbd3c41a1ddc0b7bbfb21b919b3d8a187",lite.py,"@@ -510,6 +510,10 @@ class TFLiteConverterBase(object):
       if not self._saved_model_exported_names:
         self._saved_model_exported_names = []
       self._saved_model_version = saved_model_proto.saved_model_schema_version
+      if self._saved_model_version == 0:
+        self.saved_model_dir = None
+        logging.warning(""SavedModel schema version is zero."")
+        return
       if self._saved_model_version not in [1, 2]:
         raise ValueError(""SavedModel file format({0}) is not supported"".format(
             self._saved_model_version))
",0,train
a7e6b483d3b14be2f2cb419693d16d0639be4822,tensorflow/tensorflow,"Use a fallback graphdef based conversion when saved model schema version is zero

PiperOrigin-RevId: 321067895
Change-Id: I604657fdbd3c41a1ddc0b7bbfb21b919b3d8a187",lite_v2_test.py,"@@ -36,9 +36,11 @@ from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.layers import recurrent
 from tensorflow.python.keras.layers import recurrent_v2
+from tensorflow.python.lib.io import file_io
 from tensorflow.python.platform import test
 from tensorflow.python.saved_model import save_options
 from tensorflow.python.saved_model import saved_model
+from tensorflow.python.saved_model.loader_impl import parse_saved_model
 from tensorflow.python.saved_model.save import save
 from tensorflow.python.training.tracking import tracking
 
@@ -548,6 +550,25 @@ class FromSavedModelTest(lite_v2_test_util.ModelTest):
       self.assertTrue(([1, 16, 16, 3] == output_details[0]['shape']).all())
       self.assertEqual((0., 0.), output_details[0]['quantization'])
 
+  @test_util.run_v2_only
+  def testTF1HubFormattedModel(self):
+    """"""Test a TF1 hub formatted model.""""""
+    saved_model_dir = self._createV1SavedModel(shape=[1, 16, 16, 3])
+
+    # TF1 hub model is based on V1 saved model and they omit the saved model
+    # schema version setting.
+    saved_model_proto = parse_saved_model(saved_model_dir)
+    saved_model_proto.saved_model_schema_version = 0
+
+    saved_model_pb_file_path = os.path.join(saved_model_dir, 'saved_model.pb')
+    with file_io.FileIO(saved_model_pb_file_path, 'wb') as writer:
+      writer.write(saved_model_proto.SerializeToString())
+
+    # Convert model and ensure model is not None.
+    converter = lite.TFLiteConverterV2.from_saved_model(saved_model_dir)
+    tflite_model = converter.convert()
+    self.assertTrue(tflite_model)
+
   @test_util.run_v2_only
   def testConstModel(self):
     """"""Test a basic model with functions to make sure functions are inlined.""""""
",0,train
4bab47f1d12cfdaee10d0e8ad087973a5a1c2560,tensorflow/tensorflow,"Update Materialize Broadcasts for same-rank broadcasts

PiperOrigin-RevId: 311001875
Change-Id: Ib5743ffa5d3605c9a58def1952ad8bd0eed24682",materialize_broadcasts.cc,"@@ -50,12 +50,6 @@ static DenseIntElementsAttr GetI64ElementsAttrForSeq(int start, int end,
 template <typename SrcOp>
 bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter,
                                        Value *out_lhs, Value *out_rhs) {
-  if (!op.broadcast_dimensions().hasValue()) {
-    // Note: the op may still have an implicit broadcast on it, such as
-    // for (tensor<1xf32>, tensor<4xf32>).
-    return false;
-  }
-
   // Insert BroadcastInDimOps for the left-hand-side and right-hand-side args,
   // replacing the original LHS and RHS args in the source op with the results
   // of the broadcasts.
@@ -79,25 +73,7 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter,
 
   auto lhs_rank = lhs_ranked_type.getRank();
   auto rhs_rank = rhs_ranked_type.getRank();
-
-  // Set broadcast_dimensions to [0, ..., rank] for the higher rank arg.
-  // Use the original op.broadcast_dimensions for the lower rank arg.
-  auto higher_rank_broadcast_dims =
-      GetI64ElementsAttrForSeq(0, std::max(lhs_rank, rhs_rank), rewriter);
-  DenseIntElementsAttr lhs_broadcast_dims;
-  DenseIntElementsAttr rhs_broadcast_dims;
-  if (lhs_rank > rhs_rank) {
-    lhs_broadcast_dims = higher_rank_broadcast_dims;
-    rhs_broadcast_dims = op.broadcast_dimensions().getValue();
-  } else if (lhs_rank < rhs_rank) {
-    lhs_broadcast_dims = op.broadcast_dimensions().getValue();
-    rhs_broadcast_dims = higher_rank_broadcast_dims;
-  } else {
-    // This shouldn't happen for legal ops. If the broadcast_dimensions
-    // attribute is set, the ranks should be different.
-    // TODO(scotttodd): Add a custom verification for ops and assert here.
-    return false;
-  }
+  ArrayRef<int64_t> op_shape = op_ranked_type.getShape();
 
   // BroadcastInDimOp must have the same element type for operands and results,
   // so preserve the original output shape and the original input element type.
@@ -105,16 +81,32 @@ bool CreateStaticBroadcastsForBinaryOp(SrcOp op, PatternRewriter *rewriter,
   //   broadcast_in_dim (tensor<1x4xf32>) -> tensor<1x4xf32>
   //   broadcast_in_dim (tensor<4xf32>) -> tensor<1x4xf32>
   //   SrcOp (tensor<1x4xf32>, tensor<1x4xf32>) -> tensor<1x4xi1>
-  ArrayRef<int64_t> op_shape = op_ranked_type.getShape();
-  auto lhs_type =
-      RankedTensorType::get(op_shape, lhs_ranked_type.getElementType());
-  auto rhs_type =
-      RankedTensorType::get(op_shape, rhs_ranked_type.getElementType());
+  if (lhs_ranked_type.getShape() != op_ranked_type.getShape()) {
+    auto type =
+        RankedTensorType::get(op_shape, lhs_ranked_type.getElementType());
+    DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, lhs_rank, rewriter);
+    if (lhs_rank < rhs_rank) {
+      attr = op.broadcast_dimensions().getValue();
+    }
+
+    lhs =
+        rewriter->createOrFold<BroadcastInDimOp>(op.getLoc(), type, lhs, attr);
+  }
+
+  if (rhs_ranked_type.getShape() != op_ranked_type.getShape()) {
+    auto type =
+        RankedTensorType::get(op_shape, rhs_ranked_type.getElementType());
+    DenseIntElementsAttr attr = GetI64ElementsAttrForSeq(0, rhs_rank, rewriter);
+    if (rhs_rank < lhs_rank) {
+      attr = op.broadcast_dimensions().getValue();
+    }
+
+    rhs =
+        rewriter->createOrFold<BroadcastInDimOp>(op.getLoc(), type, rhs, attr);
+  }
 
-  *out_lhs = rewriter->createOrFold<BroadcastInDimOp>(op.getLoc(), lhs_type,
-                                                      lhs, lhs_broadcast_dims);
-  *out_rhs = rewriter->createOrFold<BroadcastInDimOp>(op.getLoc(), rhs_type,
-                                                      rhs, rhs_broadcast_dims);
+  *out_lhs = lhs;
+  *out_rhs = rhs;
   return true;
 }
 
@@ -359,9 +351,15 @@ struct CompareWithBroadcastConvert : public OpRewritePattern<CompareOp> {
 
 void SetupMaterializeBroadcastsLegality(MLIRContext *context,
                                         ConversionTarget *conversionTarget) {
-#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType) \
-  conversionTarget->addDynamicallyLegalOp<OpType>(      \
-      [](OpType op) { return !op.broadcast_dimensions().hasValue(); });
+#define ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(OpType)           \
+  conversionTarget->addDynamicallyLegalOp<OpType>([](OpType op) { \
+    if (op.broadcast_dimensions().hasValue()) return false;       \
+    auto l = op.lhs().getType().cast<ShapedType>();               \
+    auto r = op.rhs().getType().cast<ShapedType>();               \
+    if (!l.hasRank() || !r.hasRank()) return false;               \
+    return l.getShape() == r.getShape();                          \
+  });
+
   // Binary elementwise ops.
   ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(AddOp);
   ADD_DYNAMICALLY_LEGAL_OP_WITH_BROADCAST(Atan2Op);
",0,test
716b7713e4c8b2d8f093f639ca41816cf4e1c696,tensorflow/tensorflow,Fix tensorboard import path,setup.py,"@@ -43,7 +43,7 @@ else:
 
 # pylint: disable=line-too-long
 CONSOLE_SCRIPTS = [
-    'tensorboard = tensorflow.tensorboard.backend.tensorboard:main',
+    'tensorboard = tensorflow.tensorboard.tensorboard:main',
 ]
 # pylint: enable=line-too-long
 
",0,test
a559acfb25886aa62077765a7c3739a50ca94b83,tensorflow/tensorflow,"Disable buggy ""small"" CUDA kernel for DepthwiseConv2dBackpropInput.

This fixes an issue where DepthwiseConv2dBackpropInput had incorrect outputs in some cases.

PiperOrigin-RevId: 300692210
Change-Id: Ib830d64df5c6dbfa5a04354db1031603b6c58bdc",depthwise_conv_op_gpu.h,"@@ -987,7 +987,9 @@ Status LaunchDepthwiseConv2dBackpropInputGPU(OpKernelContext* ctx,
                                              const T* filter, T* in_backprop,
                                              TensorFormat data_format) {
   if (args.depth_multiplier == 1) {
-    if (CanLaunchDepthwiseConv2dGPUSmall(args)) {
+    // This kernel doesn't currently work in all cases so it is disabled.
+    // TODO(b/150988950): Fix and reenable this kernel.
+    if (/* CanLaunchDepthwiseConv2dGPUSmall(args) */ false) {
       return LaunchDepthwiseConv2dGPUSmall<
           T, DIRECTION_BACKWARD, kKnownFilterWidth, kKnownFilterHeight>(
           ctx, args, out_backprop, filter, in_backprop, data_format);
",0,train
a559acfb25886aa62077765a7c3739a50ca94b83,tensorflow/tensorflow,"Disable buggy ""small"" CUDA kernel for DepthwiseConv2dBackpropInput.

This fixes an issue where DepthwiseConv2dBackpropInput had incorrect outputs in some cases.

PiperOrigin-RevId: 300692210
Change-Id: Ib830d64df5c6dbfa5a04354db1031603b6c58bdc",depthwise_conv_op_test.py,"@@ -186,6 +186,11 @@ def CheckGradConfigsToTest():
       Config([1, 15, 15, 2], [1, 3, 2, 1], [1, 15, 15, 2]),
       Config([2, 15, 16, 1], [3, 3, 1, 2], [2, 5, 5, 2], 3, padding=""VALID""),
       Config([2, 5, 8, 1], [4, 3, 1, 2], [2, 5, 8, 2], dilations=[1, 2]),
+      # These cases test the kernels in depthwise_conv_op_gpu.h which are used
+      # if the input size is small.
+      Config([1, 3, 1, 2], [2, 1, 2, 1], [1, 3, 1, 2]),
+      Config([2, 2, 3, 2], [2, 1, 2, 1], [2, 2, 3, 2]),
+      Config([2, 2, 3, 1], [2, 2, 1, 1], [2, 2, 3, 1]),
   ]
 
 
",0,train
61368b23ac560d158a27e679ac570a9b7ae94e0a,tensorflow/tensorflow,"[XLA] Use IsInf function in implementation of lgamma.

PiperOrigin-RevId: 235275004",math.cc,"@@ -402,9 +402,7 @@ XlaOp Lgamma(XlaOp input) {
 
     // lgamma(+/-inf) = +inf.
     XlaOp inf_bcast = FullLike(input, std::numeric_limits<float>::infinity());
-    return Select(Or(IsFinite(input),                           // is finite, or
-                     Not(Or(Lt(input, one), Ge(input, one)))),  // is nan
-                  result, inf_bcast);
+    return Select(IsInf(input), inf_bcast, result);
   };
 
   auto& b = *input.builder();
",0,train
93d4af9c859cf82e10bb443bf8fc1c4df6a293f9,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2022-04-10

PiperOrigin-RevId: 440674055",compat.py,"@@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 4, 9)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 4, 10)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",quantile_ops.cc,"@@ -296,8 +296,9 @@ class QuantileAccumulatorAddSummariesOp : public OpKernel {
             int64 start, int64 end) {
           for (int resource_handle_idx = start; resource_handle_idx < end;
                ++resource_handle_idx) {
-            ResourceHandle handle = resource_handle_list[resource_handle_idx]
-                                        .flat<ResourceHandle>()(0);
+            const ResourceHandle& handle =
+                resource_handle_list[resource_handle_idx]
+                    .flat<ResourceHandle>()(0);
             QuantileStreamResource* streams_resource;
             // Create a reference to the underlying resource using the handle.
             OP_REQUIRES_OK(context,
@@ -709,8 +710,9 @@ class QuantileAccumulatorGetBucketsOp : public OpKernel {
          &buckets_list, stamp_token](int64 start, int64 end) {
           for (int resource_handle_idx = start; resource_handle_idx < end;
                ++resource_handle_idx) {
-            ResourceHandle handle = resource_handle_list[resource_handle_idx]
-                                        .flat<ResourceHandle>()(0);
+            const ResourceHandle& handle =
+                resource_handle_list[resource_handle_idx]
+                    .flat<ResourceHandle>()(0);
             QuantileStreamResource* streams_resource;
             OP_REQUIRES_OK(context,
                            LookupResource(context, handle, &streams_resource));
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",stats_accumulator_ops.cc,"@@ -448,8 +448,9 @@ class StatsAccumulatorScalarAddOp : public OpKernel {
          stamp_token](int64 start, int64 end) {
           for (int resource_handle_idx = start; resource_handle_idx < end;
                ++resource_handle_idx) {
-            ResourceHandle handle = resource_handle_list[resource_handle_idx]
-                                        .flat<ResourceHandle>()(0);
+            const ResourceHandle& handle =
+                resource_handle_list[resource_handle_idx]
+                    .flat<ResourceHandle>()(0);
 
             StatsAccumulatorScalarResource* accumulator_resource;
             OP_REQUIRES_OK(context, LookupResource(context, handle,
@@ -512,8 +513,9 @@ class StatsAccumulatorTensorAddOp : public OpKernel {
          stamp_token](int64 start, int64 end) {
           for (int resource_handle_idx = start; resource_handle_idx < end;
                ++resource_handle_idx) {
-            ResourceHandle handle = resource_handle_list[resource_handle_idx]
-                                        .flat<ResourceHandle>()(0);
+            const ResourceHandle& handle =
+                resource_handle_list[resource_handle_idx]
+                    .flat<ResourceHandle>()(0);
 
             StatsAccumulatorTensorResource* accumulator_resource;
             OP_REQUIRES_OK(context, LookupResource(context, handle,
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",direct_session_test.cc,"@@ -1255,7 +1255,7 @@ TEST(DirectSessionTest, RunHandleTest) {
   ASSERT_TRUE(s.ok());
   ASSERT_EQ(1, outputs.size());
 
-  ResourceHandle resource_handle = outputs[0].scalar<ResourceHandle>()();
+  const ResourceHandle& resource_handle = outputs[0].scalar<ResourceHandle>()();
   Tensor string_handle(DT_STRING, {});
   string_handle.flat<string>().setConstant(resource_handle.name());
 
@@ -1308,7 +1308,7 @@ TEST(DirectSessionTest, RunHandleTest_Callable) {
   ASSERT_TRUE(s.ok());
   ASSERT_EQ(1, outputs.size());
 
-  ResourceHandle resource_handle = outputs[0].scalar<ResourceHandle>()();
+  const ResourceHandle& resource_handle = outputs[0].scalar<ResourceHandle>()();
   Tensor string_handle(DT_STRING, {});
   string_handle.flat<string>().setConstant(resource_handle.name());
 
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",resource_mgr.cc,"@@ -271,7 +271,7 @@ string ContainerInfo::DebugString() const {
                          ""]"");
 }
 
-ResourceHandle HandleFromInput(OpKernelContext* ctx, int input) {
+const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input) {
   return ctx->input(input).flat<ResourceHandle>()(0);
 }
 
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",resource_mgr.h,"@@ -79,7 +79,7 @@ class ResourceBase : public core::RefCounted {
   virtual string DebugString() = 0;
 
   // Returns memory used by this resource.
-  virtual int64 MemoryUsed() const { return 0; };
+  virtual int64 MemoryUsed() const { return 0; }
 };
 
 // Container used for per-step resources.
@@ -234,7 +234,7 @@ ResourceHandle MakePerStepResourceHandle(OpKernelContext* ctx,
                                          const string& name);
 
 // Returns a resource handle from a numbered op input.
-ResourceHandle HandleFromInput(OpKernelContext* ctx, int input);
+const ResourceHandle& HandleFromInput(OpKernelContext* ctx, int input);
 Status HandleFromInput(OpKernelContext* ctx, StringPiece input,
                        ResourceHandle* handle);
 
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",partitioned_function_ops.cc,"@@ -210,7 +210,7 @@ class PartitionedCallOp : public AsyncOpKernel {
         TF_RETURN_IF_ERROR(node->attrs().Find(""T"", &attr_value));
         DataType dtype = attr_value->type();
         if (dtype == DT_RESOURCE) {
-          ResourceHandle handle = args[index].flat<ResourceHandle>()(0);
+          const ResourceHandle& handle = args[index].flat<ResourceHandle>()(0);
           node->set_assigned_device_name(handle.device());
         }
       }
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",queue_ops.cc,"@@ -65,7 +65,7 @@ class FakeQueueOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    ResourceHandle ref = context->input(0).flat<ResourceHandle>()(0);
+    const ResourceHandle& ref = context->input(0).flat<ResourceHandle>()(0);
     handle_.AccessTensor(context)->flat<string>()(0) = ref.container();
     handle_.AccessTensor(context)->flat<string>()(1) = ref.name();
     context->set_output_ref(0, &mu_, handle_.AccessTensor(context));
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",resource_variable_ops.cc,"@@ -79,7 +79,7 @@ ReadVariableOp::ReadVariableOp(OpKernelConstruction* c) : OpKernel(c) {
 
 void ReadVariableOp::Compute(OpKernelContext* ctx) {
   Var* variable = nullptr;
-  ResourceHandle handle = HandleFromInput(ctx, 0);
+  const ResourceHandle& handle = HandleFromInput(ctx, 0);
   const auto status = LookupResource(ctx, handle, &variable);
   OP_REQUIRES(ctx, status.ok(),
               errors::FailedPrecondition(
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",stack_ops.cc,"@@ -131,10 +131,8 @@ class Stack : public ResourceBase {
 };
 
 Status GetStack(OpKernelContext* ctx, Stack** stack) {
-  string key;
   if (ctx->input_dtype(0) == DT_RESOURCE) {
-    auto resource = ctx->input(0).flat<ResourceHandle>()(0);
-    key = resource.name();
+    return LookupResource(ctx, HandleFromInput(ctx, 0), stack);
   } else {
     Tensor Tstack_handle = ctx->mutable_input(0, false);
     if (Tstack_handle.NumElements() != 2) {
@@ -144,18 +142,18 @@ Status GetStack(OpKernelContext* ctx, Stack** stack) {
     }
     const string& container = Tstack_handle.flat<string>()(0);
     const string& stack_name = Tstack_handle.flat<string>()(1);
-    key = strings::StrCat(container, stack_name);
-  }
-  ResourceMgr* rm = ctx->resource_manager();
-  if (rm == nullptr) {
-    return errors::Internal(""No resource manager."");
-  }
-  auto* step_container = ctx->step_container();
-  if (step_container == nullptr) {
-    return errors::Internal(""No step container."");
+    string key = strings::StrCat(container, stack_name);
+    ResourceMgr* rm = ctx->resource_manager();
+    if (rm == nullptr) {
+      return errors::Internal(""No resource manager."");
+    }
+    auto* step_container = ctx->step_container();
+    if (step_container == nullptr) {
+      return errors::Internal(""No step container."");
+    }
+    TF_RETURN_IF_ERROR(rm->Lookup(step_container->name(), key, stack));
+    return Status::OK();
   }
-  TF_RETURN_IF_ERROR(rm->Lookup(step_container->name(), key, stack));
-  return Status::OK();
 }
 
 std::atomic<int64> Stack::stack_counter{0};
",0,test
1c4fceab7dc09cab18c0def098320d6c52d2e514,tensorflow/tensorflow,"Change HandleFromInput() to return a `const ResourceHandle&` and avoid copying that type.

This avoids unnecessary string copies and deallocations in the ReadVariableOp, and similar ops.

PiperOrigin-RevId: 212652588",tensor_array_ops.cc,"@@ -290,7 +290,7 @@ class TensorArrayGradOp : public TensorArrayCreationOp {
       }
     } else {
       container = ""_tensor_arrays"";
-      auto resource = ctx->input(0).flat<ResourceHandle>()(0);
+      const auto& resource = ctx->input(0).flat<ResourceHandle>()(0);
       if (StringPiece(resource.name()).substr(0, container.size()) !=
           container) {
         return errors::InvalidArgument(""Wrong input container. "",
",0,test
2e6f8b3f05fe2d212c19b9598f93f4e6ee07675f,tensorflow/tensorflow,"Provide a hint about the number of iterations to while_loop in the case of for loops over tensors of known size. This allows using this type of for loops on TPU.

PiperOrigin-RevId: 192166460",control_flow.py,"@@ -83,7 +83,8 @@ def _known_len_for_loop(iterated, extra_cond, loop_body, init_state):
       while_cond,
       while_body,
       init_state=(0,) + init_state,
-      extra_deps=(iterated,))
+      extra_deps=(iterated,),
+      opts=dict(maximum_iterations=n))
   # Dropping the iteration index because it's not syntactically visible.
   results = results[1:]
 
@@ -136,7 +137,7 @@ def _dataset_for_loop(ds, extra_cond, loop_body, init_state):
   return results
 
 
-def while_loop(loop_cond, loop_body, init_state, extra_deps):
+def while_loop(loop_cond, loop_body, init_state, extra_deps, opts=None):
   """"""Functional form of a while statement.
 
   The loop operates on a so-called state, which includes all symbols that are
@@ -153,6 +154,7 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps):
     extra_deps: Tuple containing additional entities on which the loop may
         depend, such as loop invariants referenced by loop_cond. Used
         exclusively for dispatch control.
+    opts: Optional dict of extra loop parameters.
 
   Returns:
     Tuple containing the final state.
@@ -161,18 +163,21 @@ def while_loop(loop_cond, loop_body, init_state, extra_deps):
   # That could be somethins as simple as a collection of dispatch rules, with
   # some prioritization.
   if any(tensor_util.is_tensor(v) for v in init_state + extra_deps):
-    return _tf_while_loop(loop_cond, loop_body, init_state)
+    return _tf_while_loop(loop_cond, loop_body, init_state, opts)
   else:
-    return _py_while_loop(loop_cond, loop_body, init_state)
+    return _py_while_loop(loop_cond, loop_body, init_state, opts)
 
 
-def _tf_while_loop(loop_cond, loop_body, init_state):
+def _tf_while_loop(loop_cond, loop_body, init_state, opts):
   """"""Overload of while_loop that stages a TF while_loop.""""""
-  return control_flow_ops.while_loop(loop_cond, loop_body, init_state)
+  if opts is None:
+    opts = {}
+  return control_flow_ops.while_loop(loop_cond, loop_body, init_state, **opts)
 
 
-def _py_while_loop(loop_cond, loop_body, init_state):
+def _py_while_loop(loop_cond, loop_body, init_state, opts):
   """"""Overload of while_loop that executes a Python while loop.""""""
+  del opts
   state = init_state
   while loop_cond(*state):
     state = loop_body(*state)
",0,test
3c922d7df747ce3c25a0ad75a41f23c7e8d1df1e,tensorflow/tensorflow,"Print out bounded shape in HumanStringWithLayout

PiperOrigin-RevId: 229282536",shape_util.cc,"@@ -530,7 +530,8 @@ ShapeUtil::MakeShapeWithDescendingLayoutAndSamePhysicalLayout(
   string result = StrCat(
       primitive_util::LowercasePrimitiveTypeName(shape.element_type()), ""["");
   for (int i = 0; i < shape.dimensions().size(); i++) {
-    StrAppend(&result, (i > 0) ? "","" : """", shape.dimensions(i));
+    StrAppend(&result, (i > 0) ? "","" : """",
+              shape.is_dynamic_dimension(i) ? ""<="" : """", shape.dimensions(i));
   }
   result += ""]"";
   if (!IsScalar(shape) && shape.IsArray()) {
",0,train
a0bbeb10e2dada2a44caed9fd0bc1cd85e4ff93f,tensorflow/tensorflow,"Unreachable input gradients (#13071)

* Check if inputs are reachable from outputs in AddSymbolicGradients.

* Removing LOG.

* Edit following the PR comments.

* Lines > 80 chars.

* Formatting comments in gradients_test.

* Eliminate m2 and renamed m1->m, dm1->diff_m

* Edit InvalidArgument string concatenation.",gradients.cc,"@@ -175,8 +175,14 @@ Status SymbolicGradientBuilder::Initialize() {
         ""Must specify a gradient input for each output."");
   }
   std::vector<bool> reachable_nodes = GetReachableNodes();
-  // TODO(theflofly) Check that inputs_ are reachable from
-  // outputs_ using reachable_nodes
+  for (const Output& input : inputs_) {
+    if (!reachable_nodes[input.node()->id()]) {
+      return errors::InvalidArgument(
+        ""Cannot compute the partial derivative for node '"",
+        input.node()->name(),
+        ""' as it's unreachable from the output node(s)."");
+    }
+  }
   grad_outputs_->clear();
   grad_outputs_->resize(inputs_.size());
   // Populate `output_nodes_` from node ids in `outputs_`.
",0,train
a0bbeb10e2dada2a44caed9fd0bc1cd85e4ff93f,tensorflow/tensorflow,"Unreachable input gradients (#13071)

* Check if inputs are reachable from outputs in AddSymbolicGradients.

* Removing LOG.

* Edit following the PR comments.

* Lines > 80 chars.

* Formatting comments in gradients_test.

* Eliminate m2 and renamed m1->m, dm1->diff_m

* Edit InvalidArgument string concatenation.",gradients_test.cc,"@@ -48,9 +48,9 @@ class GradientsTest : public ::testing::Test {
   Scope scope_test_;
 };
 
-// EX.
+// Example:
 //      ^             ^
-//    dy|           dx|        // MatMul Gradient Graph
+//    dy|           dx|        (MatMul Gradient Graph)
 //      |             |
 //   MatMul_1      MatMul_2
 //   ^   ^          ^    ^
@@ -61,7 +61,7 @@ class GradientsTest : public ::testing::Test {
 //   |     Const_3       |
 //   |                   |
 //   |        ^          |
-//   |       z|          |     // MatMul Forward Graph
+//   |       z|          |     (MatMul Forward Graph)
 //   |        |          |
 //   |      MatMul_0     |
 //   |     /        \    |
@@ -373,24 +373,22 @@ TEST_F(GradientsTest, UnreachableEdgeGradOneOutput) {
   auto y_const = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
   auto y_assign = Assign(scope_test_, y, y_const);
 
-  auto m1 = MatMul(scope_test_, x, y);
+  auto m = MatMul(scope_test_, x, y);
 
   auto z = Variable(scope_test_, {1, 3}, DT_DOUBLE);
   auto z_const = Const(scope_test_, {{9.0, 10.0, 11.0}});
   auto z_assign = Assign(scope_test_, z, z_const);
 
-  auto m2 = MatMul(scope_test_, y, z);
-
-  auto dm1 = Const(scope_test_, {{0.5}, {0.5}});
+  auto diff_m = Const(scope_test_, {{0.5}, {0.5}});
 
   std::vector<Output> grad_outputs;
   TF_ASSERT_OK(
-      AddSymbolicGradients(scope_test_, {m1}, {y}, {dm1}, &grad_outputs));
+      AddSymbolicGradients(scope_test_, {m}, {y}, {diff_m}, &grad_outputs));
 
   std::vector<Tensor> outputs;
   test::GetTensors(scope_test_, {x_assign, y_assign, z_assign},
                    {grad_outputs[0]}, &outputs);
-  // dz/dy = xT * dm1
+  // dz/dy = xT * diff_m
   test::ExpectTensorNear<double>(
       outputs[0], test::AsTensor<double>({2.5, 3.5, 4.5}, {3, 1}), 1e-5);
 }
@@ -424,13 +422,36 @@ TEST_F(GradientsTest, UnreachableEdgeGradTwoOutputs) {
   test::GetTensors(scope_test_, {x_assign, y_assign, z_assign},
                    {grad_outputs[0]}, &outputs);
 
-  // the gradients from m1 and m2 will be summed to compute the gradient
-  // w.r.t y
+  // The gradients from m1 and m2 will be summed to compute the gradient
+  // w.r.t y:
   // dz/dy = xT * dm1 + dm2 * zT
   test::ExpectTensorNear<double>(
       outputs[0], test::AsTensor<double>({17.5, 24.7, 26.8}, {3, 1}), 1e-5);
 }
 
+TEST_F(GradientsTest, UnreachableInput) {
+  auto x = Const(scope_test_, {{1.0, 2.0, 3.0}, {4.0, 5.0, 6.0}});
+  auto y = Const(scope_test_, {{1.0}, {2.0}, {3.0}});
+  auto z = Const(scope_test_.WithOpName(""z""), {{9.0, 10.0, 11.0}});
+
+  auto m1 = MatMul(scope_test_, x, y);
+  auto m2 = MatMul(scope_test_, y, z);
+  auto dm1 = Const(scope_test_, {{0.5}, {0.5}});
+  
+  // From m1, z is unreachable, so an error status should be returned.
+  //   m2  m1
+  //   |   |
+  //   *   *
+  //  / \ / \
+  // z   y   x
+  std::vector<Output> grad_outputs;
+  Status status = AddSymbolicGradients(scope_test_, {m1}, {z}, {dm1},
+      &grad_outputs);
+  EXPECT_EQ(status.code(), error::INVALID_ARGUMENT);
+  EXPECT_EQ(status.error_message(), ""Cannot compute the partial derivative""
+      "" for node 'z' as it's unreachable from the output node(s)."");
+}
+
 // StopGradientSingleOutputMultiEdgeTest tests combinations of valid and
 // 'NoGradient' (induced by StopGradient op) returned along multiple edges from
 // a single nodes output.
",0,train
d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate

PiperOrigin-RevId: 407951625
Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",acceleration_test_list.cc,"@@ -95,6 +95,11 @@ ArgMinMaxOpTest/ArgMinMaxOpTest/Get.+ArgOutput64/[46],29
 # basic_rnn_test
 RnnOpTest/BlackBoxTest
 
+# batch_matmul_test
+# broadcasting is not supported
+-BatchMatMulOpTest/BatchMatMulOpTest/.+Broadcast.+
+BatchMatMulOpTest/BatchMatMulOpTest/.+,1000006
+
 # batch_to_space_nd_test
 BatchToSpaceNDOpTest/SimpleConstTest.*
 BatchToSpaceNDOpTest/BatchOneConstTest.*
@@ -282,6 +287,19 @@ FloatMulOpTest/.+
 -NegOpModel/.+Int64
 NegOpModel/.+,29
 
+# pack_test
+# int32 and uint8 are supported since NNAPI FL6
+PackOpTest/Int32.+,1000006
+PackOpTestInt/1/.+,1000006
+# PACK along last axis is supported since NNAPI FL6
+PackOpTest/FloatThreeInputsDifferentAxis,1000006
+PackOpTest/FloatThreeInputsNegativeAxis,1000006
+PackOpTestInt/0/ThreeInputsDifferentAxis,1000006
+PackOpTestInt/0/ThreeInputsNegativeAxis,1000006
+# f32 and int8 are supported since NNAPI 1.3 by decomposition
+PackOpTest/Float.+,30
+PackOpTestInt/0/.+,30
+
 # pad_test
 -PadOpTest/TooManyDimensions
 -PadOpTest/UnequalDimensions
@@ -349,9 +367,13 @@ ConstFloat(Sum|Prod|Max|Min)OpTest/ScalarAxis,29
 # reshape_test
 # Acceleration would be only for the test with shape being a constant tensor or
 # as hardcoded options.
-VariedShapeSpec/ReshapeOpTest/InvalidShape/[01]
-VariedShapeSpec/ReshapeOpTest/RegularShapes/[01]
-VariedShapeSpec/ReshapeOpTest/WithStretchDimension/[01]
+ReshapeOpTest/[01]/InvalidShape
+ReshapeOpTest/[01]/RegularShapes
+ReshapeOpTest/[01]/WithStretchDimension
+# int32 is supported since NNAPI FL6
+ReshapeOpTest/3/InvalidShape,1000006
+ReshapeOpTest/3/RegularShapes,1000006
+ReshapeOpTest/3/WithStretchDimension,1000006
 
 # resize_bilinear_test
 // align_corners & half_pixel_centers are not implemented in NNAPI before API 30
",0,test
d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate

PiperOrigin-RevId: 407951625
Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",nnapi_delegate.cc,"@@ -364,6 +364,21 @@ bool IsMeanWithDifferentInputOutputQuantization(const TfLiteContext* context,
          input.params.zero_point != output.params.zero_point;
 }
 
+bool IsBroadcastBatchMatMul(const TfLiteContext* context,
+                            const TfLiteNode* node) {
+  const auto& input0 = context->tensors[node->inputs->data[0]];
+  const auto& input1 = context->tensors[node->inputs->data[1]];
+  if (input0.dims->size != input1.dims->size) {
+    return true;
+  }
+  for (int i = 0; i < input0.dims->size - 2; i++) {
+    if (input0.dims->data[i] != input1.dims->data[i]) {
+      return true;
+    }
+  }
+  return false;
+}
+
 bool IsHybridOperator(const TfLiteContext* context, int builtin_code,
                       const TfLiteNode* node) {
   switch (builtin_code) {
@@ -2366,7 +2381,11 @@ bool NNAPIDelegateKernel::Validate(
     } break;
     case kTfLiteBuiltinReshape: {
       ExpectOpVersion(version, 1, &val_ctx);
-      ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
+      if (android_sdk_version < kNNAPIRuntimeFeatureLevel6) {
+        ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
+      } else {
+        ExpectIsFloatQuant8OrInt32Operator(context, node, &val_ctx);
+      }
       if (node->inputs->size >= 2) {
         Expect(context->tensors[node->inputs->data[1]].allocation_type ==
                    kTfLiteMmapRo,
@@ -3270,14 +3289,19 @@ bool NNAPIDelegateKernel::Validate(
       ExpectMinAndroidSdkVersion(android_sdk_version, kMinSdkVersionForNNAPI13,
                                  &val_ctx);
       const auto input_type = context->tensors[node->inputs->data[0]].type;
-      EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
-                           kTfLiteInt8);
-      auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
-      Expect(builtin->axis != -1 &&
-                 builtin->axis !=
-                     context->tensors[node->inputs->data[0]].dims->size,
-             NNAPIValidationFailureType::kUnsupportedOperandValue,
-             ""NNAPI does not support axis being the last dimension"", &val_ctx);
+      if (android_sdk_version >= kNNAPIRuntimeFeatureLevel6) {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteInt32, kTfLiteFloat32,
+                             kTfLiteInt8, kTfLiteUInt8);
+      } else {
+        EXPECT_INPUT_TYPE_IN(input_type, kTfLiteFloat32, kTfLiteInt8);
+        auto builtin = reinterpret_cast<TfLitePackParams*>(node->builtin_data);
+        Expect(builtin->axis != -1 &&
+                   builtin->axis !=
+                       context->tensors[node->inputs->data[0]].dims->size,
+               NNAPIValidationFailureType::kUnsupportedOperandValue,
+               ""NNAPI does not support axis being the last dimension"",
+               &val_ctx);
+      }
     } break;
     case kTfLiteBuiltinUnpack: {
       ExpectOpVersion(version, 2, &val_ctx);
@@ -3322,6 +3346,24 @@ bool NNAPIDelegateKernel::Validate(
              NNAPIValidationFailureType::kUnsupportedOperandRank,
              ""NNAPI does not support input rank greater than 4"", &val_ctx);
     } break;
+    case kTfLiteBuiltinBatchMatmul: {
+      ExpectOpVersion(version, 2, &val_ctx);
+      ExpectMinAndroidSdkVersion(android_sdk_version,
+                                 kNNAPIRuntimeFeatureLevel6, &val_ctx);
+      const auto& input0 = context->tensors[node->inputs->data[0]];
+      const auto& input1 = context->tensors[node->inputs->data[1]];
+      EXPECT_INPUT_TYPE_IN(input0.type, kTfLiteFloat32, kTfLiteInt32);
+      Expect(input0.type == input1.type,
+             NNAPIValidationFailureType::kUnsupportedHybridOperator,
+             ""NNAPI does not support hybrid batch matmul"", &val_ctx);
+      Expect(input0.dims->size <= 4 && input0.dims->size >= 2,
+             NNAPIValidationFailureType::kUnsupportedOperandRank,
+             ""NNAPI does not support input rank greater than 4 or less than 2"",
+             &val_ctx);
+      Expect(!IsBroadcastBatchMatMul(context, node),
+             NNAPIValidationFailureType::kUnsupportedInputType,
+             ""NNAPI does not support broadcast batch matmul"", &val_ctx);
+    } break;
     default:
       // All other operators are not mapped.
       AddValidationFailure(NNAPIValidationFailureType::kUnsupportedOperator,
@@ -4187,6 +4229,16 @@ TfLiteStatus NNAPIDelegateKernel::Map(
     case kTfLiteBuiltinFill: {
       *nn_op_type = ANEURALNETWORKS_FILL;
     } break;
+    case kTfLiteBuiltinBatchMatmul: {
+      auto builtin = reinterpret_cast<TfLiteBatchMatMulParams*>(
+          mapping_args.node->builtin_data);
+      mapping_args.builder->AddScalarBoolOperand(builtin->adj_x);
+      mapping_args.builder->AddScalarBoolOperand(builtin->adj_y);
+      *nn_op_type = ANEURALNETWORKS_BATCH_MATMUL;
+    } break;
+    case kTfLiteBuiltinPack: {
+      *nn_op_type = ANEURALNETWORKS_PACK;
+    } break;
     default:
       // All other operators are not mapped.
       return kTfLiteError;
@@ -5023,7 +5075,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     }
 
     // Delegate PACK by lowering it into CONCAT + RESHAPE.
-    if (reg->builtin_code == kTfLiteBuiltinPack) {
+    if (reg->builtin_code == kTfLiteBuiltinPack &&
+        target_feature_level_ < kNNAPIRuntimeFeatureLevel6) {
       TF_LITE_ENSURE_STATUS(
           builder.TransformPackIntoSupportedOps(node_index, node, reg));
       continue;
@@ -5172,6 +5225,16 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
           node_index);
       continue;
     }
+    // For PACK, NNAPI expects the axis scalar before all input tensors.
+    if (reg->builtin_code == kTfLiteBuiltinPack) {
+      const auto* builtin =
+          reinterpret_cast<TfLitePackParams*>(node->builtin_data);
+      // NNAPI only accepts non-negative axis.
+      auto& input_tensor = context->tensors[node->inputs->data[0]];
+      int axis = builtin->axis < 0 ? input_tensor.dims->size + builtin->axis + 1
+                                   : builtin->axis;
+      TF_LITE_ENSURE_STATUS(builder.AddScalarInt32Operand(axis));
+    }
     // Map inputs to NN API tensor indices.
     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
       if (reg->builtin_code == kTfLiteBuiltinTransposeConv) {
",0,test
d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate

PiperOrigin-RevId: 407951625
Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",nnapi_delegate_kernel.h,"@@ -35,6 +35,7 @@ constexpr int32_t kMinSdkVersionForNNAPI13 = 30;
 // TODO(b/185838597): change the remaining kMinSdkVersionForNNAPI* to
 // kNNAPIRuntimeFeatureLevel*.
 constexpr int32_t kNNAPIRuntimeFeatureLevel5 = 31;
+constexpr int32_t kNNAPIRuntimeFeatureLevel6 = 1000006;
 
 // Track tensor indices to NN API tensor indices mapping.
 class OperandMapping {
",0,test
d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate

PiperOrigin-RevId: 407951625
Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",reshape_test.cc,"@@ -95,6 +95,12 @@ TYPED_TEST(ReshapeOpTest, TooManySpecialDimensions) {
 TYPED_TEST(ReshapeOpTest, InvalidShape) {
   for (ShapeSpecificationType shape_type :
        ReshapeOpTest<ShapeSpecificationType>::_range_) {
+    if (SingleOpModel::GetForceUseNnapi() &&
+        shape_type == ShapeSpecificationType::kAsTensor) {
+      // NNAPI delegate does not support RESHAPE with shape as a non-constant
+      // tensor.
+      continue;
+    }
     ReshapeOpModel<TypeParam> m({1, 2, 2}, {2, 2}, {1, 2, 2, 1}, shape_type);
     m.SetInput({5, 6, 7, 8});
     m.Invoke();
@@ -107,6 +113,12 @@ TYPED_TEST(ReshapeOpTest, InvalidShape) {
 TYPED_TEST(ReshapeOpTest, RegularShapes) {
   for (ShapeSpecificationType shape_type :
        ReshapeOpTest<ShapeSpecificationType>::_range_) {
+    if (SingleOpModel::GetForceUseNnapi() &&
+        shape_type == ShapeSpecificationType::kAsTensor) {
+      // NNAPI delegate does not support RESHAPE with shape as a non-constant
+      // tensor.
+      continue;
+    }
     ReshapeOpModel<TypeParam> m({1, 2, 4, 1}, {3}, {2, 2, 2}, shape_type);
     m.SetInput({1, 2, 3, 4, 5, 6, 7, 8});
     m.Invoke();
@@ -118,6 +130,12 @@ TYPED_TEST(ReshapeOpTest, RegularShapes) {
 TYPED_TEST(ReshapeOpTest, WithStretchDimension) {
   for (ShapeSpecificationType shape_type :
        ReshapeOpTest<ShapeSpecificationType>::_range_) {
+    if (SingleOpModel::GetForceUseNnapi() &&
+        shape_type == ShapeSpecificationType::kAsTensor) {
+      // NNAPI delegate does not support RESHAPE with shape as a non-constant
+      // tensor.
+      continue;
+    }
     ReshapeOpModel<TypeParam> m({1, 2, 4, 1}, {3}, {2, 1, -1}, shape_type);
     m.SetInput({1, 2, 3, 4, 5, 6, 7, 8});
     m.Invoke();
",0,test
d41d3f4c27722d2dbdd00227fbddb0713310f313,tensorflow/tensorflow,"[NNAPI] Enable delegation of NNAPI FL6 ops in NNAPI delegate

PiperOrigin-RevId: 407951625
Change-Id: I656a621a291aa4fdd887009fad952849c27ecd16",NeuralNetworksTypes.h,"@@ -145,6 +145,8 @@ enum {
   ANEURALNETWORKS_HARD_SWISH = 99,
   ANEURALNETWORKS_FILL = 100,
   ANEURALNETWORKS_RANK = 101,
+  ANEURALNETWORKS_BATCH_MATMUL = 102,
+  ANEURALNETWORKS_PACK = 103,
 };
 
 /**
@@ -255,6 +257,8 @@ enum {
    * API releases.
    */
   ANEURALNETWORKS_FEATURE_LEVEL_5 = 31,
+  /** Android NNAPI feature level 6 */
+  ANEURALNETWORKS_FEATURE_LEVEL_6 = 1000006,
 };
 
 /**
",0,test
e874244346b3945de77d304d5f12e192aaa6f539,tensorflow/tensorflow,"Add C++ loop memory leak test for MemoryChecker

PiperOrigin-RevId: 346579012
Change-Id: I4dcf861efac18f665ec9b8f951ab8b082a0d7777",memory_checker_test.py,"@@ -108,6 +108,18 @@ class MemoryCheckerTest(test.TestCase):
     with self.assertRaises(AssertionError):
       memory_checker.assert_no_leak_if_all_possibly_except_one()
 
+  def testLeak4(self):
+    helper = _memory_checker_test_helper.MemoryCheckerTestHelper()
+
+    with MemoryChecker() as memory_checker:
+      for i in range(10):
+        helper.list_push_back(i)
+        memory_checker.record_snapshot()
+
+    memory_checker.report()
+    with self.assertRaises(AssertionError):
+      memory_checker.assert_no_leak_if_all_possibly_except_one()
+
   def testNoNewPythonObjectsEmpty(self):
     self.skipTest('TODO(b/150324603): Flaky test.')
     with MemoryChecker() as memory_checker:
",0,test
dc7bc9e4053e8b643937447f1f31a2bf980a1d3a,tensorflow/tensorflow,"Add TTI pass initialization to pass managers.

Many LLVM transformations benefits from knowing the targets. This enables optimizations,
especially in a JIT context when the target is (generally) well-known.

Closes #49

PiperOrigin-RevId: 261840617",OptUtils.h,"@@ -31,6 +31,7 @@
 namespace llvm {
 class Module;
 class Error;
+class TargetMachine;
 } // namespace llvm
 
 namespace mlir {
@@ -41,17 +42,23 @@ void initializeLLVMPasses();
 
 /// Create a module transformer function for MLIR ExecutionEngine that runs
 /// LLVM IR passes corresponding to the given speed and size optimization
-/// levels (e.g. -O2 or -Os).
+/// levels (e.g. -O2 or -Os). If not null, `targetMachine` is used to
+/// initialize passes that provide target-specific information to the LLVM
+/// optimizer. `targetMachine` must outlive the returned std::function.
 std::function<llvm::Error(llvm::Module *)>
-makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel);
+makeOptimizingTransformer(unsigned optLevel, unsigned sizeLevel,
+                          llvm::TargetMachine *targetMachine);
 
 /// Create a module transformer function for MLIR ExecutionEngine that runs
 /// LLVM IR passes explicitly specified, plus an optional optimization level,
 /// Any optimization passes, if present, will be inserted before the pass at
-/// position optPassesInsertPos.
+/// position optPassesInsertPos. If not null, `targetMachine` is used to
+/// initialize passes that provide target-specific information to the LLVM
+/// optimizer. `targetMachine` must outlive the returned std::function.
 std::function<llvm::Error(llvm::Module *)>
 makeLLVMPassesTransformer(llvm::ArrayRef<const llvm::PassInfo *> llvmPasses,
                           llvm::Optional<unsigned> mbOptLevel,
+                          llvm::TargetMachine *targetMachine,
                           unsigned optPassesInsertPos = 0);
 
 } // end namespace mlir
",0,train
c762c4501ca017994c1fa5554c3c8e47b7c80b66,tensorflow/tensorflow,"Squash REQUIRED_PACKAGES logic

This cleanup should be a no-op. I moved a bunch of package-selection logic into
the list of REQUIRED_PACKAGES so that all dependencies are specified in just one
place. For example, one big difference is that the TF ecosystem packages are
right next to each other.

I also know that gast 0.5.2 breaks one of our tests, so I've pinned the gast
version to 0.4.0 or below, which is what our CI currently installs for testing.

PiperOrigin-RevId: 419650242
Change-Id: I30a70e6a54f89e55e117d8484c84b2b39709b711",setup.py,"@@ -59,6 +59,10 @@ if '--project_name' in sys.argv:
   sys.argv.remove('--project_name')
   sys.argv.pop(project_name_idx)
 
+# Returns standard if a tensorflow-* package is being built, and nightly if a
+# tf_nightly-* package is being built.
+def if_nightly(standard, nightly):
+  return nightly if 'tf_nightly' in project_name else standard
 
 # All versions of TF need these packages. We indicate the widest possible range
 # of package releases possible to be as up-to-date as possible as well as to
@@ -69,12 +73,13 @@ if '--project_name' in sys.argv:
 # NOTE: This assumes that all packages follow SemVer. If a package follows a
 # different versioning scheme (e.g., PVP), we use different bound specifier and
 # comment the versioning scheme.
-# NOTE: Please add test only packages to `TEST_PACKAGES` below.
 REQUIRED_PACKAGES = [
     'absl-py >= 0.4.0',
     'astunparse >= 1.6.0',
     'flatbuffers >= 1.12',
-    'gast >= 0.2.1',
+    # gast versions above 0.4.0 are incompatible with some of TF's tests.
+    # TODO(angerson): File a bug for these incompatible tests and the limitation
+    'gast >= 0.2.1, <= 0.4.0',
     'google_pasta >= 0.1.1',
     'h5py >= 2.9.0',
     'keras_preprocessing >= 1.1.1', # 1.1.0 needs tensorflow==1.7
@@ -87,49 +92,26 @@ REQUIRED_PACKAGES = [
     'termcolor >= 1.1.0',
     'typing_extensions >= 3.6.6',
     'wrapt >= 1.11.0',
-    # TensorFlow ecosystem packages that TF exposes API for
-    # These need to be in sync with the existing TF version
-    # They are updated during the release process
-    # When updating these, please also update the nightly versions below
-    'tensorboard >= 2.7, < 2.8',
-    'tensorflow_estimator >= 2.8.0rc0, < 2.9',
-    'keras >= 2.8.0rc0, < 2.9',
     'tensorflow-io-gcs-filesystem >= 0.23.1',
-]
-
-
-# For nightly packages, instead of depending on tensorboard,
-# tensorflow_estimator and keras, we depend on their nightly equivalent.
-# When updating these, make sure to also update the release versions above.
-# NOTE: the nightly versions are one version ahead of the release ones!
-# NOTE: the nightly versions specify alpha/dev!
-if 'tf_nightly' in project_name:
-  for i, pkg in enumerate(REQUIRED_PACKAGES):
-    if 'tensorboard' in pkg:
-      REQUIRED_PACKAGES[i] = 'tb-nightly ~= 2.8.0.a'
-    elif 'tensorflow_estimator' in pkg:
-      REQUIRED_PACKAGES[i] = 'tf-estimator-nightly ~= 2.9.0.dev'
-    elif 'keras' in pkg and 'keras_preprocessing' not in pkg:
-      REQUIRED_PACKAGES[i] = 'keras-nightly ~= 2.9.0.dev'
-
-
-# grpcio does not build correctly on big-endian machines due to lack of
-# BoringSSL support.
-# See https://github.com/tensorflow/tensorflow/issues/17882.
-if sys.byteorder == 'little':
-  REQUIRED_PACKAGES.append('grpcio >= 1.24.3, < 2.0')
-
-
-# Packages which are only needed for testing code.
-# Please don't add test-only packages to `REQUIRED_PACKAGES`!
-# Follows the same conventions as `REQUIRED_PACKAGES`
-TEST_PACKAGES = [
-    'portpicker >= 1.3.1',
-    'scipy >= 1.5.2',
-    'tblib >= 1.4.0',
-    'dill >= 0.2.9',
-]
-
+    # grpcio does not build correctly on big-endian machines due to lack of
+    # BoringSSL support.
+    # See https://github.com/tensorflow/tensorflow/issues/17882.
+    'grpcio >= 1.24.3, < 2.0' if sys.byteorder == 'little' else None,
+    # TensorFlow exposes the TF API for certain TF ecosystem packages like
+    # keras.  When TF depends on those packages, the package version needs to
+    # match the current TF version. For tf_nightly, we install the nightly
+    # variant of each package instead, which must be one version ahead of the
+    # current release version. These also usually have ""alpha"" or ""dev"" in their
+    # version name.
+    # These are all updated during the TF release process.
+    if_nightly('tensorboard >= 2.7, < 2.8',
+               'tb-nightly ~= 2.8.0.a'),
+    if_nightly('tensorflow_estimator >= 2.8.0rc0, < 2.9',
+               'tf-estimator-nightly ~= 2.9.0.dev'),
+    if_nightly('keras >= 2.8.0rc0, < 2.9',
+               'keras-nightly ~= 2.9.0.dev'),
+]))
+REQUIRED_PACKAGES = [ p for p in REQUIRED_PACKAGES if p is not None ]
 
 DOCLINES = __doc__.split('\n')
 if project_name.endswith('-gpu'):
@@ -152,17 +134,15 @@ CONSOLE_SCRIPTS = [
     # is now declared by the tensorboard pip package. If we remove the
     # TensorBoard command, pip will inappropriately remove it during install,
     # even though the command is not removed, just moved to a different wheel.
-    'tensorboard = tensorboard.main:run_main',
+    # We exclude it anyway if building tf_nightly.
+    if_nightly(None, 'tensorboard = tensorboard.main:run_main')
     'tf_upgrade_v2 = tensorflow.tools.compatibility.tf_upgrade_v2_main:main',
     'estimator_ckpt_converter = '
     'tensorflow_estimator.python.estimator.tools.checkpoint_converter:main',
 ]
+CONSOLE_SCRIPTS = [ s for s in CONSOLE_SCRIPTS if s is not None ]
 # pylint: enable=line-too-long
 
-# remove the tensorboard console script if building tf_nightly
-if 'tf_nightly' in project_name:
-  CONSOLE_SCRIPTS.remove('tensorboard = tensorboard.main:run_main')
-
 
 class BinaryDistribution(Distribution):
 
@@ -310,7 +290,6 @@ setup(
     },
     headers=headers,
     install_requires=REQUIRED_PACKAGES,
-    tests_require=REQUIRED_PACKAGES + TEST_PACKAGES,
     # Add in any packaged data.
     include_package_data=True,
     package_data={
",0,train
fc33e0f3783cab0d7486f6e277e77e1c95ce291d,tensorflow/tensorflow,"Reshaped real valued column for DNN input layer.
Change: 123786681",feature_column.py,"@@ -543,7 +543,6 @@ class _RealValuedColumn(_FeatureColumn, collections.namedtuple(
   def __new__(cls, column_name, dimension, default_value, dtype):
     if default_value is not None:
       default_value = tuple(default_value)
-
     return super(_RealValuedColumn, cls).__new__(cls, column_name, dimension,
                                                  default_value, dtype)
 
@@ -573,7 +572,10 @@ class _RealValuedColumn(_FeatureColumn, collections.namedtuple(
                          input_tensor,
                          weight_collections=None,
                          trainable=True):
-    return input_tensor
+    batch_size = input_tensor.get_shape().as_list()[0]
+    batch_size = int(batch_size) if batch_size else -1
+    flattened_shape = [batch_size, self.dimension]
+    return array_ops.reshape(math_ops.to_float(input_tensor), flattened_shape)
 
   def to_weighted_sum(self,
                       input_tensor,
",0,test
a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path).

PiperOrigin-RevId: 186071285",conv_2d.h,"@@ -54,10 +54,12 @@ struct InflatePadAndShuffle {
 template <typename Device, typename Input, typename Filter, typename Output>
 void SpatialConvolutionFunc(const Device& d, Output output, Input input,
                             Filter filter, int row_stride, int col_stride,
+                            int row_dilation, int col_dilation,
                             const Eigen::PaddingType& padding) {
   // Need to swap row/col when calling Eigen.
   output.device(d) =
-      Eigen::SpatialConvolution(input, filter, col_stride, row_stride, padding);
+      Eigen::SpatialConvolution(input, filter, col_stride, row_stride, padding,
+                                col_dilation, row_dilation);
 }
 
 template <typename Device, typename T>
@@ -65,9 +67,10 @@ struct SpatialConvolution {
   void operator()(const Device& d, typename TTypes<T, 4>::Tensor output,
                   typename TTypes<T, 4>::ConstTensor input,
                   typename TTypes<T, 4>::ConstTensor filter, int row_stride,
-                  int col_stride, const Eigen::PaddingType& padding) {
+                  int col_stride, int row_dilation, int col_dilation,
+                  const Eigen::PaddingType& padding) {
     SpatialConvolutionFunc(d, output, input, filter, row_stride, col_stride,
-                           padding);
+                           row_dilation, col_dilation, padding);
   }
 };
 
@@ -77,11 +80,12 @@ struct SpatialConvolution<Device, Eigen::half> {
                   typename TTypes<Eigen::half, 4>::Tensor output,
                   typename TTypes<Eigen::half, 4>::ConstTensor input,
                   typename TTypes<Eigen::half, 4>::ConstTensor filter,
-                  int row_stride, int col_stride,
-                  const Eigen::PaddingType& padding) {
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation, const Eigen::PaddingType& padding) {
     output.device(d) =
         Eigen::SpatialConvolution(input.cast<float>(), filter.cast<float>(),
-                                  col_stride, row_stride, padding)
+                                  col_stride, row_stride, padding, col_dilation,
+                                  row_dilation)
             .cast<Eigen::half>();
   }
 };
@@ -91,11 +95,13 @@ struct SpatialConvolutionBackwardInput {
   void operator()(const Device& d, typename TTypes<T, 4>::Tensor input_backward,
                   typename TTypes<T, 4>::ConstTensor kernel,
                   typename TTypes<T, 4>::ConstTensor output_backward,
-                  int row_stride, int col_stride) {
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation) {
     // Need to swap row/col when calling Eigen.
     input_backward.device(d) = Eigen::SpatialConvolutionBackwardInput(
         kernel, output_backward, input_backward.dimension(2),
-        input_backward.dimension(1), col_stride, row_stride);
+        input_backward.dimension(1), col_stride, row_stride, col_dilation,
+        row_dilation);
   }
 };
 
@@ -105,11 +111,13 @@ struct SpatialConvolutionBackwardFilter {
                   typename TTypes<T, 4>::Tensor kernel_backward,
                   typename TTypes<T, 4>::ConstTensor input,
                   typename TTypes<T, 4>::ConstTensor output_backward,
-                  int row_stride, int col_stride) {
+                  int row_stride, int col_stride, int row_dilation,
+                  int col_dilation) {
     // Need to swap row/col when calling Eigen.
     kernel_backward.device(d) = Eigen::SpatialConvolutionBackwardKernel(
         input, output_backward, kernel_backward.dimension(1),
-        kernel_backward.dimension(0), col_stride, row_stride);
+        kernel_backward.dimension(0), col_stride, row_stride, col_dilation,
+        row_dilation);
   }
 };
 
",0,test
a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path).

PiperOrigin-RevId: 186071285",conv_grad_filter_ops.cc,"@@ -101,7 +101,8 @@ struct LaunchConv2DBackpropFilterOp<CPUDevice, T> {
     const CPUDevice& d = ctx->eigen_device<CPUDevice>();
     functor::SpatialConvolutionBackwardFilter<CPUDevice, T>()(
         d, filter_backprop->tensor<T, 4>(), input.tensor<T, 4>(),
-        out_backprop.tensor<T, 4>(), row_stride, col_stride);
+        out_backprop.tensor<T, 4>(), row_stride, col_stride,
+        /*row_dilation=*/1, /*col_dilation=*/1);
   }
 };
 
",0,test
a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path).

PiperOrigin-RevId: 186071285",conv_grad_input_ops.cc,"@@ -106,7 +106,8 @@ struct LaunchConv2DBackpropInputOp<CPUDevice, T> {
     const CPUDevice& d = ctx->eigen_device<CPUDevice>();
     functor::SpatialConvolutionBackwardInput<CPUDevice, T>()(
         d, in_backprop->tensor<T, 4>(), filter.tensor<T, 4>(),
-        out_backprop.tensor<T, 4>(), row_stride, col_stride);
+        out_backprop.tensor<T, 4>(), row_stride, col_stride,
+        /*row_dilation=*/1, /*col_dilation=*/1);
   }
 };
 
",0,test
a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path).

PiperOrigin-RevId: 186071285",conv_ops.cc,"@@ -60,8 +60,8 @@ template <typename Device, typename T>
 struct LaunchGeneric {
   void operator()(OpKernelContext* ctx, const Tensor& input,
                   const Tensor& filter, int row_stride, int col_stride,
-                  const Padding& padding, Tensor* output,
-                  TensorFormat data_format) {
+                  int row_dilation, int col_dilation, const Padding& padding,
+                  Tensor* output, TensorFormat data_format) {
     CHECK(data_format == FORMAT_NHWC) << ""Generic conv implementation only ""
                                          ""supports NHWC tensor format for now."";
     if (filter.dim_size(0) == 1 && filter.dim_size(1) == 1 && row_stride == 1 &&
@@ -86,7 +86,8 @@ struct LaunchGeneric {
           filter.shaped<T, 2>({filter.dim_size(2), filter.dim_size(3)}),
           dim_pair);
     } else if (filter.dim_size(0) == input.dim_size(1) &&
-               filter.dim_size(1) == input.dim_size(2) && padding == VALID) {
+               filter.dim_size(1) == input.dim_size(2) && row_dilation == 1 &&
+               col_dilation == 1 && padding == VALID) {
       // If the input data and filter have the same height/width,
       // the 2D convolution is reduced to matrix multiplication.
       const int k =  // Length of reduction dimension.
@@ -103,7 +104,7 @@ struct LaunchGeneric {
       functor::SpatialConvolution<Device, T>()(
           ctx->eigen_device<Device>(), output->tensor<T, 4>(),
           input.tensor<T, 4>(), filter.tensor<T, 4>(), row_stride, col_stride,
-          BrainPadding2EigenPadding(padding));
+          row_dilation, col_dilation, BrainPadding2EigenPadding(padding));
     }
   }
 };
@@ -122,15 +123,9 @@ struct LaunchConv2DOp<CPUDevice, T> {
                                 ""NHWC tensor format for now.""));
       return;
     }
-    // TODO(yangzihao): Add the CPU implementation of dilated conv 2D.
-    if (row_dilation > 1 || col_dilation > 1) {
-      ctx->SetStatus(
-          errors::Unimplemented(""Generic conv implementation only supports ""
-                                ""dilated rate of 1 for now.""));
-      return;
-    }
     LaunchGeneric<CPUDevice, T>()(ctx, input, filter, row_stride, col_stride,
-                                  padding, output, data_format);
+                                  row_dilation, col_dilation, padding, output,
+                                  data_format);
   }
 };
 
@@ -792,7 +787,8 @@ namespace functor {
       const GPUDevice& d, typename TTypes<T, 4>::Tensor output,              \
       typename TTypes<T, 4>::ConstTensor input,                              \
       typename TTypes<T, 4>::ConstTensor filter, int row_stride,             \
-      int col_stride, const Eigen::PaddingType& padding);                    \
+      int col_stride, int row_dilation, int col_dilation,                    \
+      const Eigen::PaddingType& padding);                                    \
   extern template struct SpatialConvolution<GPUDevice, T>;                   \
   template <>                                                                \
   void MatMulConvFunctor<GPUDevice, T>::operator()(                          \
",0,test
a189502cc3032f0bc8f3294b0e39062e89fe9181,tensorflow/tensorflow,"Activates Eigen path for CPU implementation of atrous/dilated convolution (only forward path).

PiperOrigin-RevId: 186071285",conv_ops_test.py,"@@ -302,25 +302,20 @@ class Conv2DTest(test.TestCase):
                                padding, dilations):
     expected_results = []
     computed_results = []
-    default_dilations = (dilations[0] == 1 and dilations[1] == 1)
     for data_format, use_gpu in GetTestConfigs():
-      # If any dilation rate is larger than 1, only do test on the GPU
-      # because we currently do not have a CPU implementation for arbitrary
-      # dilation rates.
-      if default_dilations or use_gpu:
-        expected, computed = self._ComputeReferenceDilatedConv(
-            tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
-            data_format, use_gpu)
-        expected_results.append(expected)
-        computed_results.append(computed)
-        tolerance = 1e-2 if use_gpu else 1e-5
-        expected_values = self.evaluate(expected_results)
-        computed_values = self.evaluate(computed_results)
-        for e_value, c_value in zip(expected_values, computed_values):
-          print(""expected = "", e_value)
-          print(""actual = "", c_value)
-          self.assertAllClose(
-              e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4)
+      expected, computed = self._ComputeReferenceDilatedConv(
+          tensor_in_sizes, filter_in_sizes, strides, dilations, padding,
+          data_format, use_gpu)
+      expected_results.append(expected)
+      computed_results.append(computed)
+      tolerance = 1e-2 if use_gpu else 1e-5
+      expected_values = self.evaluate(expected_results)
+      computed_values = self.evaluate(computed_results)
+      for e_value, c_value in zip(expected_values, computed_values):
+        print(""expected = "", e_value)
+        print(""actual = "", c_value)
+        self.assertAllClose(
+            e_value.flatten(), c_value.flatten(), atol=tolerance, rtol=1e-4)
 
   def _VerifyValues(self, tensor_in_sizes, filter_in_sizes, strides, padding,
                     expected):
@@ -365,13 +360,12 @@ class Conv2DTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2Filter2x1Dilation(self):
-    if test.is_gpu_available(cuda_only=True):
-      self._VerifyDilatedConvValues(
-          tensor_in_sizes=[1, 4, 4, 1],
-          filter_in_sizes=[2, 2, 1, 1],
-          strides=[1, 1],
-          dilations=[2, 1],
-          padding=""VALID"")
+    self._VerifyDilatedConvValues(
+        tensor_in_sizes=[1, 4, 4, 1],
+        filter_in_sizes=[2, 2, 1, 1],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding=""VALID"")
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2DEmpty(self):
@@ -385,13 +379,12 @@ class Conv2DTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2DEmptyDilation(self):
-    if test.is_gpu_available(cuda_only=True):
-      self._VerifyDilatedConvValues(
-          tensor_in_sizes=[0, 2, 3, 3],
-          filter_in_sizes=[1, 1, 3, 3],
-          strides=[1, 1],
-          dilations=[2, 1],
-          padding=""VALID"")
+    self._VerifyDilatedConvValues(
+        tensor_in_sizes=[0, 2, 3, 3],
+        filter_in_sizes=[1, 1, 3, 3],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding=""VALID"")
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2Filter(self):
@@ -406,13 +399,12 @@ class Conv2DTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2FilterDilation(self):
-    if test.is_gpu_available(cuda_only=True):
-      self._VerifyDilatedConvValues(
-          tensor_in_sizes=[1, 2, 3, 3],
-          filter_in_sizes=[2, 2, 3, 3],
-          strides=[1, 1],
-          dilations=[1, 2],
-          padding=""VALID"")
+    self._VerifyDilatedConvValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[2, 2, 3, 3],
+        strides=[1, 1],
+        dilations=[1, 2],
+        padding=""VALID"")
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D1x2Filter(self):
@@ -430,13 +422,12 @@ class Conv2DTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D1x2FilterDilation(self):
-    if test.is_gpu_available(cuda_only=True):
-      self._VerifyDilatedConvValues(
-          tensor_in_sizes=[1, 2, 3, 3],
-          filter_in_sizes=[1, 2, 3, 3],
-          strides=[1, 1],
-          dilations=[2, 1],
-          padding=""VALID"")
+    self._VerifyDilatedConvValues(
+        tensor_in_sizes=[1, 2, 3, 3],
+        filter_in_sizes=[1, 2, 3, 3],
+        strides=[1, 1],
+        dilations=[2, 1],
+        padding=""VALID"")
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2D2x2FilterStride2(self):
@@ -512,13 +503,12 @@ class Conv2DTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes()
   def testConv2DKernelSizeMatchesInputSizeDilation(self):
-    if test.is_gpu_available(cuda_only=True):
-      self._VerifyDilatedConvValues(
-          tensor_in_sizes=[1, 3, 3, 1],
-          filter_in_sizes=[2, 2, 1, 2],
-          strides=[1, 1],
-          dilations=[2, 2],
-          padding=""VALID"")
+    self._VerifyDilatedConvValues(
+        tensor_in_sizes=[1, 3, 3, 1],
+        filter_in_sizes=[2, 2, 1, 2],
+        strides=[1, 1],
+        dilations=[2, 2],
+        padding=""VALID"")
 
   # TODO(yzhwang): this currently fails.
   # self._VerifyValues(tensor_in_sizes=[1, 8, 8, 1],
@@ -1538,21 +1528,6 @@ class Conv2DTest(test.TestCase):
             use_gpu=False)
         self.evaluate(conv)
 
-  def testCPUConv2DDilatedUnimplemented(self):
-    with self.test_session(use_gpu=False):
-      with self.assertRaisesRegexp(errors_impl.UnimplementedError,
-                                   ""dilated rate of 1 for now""):
-        conv = self._SetupValuesForDevice(
-            tensor_in_sizes=[1, 4, 4, 1],
-            filter_in_sizes=[2, 2, 1, 1],
-            dilations=[2, 1],
-            strides=[1, 1],
-            padding=""VALID"",
-            data_format=""NHWC"",
-            dtype=dtypes.float32,
-            use_gpu=False)
-        self.evaluate(conv)
-
 
 class DepthwiseConv2DTest(test.TestCase):
 
@@ -1887,7 +1862,7 @@ def GetInceptionFwdTest(input_size, filter_size, stride, padding,
 def GetInceptionFwdDilatedConvTest(input_size, filter_size, stride, padding):
 
   def Test(self):
-    if test.is_gpu_available(cuda_only=True) and stride == 1:
+    if stride == 1:
       tf_logging.info(""Testing InceptionFwd with dilations %s"",
                       (input_size, filter_size, stride, padding))
       self._VerifyDilatedConvValues(
",0,test
6e4b0a4a351260ea3a15457a24332fdba46abab7,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 3.

Move SetThunkLaunchDimensions() to right after KernelThunk construction. The launch dimensions will be passed to KernelThunk's constructor as a parameter.

PiperOrigin-RevId: 386164406
Change-Id: Ifdbed56d1daaae2f2bde1da37f87216d1014909d",ir_emitter_unnested.cc,"@@ -5119,11 +5119,6 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
                                      ""doesn't set the input layout of ""
                                   << MlirToString(first_reduce);
 
-  std::vector<llvm_ir::IrArray> ir_arrays;
-  TF_ASSIGN_OR_RETURN(
-      std::unique_ptr<KernelThunk> kernel_thunk,
-      BuildKernelThunk(unnested_hlo, Thunk::ThunkInfo(), &ir_arrays));
-
   HloComputation* fused_computation = nullptr;
   TF_ASSIGN_OR_RETURN(fused_computation,
                       GetOrCreateSubComputationFromRegion(&fusion.region(),
@@ -5136,6 +5131,29 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
   VLOG(2) << StrCat(""Generate in "", instr_index_groups.size(), "" groups for "",
                     MlirToString(unnested_hlo));
 
+  ReductionCodegenInfo reduction_info =
+      ComputeReductionCodegenInfo(unnested_hlo, first_reduce, layout_analysis);
+  const KernelMappingScheme& mapping_scheme =
+      reduction_info.GetKernelMappingScheme();
+  // block_y_count is set to instr_index_groups.size(), so that each reduction
+  // group can be run in parallel by a different BlockIdy.
+  LaunchDimensions launch_dimensions(
+      {/*x=*/mapping_scheme.GetNumberOfBlocks(),
+       /*y=*/static_cast<int64>(instr_index_groups.size()),
+       /*z=*/1},
+      {/*x=*/mapping_scheme.GetThreadsPerBlock(), /*y=*/1, /*z=*/1});
+  VLOG(3) << ""Launch dimensions of ""
+          << mlir::GetNameFromLoc(unnested_hlo->getLoc())
+          << "": number of blocks: "" << mapping_scheme.GetNumberOfBlocks()
+          << "" - threads per block: "" << mapping_scheme.GetThreadsPerBlock();
+
+  std::vector<llvm_ir::IrArray> ir_arrays;
+  TF_ASSIGN_OR_RETURN(
+      std::unique_ptr<KernelThunk> kernel_thunk,
+      BuildKernelThunk(unnested_hlo, Thunk::ThunkInfo(), &ir_arrays));
+  SetThunkLaunchDimensions(launch_dimensions, kernel_thunk.get(),
+                           ir_emitter_context_->llvm_module());
+
   absl::optional<GpuElementalIrEmitter> elemental_emitter;
   absl::optional<FusedIrEmitter> optional_fused_emitter;
   FusedIrEmitter* fused_emitter = nullptr;
@@ -5189,23 +5207,6 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
         b_.CreateICmpEQ(raw_block_id_y, b_.getInt32(i));
     ksl.If(StrCat(""reduce-group-"", i), guarding_cond, emit_reduction_func);
   }
-  ReductionCodegenInfo reduction_info =
-      ComputeReductionCodegenInfo(unnested_hlo, first_reduce, layout_analysis);
-  const KernelMappingScheme& mapping_scheme =
-      reduction_info.GetKernelMappingScheme();
-  // block_y_count is set to instr_index_groups.size(), so that each reduction
-  // group can be run in parallel by a different BlockIdy.
-  LaunchDimensions launch_dimensions(
-      {/*x=*/mapping_scheme.GetNumberOfBlocks(),
-       /*y=*/static_cast<int64>(instr_index_groups.size()),
-       /*z=*/1},
-      {/*x=*/mapping_scheme.GetThreadsPerBlock(), /*y=*/1, /*z=*/1});
-  VLOG(3) << ""Launch dimensions of ""
-          << mlir::GetNameFromLoc(unnested_hlo->getLoc())
-          << "": number of blocks: "" << mapping_scheme.GetNumberOfBlocks()
-          << "" - threads per block: "" << mapping_scheme.GetThreadsPerBlock();
-  SetThunkLaunchDimensions(launch_dimensions, kernel_thunk.get(),
-                           ir_emitter_context_->llvm_module());
 
   thunks.push_back(std::move(kernel_thunk));
   std::unique_ptr<SequentialThunk> sequential_thunk =
",0,train
93cc43bef97f4371379c2ea6e87b260a2a2cf7af,tensorflow/tensorflow,add lockFile argument to save_model(),save.py,"@@ -48,6 +48,7 @@ _KERAS_SAVED_MODEL_STILL_EXPERIMENTAL = True
 def save_model(model,
                filepath,
                overwrite=True,
+               lockFile=True,
                include_optimizer=True,
                save_format=None,
                signatures=None,
@@ -95,6 +96,7 @@ def save_model(model,
       overwrite: Whether we should overwrite any existing model at the target
         location, or instead ask the user with a manual prompt.
       include_optimizer: If True, save optimizer's state together.
+      lockFile: If True, protect model file while saving model.
       save_format: Either 'tf' or 'h5', indicating whether to save the model
         to Tensorflow SavedModel or HDF5. Defaults to 'tf' in TF 2.X, and 'h5'
         in TF 1.X.
@@ -128,7 +130,7 @@ def save_model(model,
           'to the Tensorflow SavedModel format (by setting save_format=""tf"") '
           'or using `save_weights`.')
     hdf5_format.save_model_to_hdf5(
-        model, filepath, overwrite, include_optimizer)
+        model, filepath, overwrite, lockFile, include_optimizer)
   else:
     saved_model_save.save(model, filepath, overwrite, include_optimizer,
                           signatures, options)
",0,train
5cdf8f26c806e893e0773ad34e2b59008cc6f8ec,tensorflow/tensorflow,"Update parameter_server_strategy_test to not use Keras dense layer.

Replaced it with a variable and then a matmul with the input. It doesn't fully copy all the keras behavior (like mixed precision, etc.), but it should be good enough for the existing test cases that use it.

PiperOrigin-RevId: 319836812
Change-Id: I97f9979d927b8187fa6c72ceff6ff521dab4cc2d",parameter_server_strategy_test.py,"@@ -19,6 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import copy
+import functools
 import threading
 
 from absl.testing import parameterized
@@ -43,13 +44,14 @@ from tensorflow.python.eager import context
 from tensorflow.python.estimator import run_config
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import device as tf_device
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_util
-from tensorflow.python.keras.layers import core
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gradients
+from tensorflow.python.ops import init_ops_v2
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import partitioned_variables
 from tensorflow.python.ops import resource_variable_ops
@@ -450,10 +452,14 @@ class ParameterServerStrategyTestBase(
          self.cached_session(target=master_target,
                              config=sess_config) as sess, \
          d.scope():
-      l = core.Dense(1, use_bias=False)
+      initializer = functools.partial(
+          init_ops_v2.GlorotUniform(), (1, 1), dtype=dtypes.float32)
+      kernel = variables.Variable(
+          initial_value=initializer, name='kernel', trainable=True)
 
       def loss_fn(x):
-        y = array_ops.reshape(l(x), []) - constant_op.constant(1.)
+        y = array_ops.reshape(
+            math_ops.matmul(x, kernel), []) - constant_op.constant(1.)
         return y * y
 
       # TODO(yuefengz, apassos): eager.backprop.implicit_grad is not safe for
",0,train
553b04f292eb18dbb9f1a0a9c8459db8360dce5a,tensorflow/tensorflow,"examples change to sklearn dataset load method (#7512)

* examples change to sklearn dataset load method

* fix blank lines

* fix blank lines and import order",boston.py,"@@ -16,15 +16,18 @@
 from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
+
+from sklearn import datasets
 from sklearn import model_selection
 from sklearn import metrics
 from sklearn import preprocessing
+
 import tensorflow as tf
 
 
 def main(unused_argv):
   # Load dataset
-  boston = tf.contrib.learn.datasets.load_dataset('boston')
+  boston = datasets.load_boston()
   x, y = boston.data, boston.target
 
   # Split dataset into train / test
",0,test
553b04f292eb18dbb9f1a0a9c8459db8360dce5a,tensorflow/tensorflow,"examples change to sklearn dataset load method (#7512)

* examples change to sklearn dataset load method

* fix blank lines

* fix blank lines and import order",iris.py,"@@ -17,7 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-
+from sklearn import datasets
 from sklearn import metrics
 from sklearn import model_selection
 
@@ -26,7 +26,7 @@ import tensorflow as tf
 
 def main(unused_argv):
   # Load dataset.
-  iris = tf.contrib.learn.datasets.load_dataset('iris')
+  iris = datasets.load_iris()
   x_train, x_test, y_train, y_test = model_selection.train_test_split(
       iris.data, iris.target, test_size=0.2, random_state=42)
 
",0,test
52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race.

PiperOrigin-RevId: 320473357
Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_executor.cc,"@@ -99,7 +99,10 @@ bool TpuExecutor::CreateStreamDependency(Stream* dependent, Stream* other) {
 
 Status TpuExecutor::AllocateEvent(Event* event) { return Status::OK(); }
 
-Status TpuExecutor::DeallocateEvent(Event* event) { return Status::OK(); }
+Status TpuExecutor::DeallocateEvent(Event* event) {
+  tpu_platform().EraseEvent(event->implementation());
+  return Status::OK();
+}
 
 // AllocateTimer/DeallocateTimer have no specialization.
 bool TpuExecutor::AllocateTimer(Timer* timer) { return true; }
@@ -120,26 +123,29 @@ bool TpuExecutor::StopTimer(Stream* stream, ::stream_executor::Timer* timer) {
 
 stream_executor::Event::Status TpuExecutor::PollForEventStatus(
     stream_executor::Event* event) {
+  auto se_event = tpu_platform().LookupEvent(event->implementation());
   return stream_executor::Event::Status(
-      tpu::ExecutorApiFn()->TpuExecutor_PollForEventStatusFn(
-          executor_, event_map().at(event->implementation())));
+      tpu::ExecutorApiFn()->TpuExecutor_PollForEventStatusFn(executor_,
+                                                             se_event));
 }
 
 Status TpuExecutor::RecordEvent(Stream* stream,
                                 ::stream_executor::Event* event) {
   StatusHelper status;
+  auto se_event = tpu_platform().LookupEvent(event->implementation());
   tpu::ExecutorApiFn()->TpuExecutor_RecordEventFn(
-      executor_, stream_map().at(stream->implementation()),
-      event_map().at(event->implementation()), status.c_status);
+      executor_, stream_map().at(stream->implementation()), se_event,
+      status.c_status);
   return status.status();
 }
 
 Status TpuExecutor::WaitForEvent(Stream* stream,
                                  ::stream_executor::Event* event) {
   StatusHelper status;
+  auto se_event = tpu_platform().LookupEvent(event->implementation());
   tpu::ExecutorApiFn()->TpuExecutor_WaitForEventFn(
-      executor_, stream_map().at(stream->implementation()),
-      event_map().at(event->implementation()), status.c_status);
+      executor_, stream_map().at(stream->implementation()), se_event,
+      status.c_status);
   return status.status();
 }
 
@@ -172,7 +178,7 @@ std::unique_ptr<::stream_executor::internal::EventInterface>
 TpuExecutor::CreateEventImplementation() {
   SE_Event* tpu_event = tpu::ExecutorApiFn()->TpuEvent_NewFn(executor_);
   auto ptr = absl::make_unique<TpuEvent>(tpu_event);
-  event_map()[ptr.get()] = tpu_event;
+  tpu_platform().InsertEvent(ptr.get(), tpu_event);
   return ptr;
 }
 
",0,train
52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race.

PiperOrigin-RevId: 320473357
Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_executor.h,"@@ -17,6 +17,8 @@ limitations under the License.
 #define TENSORFLOW_STREAM_EXECUTOR_TPU_TPU_EXECUTOR_H_
 
 #include ""absl/container/flat_hash_map.h""
+#include ""tensorflow/core/platform/casts.h""
+#include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/stream_executor/device_memory.h""
 #include ""tensorflow/stream_executor/device_options.h""
@@ -223,17 +225,16 @@ class TpuExecutor : public tensorflow::tpu::TpuExecutorInterface {
   }
 
  private:
-  TimerMap timer_map_;
-
-  TpuPlatform::StreamMap& stream_map() {
-    return *(static_cast<TpuPlatform*>(platform_)->stream_map());
+  TpuPlatform& tpu_platform() {
+    return *(tensorflow::down_cast<TpuPlatform*>(platform_));
   }
 
-  TpuPlatform::EventMap& event_map() {
-    return *(static_cast<TpuPlatform*>(platform_)->event_map());
+  TpuPlatform::StreamMap& stream_map() {
+    return *(tpu_platform().stream_map());
   }
 
-  ::tensorflow::tpu::TpuPlatformInterface* platform_;
+  TimerMap timer_map_;
+  tensorflow::tpu::TpuPlatformInterface* platform_;
   SE_StreamExecutor* executor_;
 };
 
",0,train
52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race.

PiperOrigin-RevId: 320473357
Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_platform.cc,"@@ -118,6 +118,23 @@ bool TpuPlatform::ShouldRegisterTpuDeviceToDeviceCopy() {
       ->TpuPlatform_ShouldRegisterTpuDeviceToDeviceCopyFn(platform_);
 }
 
+void TpuPlatform::InsertEvent(stream_executor::internal::EventInterface* key,
+                              SE_Event* val) {
+  tensorflow::mutex_lock lock(event_map_mu_);
+  event_map_[key] = val;
+}
+
+SE_Event* TpuPlatform::LookupEvent(
+    stream_executor::internal::EventInterface* key) {
+  tensorflow::tf_shared_lock lock(event_map_mu_);
+  return event_map_.at(key);
+}
+
+void TpuPlatform::EraseEvent(stream_executor::internal::EventInterface* key) {
+  tensorflow::mutex_lock lock(event_map_mu_);
+  event_map_.erase(key);
+}
+
 Status TpuPlatform::TpusPerHost(int* tpus) {
   TF_Status* status = TF_NewStatus();
   tpu::ConfigApiFn()->TpuConfigurationApi_TpusPerHostFn(tpus, status);
",0,train
52a91a0fb4c39f8f6e8c8b65130c6c3a252dcfea,tensorflow/tensorflow,"Make TpuPlatform::event_map_ access thread-safe to fix a race.

PiperOrigin-RevId: 320473357
Change-Id: I58c2e2b8b4a9cde46e4cd6ea23f994ef9335501c",tpu_platform.h,"@@ -19,6 +19,7 @@ limitations under the License.
 #include <memory>
 
 #include ""absl/container/flat_hash_map.h""
+#include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/stream_executor/executor_cache.h""
 #include ""tensorflow/stream_executor/platform.h""
@@ -111,7 +112,10 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface {
 
   StreamMap* stream_map() { return &stream_map_; }
 
-  EventMap* event_map() { return &event_map_; }
+  void InsertEvent(stream_executor::internal::EventInterface* key,
+                   SE_Event* val);
+  SE_Event* LookupEvent(stream_executor::internal::EventInterface* key);
+  void EraseEvent(stream_executor::internal::EventInterface* key);
 
   // Returns the number of TPUs per host.
   static Status TpusPerHost(int* tpus);
@@ -125,6 +129,7 @@ class TpuPlatform : public ::tensorflow::tpu::TpuPlatformInterface {
   stream_executor::ExecutorCache executor_cache_;
   StreamMap stream_map_;
   EventMap event_map_;
+  tensorflow::mutex event_map_mu_;
 };
 
 bool RegisterTpuPlatform();
",0,train
72ac5a16b80410d33067f69ee422df8aa2140578,tensorflow/tensorflow,"Add a migration docs block for `tf.compat.v1.train.init_from_checkpoint`.

PiperOrigin-RevId: 386466272
Change-Id: Id4571fc0a1a695b26a69fef6af55efe4966c44b7",checkpoint_utils.py,"@@ -220,6 +220,45 @@ def checkpoints_iterator(checkpoint_dir,
 def init_from_checkpoint(ckpt_dir_or_file, assignment_map):
   """"""Replaces `tf.Variable` initializers so they load from a checkpoint file.
 
+  @compatibility(TF2)
+  `tf.compat.v1.train.init_from_checkpoint` is not recommended for restoring
+  variable values in TF2.
+
+  To restore checkpoints in TF2, please use
+  `tf.keras.Model.load_weights` or `tf.train.Checkpoint.restore`. These APIs use
+  use an [object-based method of checkpointing]
+  (https://www.tensorflow.org/guide/checkpoint#loading_mechanics), while
+  `tf.compat.v1.init_from_checkpoint` relies on a more-fragile variable-name
+  based method of checkpointing. There is no object-based equivalent of
+  `init_from_checkpoint` in TF2.
+
+  Please re-write your checkpoints immediately using the object-based APIs,
+  see [migration guide]
+  (https://www.tensorflow.org/guide/migrate#checkpoint_compatibility) for more
+  details.
+
+  You can load a name-based checkpoint written by `tf.compat.v1.train.Saver`
+  using `tf.train.Checkpoint.restore` or `tf.keras.Model.load_weights`. However,
+  you may have to change the names of the variables in your model to match the
+  variable names in the name-based checkpoint, which can be viewed with
+  `tf.train.list_variables(path)`.
+
+  Another option is to create an `assignment_map` that maps the name of the
+  variables in the name-based checkpoint to the variables in your model, eg:
+  ```
+  {
+      'sequential/dense/bias': model.variables[0],
+      'sequential/dense/kernel': model.variables[1]
+  }
+  ```
+  and use `tf.compat.v1.train.init_from_checkpoint(path, assignment_map)` to
+  restore the name-based checkpoint.
+
+  After restoring, re-encode your checkpoint using `tf.train.Checkpoint.save`
+  or `tf.keras.Model.save_weights`.
+
+  @end_compatibility
+
   Values are not loaded immediately, but when the initializer is run
   (typically by running a `tf.compat.v1.global_variables_initializer` op).
 
",0,train
2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor;
- compute_boundary_ts was sometimes adding spurious inputs
- sgv.consumers was returning op inside the subgraph
Change: 142645987",select.py,"@@ -272,7 +272,7 @@ def get_ops_ios(ops, control_inputs=False, control_outputs=None,
   return res
 
 
-def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True):
+def compute_boundary_ts(ops):
   """"""Compute the tensors at the boundary of a set of ops.
 
   This function looks at all the tensors connected to the given ops (in/out)
@@ -281,17 +281,18 @@ def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True):
   2) output tensors: tensors whose consumer operations are not in ops
   3) inside tensors: tensors which are neither input nor output tensors.
 
+  Note that a tensor can be both an inside tensor and an output tensor if it is
+  consumed by operations both outside and inside of `ops`.
+
   Args:
     ops: an object convertible to a list of tf.Operation.
-    ambiguous_ts_are_outputs: a tensor can have consumers both inside and
-      outside ops. Such tensors are treated as outside tensor if
-      ambiguous_ts_are_outputs is True, otherwise they are treated as
-      inside tensor.
   Returns:
     A tuple `(outside_input_ts, outside_output_ts, inside_ts)` where:
       `outside_input_ts` is a Python list of input tensors;
       `outside_output_ts` is a python list of output tensors;
       `inside_ts` is a python list of inside tensors.
+    Since a tensor can be both an inside tensor and an output tensor,
+    `outside_output_ts` and `inside_ts` might intersect.
   Raises:
     TypeError: if ops cannot be converted to a list of tf.Operation.
   """"""
@@ -301,22 +302,25 @@ def compute_boundary_ts(ops, ambiguous_ts_are_outputs=True):
   output_ts_set = frozenset(output_ts)
   ops_set = frozenset(ops)
 
-  # fill in inside
+  # Compute inside tensors.
   inside_ts = []
+  only_inside_ts = []
   for t in input_ts:
-    # is also output?
+    # Skip if the input tensor is not also an output tensor.
     if t not in output_ts_set:
       continue
-    # is ambiguous_ts_are_outputs is True, don't add to inside if ambiguous
-    if ambiguous_ts_are_outputs:
-      consumers = frozenset(t.consumers())
-      if consumers - ops_set:
-        continue
+    # Mark as ""inside"".
     inside_ts.append(t)
+    # Mark as ""only inside"" if the tensor is not both inside and output.
+    consumers = frozenset(t.consumers())
+    if consumers - ops_set:
+      continue
+    only_inside_ts.append(t)
 
   inside_ts_set = frozenset(inside_ts)
+  only_inside_ts_set = frozenset(only_inside_ts)
+  outside_output_ts = [t for t in output_ts if t not in only_inside_ts_set]
   outside_input_ts = [t for t in input_ts if t not in inside_ts_set]
-  outside_output_ts = [t for t in output_ts if t not in inside_ts_set]
   return outside_input_ts, outside_output_ts, inside_ts
 
 
",0,train
2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor;
- compute_boundary_ts was sometimes adding spurious inputs
- sgv.consumers was returning op inside the subgraph
Change: 142645987",subgraph.py,"@@ -561,10 +561,19 @@ class SubGraphView(object):
     return subgraph_id
 
   def consumers(self):
-    """"""Return a Python set of all the consumers of this subgraph view.""""""
+    """"""Return a Python set of all the consumers of this subgraph view.
+
+    A consumer of a subgraph view is a tf.Operation which is a consumer
+    of one of the output tensors and is not in the subgraph.
+
+    Returns:
+      A list of `tf.Operation` which are the consumers of this subgraph view.
+    """"""
+    ops_set = frozenset(self._ops)
     res = []
     for output in self._output_ts:
-      util.concatenate_unique(res, output.consumers())
+      consumers = [op for op in output.consumers() if op not in ops_set]
+      util.concatenate_unique(res, consumers)
     return res
 
 
",0,train
2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor;
- compute_boundary_ts was sometimes adding spurious inputs
- sgv.consumers was returning op inside the subgraph
Change: 142645987",edit_test.py,"@@ -49,10 +49,10 @@ class EditTest(tf.test.TestCase):
     """"""Test for ge.detach.""""""
     sgv = ge.sgv(self.c.op, self.a.op)
     control_outputs = ge.util.ControlOutputs(self.graph)
-    ge.detach(sgv, control_inputs=control_outputs)
+    ge.detach(sgv, control_ios=control_outputs)
     # make sure the detached graph is as expected.
     self.assertTrue(ge.matcher(""^foo/c$"")
-                    .input_ops(""geph__a_0"", ""geph__b_0"")(self.c.op))
+                    .input_ops(""a"", ""geph__b_0"")(self.c.op))
 
   def test_connect(self):
     """"""Test for ge.connect.""""""
",0,train
2145b44339642796dc382153d26b434c2cc18559,tensorflow/tensorflow,"Fix two bugs int the Graph editor;
- compute_boundary_ts was sometimes adding spurious inputs
- sgv.consumers was returning op inside the subgraph
Change: 142645987",select_test.py,"@@ -101,6 +101,19 @@ class SelectTest(tf.test.TestCase):
     self.assertEqual(list(output_ts), [self.h])
     self.assertEqual(list(inside_ts), [self.g])
 
+  def test_compute_boundary_ts_2(self):
+    """"""Test for ge.select.compute_boundary_ts.""""""
+    graph = tf.Graph()
+    with graph.as_default():
+      a = tf.constant(1, name=""a"")
+      b = tf.constant(1, name=""b"")
+      c = tf.add(a, b, name=""c"")
+      _ = a + c
+    input_ts, output_ts, inside_ts = ge.select.compute_boundary_ts([a.op, c.op])
+    self.assertEqual(list(input_ts), [b])
+    self.assertEqual(list(output_ts), [a, c])
+    self.assertEqual(list(inside_ts), [a])
+
   def test_get_within_boundary_ops_0(self):
     """"""Test for test_get_within_boundary_ops.""""""
     control_outputs = ge.util.ControlOutputs(self.graph)
",0,train
6f6cfdc99a2156bcd67441d46b71dcf7d98b5c14,tensorflow/tensorflow,"Add _cache_size for testing and debugging purposes.

PiperOrigin-RevId: 387650902
Change-Id: Icec03f1cc4a0f07cee6d0d939684811b558395f1",pmap_lib.cc,"@@ -358,6 +358,8 @@ void BuildPmapSubmodule(pybind11::module& m) {
                                                                ""PmapFunction"");
   cfun.def(""__call__"", &PmapFunction::Call);
   cfun.def_property_readonly(""__signature__"", &PmapFunction::PythonSignature);
+  // All private members are only for testing/debugging purposes
+  cfun.def(""_cache_size"", &PmapFunction::cache_size);
 
   pmap_lib.def(
       ""pmap"",
",0,train
1838163152217eac4d8cb9bf960beac29f38b969,tensorflow/tensorflow,"Fix comments, return non-gradient tests for real inputs",eig_op_test.py,"@@ -150,7 +150,7 @@ def _GetEigTest(dtype_, shape_, compute_v_):
     np_dtype = dtype_.as_numpy_dtype
 
     def RandomInput():
-      # most of matrices are diagonalizable # TODO
+      # Most matrices are diagonalizable
       a = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       if dtype_.is_complex:
@@ -202,7 +202,7 @@ def _GetEigGradTest(dtype_, shape_, compute_v_):
     np_dtype = dtype_.as_numpy_dtype
 
     def RandomInput():
-      # most of matrices are diagonalizable # TODO
+      # Most matrices are diagonalizable
       a = np.random.uniform(
           low=-1.0, high=1.0, size=n * n).reshape([n, n]).astype(np_dtype)
       if dtype_.is_complex:
@@ -264,6 +264,10 @@ if __name__ == ""__main__"":
           shape = batch_dims + (size, size)
           name = ""%s_%s_%s"" % (dtype.name, ""_"".join(map(str, shape)), compute_v)
           _AddTest(EigTest, ""Eig"", name, _GetEigTest(dtype, shape, compute_v))
-          _AddTest(EigGradTest, ""EigGrad"", name, 
-                   _GetEigGradTest(dtype, shape, compute_v))
+
+          # TODO: gradient_check gets wrong numeric output for real inputs
+          # (might be connected with the fact that outputs are complex)
+          if dtype not in [dtypes_lib.float32, dtypes_lib.float64]:
+            _AddTest(EigGradTest, ""EigGrad"", name,
+                     _GetEigGradTest(dtype, shape, compute_v))
   test.main()
",0,train
1838163152217eac4d8cb9bf960beac29f38b969,tensorflow/tensorflow,"Fix comments, return non-gradient tests for real inputs",linalg_grad.py,"@@ -635,7 +635,7 @@ def _MatrixTriangularSolveGrad(op, grad):
 
 # To avoid nan in cases with degenerate eigenvalues or
 # degenerate/zero singular values in calculations of
-# f and s_inv_mat, we introduce a Lorentz brodening.
+# f and s_inv_mat, we introduce a Lorentz broadening.
 def _SafeReciprocal(x, epsilon=1E-20):
   return x * math_ops.reciprocal(x * x + epsilon)
 
",0,train
8e29dc771442e8ca9df0c37277080d11599f1043,tensorflow/tensorflow,"Sort control edges on input considering src node

Otherwise the order of control edges could change from run to run. The order of the control edges shouldn't matter, but providing a consistent input order for a given graph makes changes easier to identify.

PiperOrigin-RevId: 310024796
Change-Id: I45c6e0801093b2037e5950cca6210df32625038b",import_model.cc,"@@ -1817,6 +1817,8 @@ Status ImporterBase::ConvertNode(const Node& node) {
   absl::c_stable_sort(in_edges, [](const Edge* e1, const Edge* e2) {
     if (e1->IsControlEdge() && !e2->IsControlEdge()) return false;
     if (!e1->IsControlEdge() && e2->IsControlEdge()) return true;
+    if (e1->IsControlEdge() && e2->IsControlEdge())
+      return e1->src()->id() < e2->src()->id();
     return e1->dst_input() < e2->dst_input();
   });
 
",0,train
de7b004fc72b30ec55011173b76f63a7e0e93279,tensorflow/tensorflow,"Handle rank 1 broadcasts in unranked kernel lowering.

Previously this started at rank 2 after checking for scalars and equal shapes. This resulted in cases such as <1xf32> + <2xf32> being treated as impossible.

PiperOrigin-RevId: 341043965
Change-Id: Id8539c58795bb50c4dda6e7c13a47040cfec96b4",transform_unranked_hlo.cc,"@@ -386,14 +386,14 @@ struct ConvertUnrankedDynamicBroadcastBinaryOp
         rewriter.create<SelectOp>(loc, greater_rank_lhs, lhs_rank, rhs_rank);
 
     // Generate a list of nested if/else statements to handle rank
-    // specializations from 2-6.
+    // specializations from 1-6.
     scf::IfOp if_op = createRankSpecializedBroadcastAndOp(rewriter, op, lhs,
-                                                          rhs, greater_rank, 2);
+                                                          rhs, greater_rank, 1);
 
     // Put each subsequent rank specialization inside the else statement of the
     // previous one.
     OpBuilder else_builder = if_op.getElseBodyBuilder(rewriter.getListener());
-    for (int i = 3; i < max_rank_specialization; i++) {
+    for (int i = 2; i < max_rank_specialization; i++) {
       auto inner_if = createRankSpecializedBroadcastAndOp(else_builder, op, lhs,
                                                           rhs, greater_rank, i);
 
",0,train
417dd3793a648406feb8668cad2e341fe979c391,tensorflow/tensorflow,"[XLA] Fix opensource build breakage caused by undefined int64.

Include xla/types.h in versioned_computation_handle. int64 was
previously not defined in this file in the opensource build.
Change: 148357352",versioned_computation_handle.h,"@@ -18,8 +18,8 @@ limitations under the License.
 
 #include <ostream>
 
+#include ""tensorflow/compiler/xla/types.h""
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
-#include ""tensorflow/core/platform/types.h""
 
 namespace xla {
 
",0,train
d3cc268d017ca22a2259befa521894cb8e3ed002,tensorflow/tensorflow,Added example related to top_k parameter in tf.keras.metrics.Precision,metrics.py,"@@ -1191,6 +1191,17 @@ class Precision(Metric):
   >>> m.result().numpy()
   1.0
 
+  >>> # With top_k=2, it will calculate precision over y_true[:2] and y_pred[:2]
+  >>> m = tf.keras.metrics.Precision(top_k=2)
+  >>> _ = m.update_state([0, 0, 1, 1], [1, 1, 1, 1])
+  >>> m.result().numpy()
+  0.0
+  >>> # With top_k=4, it will calculate precision over y_true[:4] and y_pred[:4]
+  >>> m = tf.keras.metrics.Precision(top_k=4)
+  >>> _ = m.update_state([0, 0, 1, 1], [1, 1, 1, 1])
+  >>> m.result().numpy()
+  0.5
+
   Usage with tf.keras API:
 
   ```python
",0,train
e52ac76b773693d5d289205162ca43ee23561251,tensorflow/tensorflow,support IndexedSlices in `add_n`,math_ops.py,"@@ -2105,7 +2105,8 @@ def add_n(inputs, name=None):
   """"""Adds all input tensors element-wise.
 
   Args:
-    inputs: A list of `Tensor` objects, each with same shape and type.
+    inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape
+      and type.
     name: A name for the operation (optional).
 
   Returns:
@@ -2116,17 +2117,21 @@ def add_n(inputs, name=None):
     cannot be inferred.
   """"""
   if not inputs or not isinstance(inputs, (list, tuple)):
-    raise ValueError(""inputs must be a list of at least one Tensor with the ""
-                     ""same dtype and shape"")
+    raise ValueError(""inputs must be a list of at least one Tensor/IndexedSlices""
+                     ""with the same dtype and shape"")
   inputs = ops.convert_n_to_tensor_or_indexed_slices(inputs)
-  if not all(isinstance(x, ops.Tensor) for x in inputs):
-    raise ValueError(""inputs must be a list of at least one Tensor with the ""
-                     ""same dtype and shape"")
+  if not all(isinstance(x, (ops.Tensor, ops.IndexedSlices)) for x in inputs):
+    raise ValueError(""inputs must be a list of at least one Tensor/IndexedSlices""
+                     ""with the same dtype and shape"")
 
   if len(inputs) == 1:
+    if isinstance(inputs[0], ops.IndexedSlices):
+      values = inputs[0].values
+    else:
+      values = inputs[0]
     if name:
-      return array_ops.identity(inputs[0], name=name)
-    return inputs[0]
+      return array_ops.identity(values, name=name)
+    return values
   return gen_math_ops.add_n(inputs, name=name)
 
 
",0,test
d1b08cf5159bba6033df87f93f27778c2b94e14a,tensorflow/tensorflow,"NFC: Move the Type::is* predicates to StandardTypes.cpp

These methods are currently defined 'inline' in StandardTypes.h, but this may create linker errors if StandardTypes.h isn't included at the use site.

PiperOrigin-RevId: 263850328",StandardTypes.h,"@@ -71,13 +71,6 @@ enum Kind {
 
 } // namespace StandardTypes
 
-inline bool Type::isBF16() { return getKind() == StandardTypes::BF16; }
-inline bool Type::isF16() { return getKind() == StandardTypes::F16; }
-inline bool Type::isF32() { return getKind() == StandardTypes::F32; }
-inline bool Type::isF64() { return getKind() == StandardTypes::F64; }
-
-inline bool Type::isIndex() { return getKind() == StandardTypes::Index; }
-
 /// Index is a special integer-like type with unknown platform-dependent bit
 /// width.
 class IndexType : public Type::TypeBase<IndexType, Type> {
@@ -123,25 +116,6 @@ public:
   static constexpr unsigned kMaxWidth = 4096;
 };
 
-/// Return true if this is an integer type with the specified width.
-inline bool Type::isInteger(unsigned width) {
-  if (auto intTy = dyn_cast<IntegerType>())
-    return intTy.getWidth() == width;
-  return false;
-}
-
-inline bool Type::isIntOrIndex() {
-  return isa<IndexType>() || isa<IntegerType>();
-}
-
-inline bool Type::isIntOrIndexOrFloat() {
-  return isa<IndexType>() || isa<IntegerType>() || isa<FloatType>();
-}
-
-inline bool Type::isIntOrFloat() {
-  return isa<IntegerType>() || isa<FloatType>();
-}
-
 class FloatType : public Type::TypeBase<FloatType, Type> {
 public:
   using Base::Base;
",0,train
e9deb127980812d2925d701c919f094c977b359f,tensorflow/tensorflow,"Expose tf.summary.record_if(condition) context manager in TF 2.0

This generalizes the TF 1.x contrib summary APIs always_record_summaries(), never_record_summaries(), and record_summaries_every_n_global_steps(). The new context manager accepts a ""condition"" that can be a constant boolean, a boolean tensor value, or a callable returning such.

PiperOrigin-RevId: 233823923",context.py,"@@ -141,8 +141,8 @@ class _EagerContext(threading.local):
     self.mode = default_execution_mode
     self.is_eager = default_execution_mode == EAGER_MODE
     self.scope_name = """"
-    self.recording_summaries = False
     self.summary_writer_resource = None
+    self.recording_summaries = None
     self.scalar_cache = {}
     self._ones_rank_cache = None
     self._zeros_cache = None
@@ -520,6 +520,16 @@ class Context(object):
     """"""Sets summary writer resource.""""""
     self._eager_context.summary_writer_resource = resource
 
+  @property
+  def recording_summaries(self):
+    """"""Returns summary recording condition.""""""
+    return self._eager_context.recording_summaries
+
+  @recording_summaries.setter
+  def recording_summaries(self, condition):
+    """"""Sets summary recording condition.""""""
+    self._eager_context.recording_summaries = condition
+
   @property
   def device_name(self):
     """"""Returns the device name for the current thread.""""""
",0,train
e9deb127980812d2925d701c919f094c977b359f,tensorflow/tensorflow,"Expose tf.summary.record_if(condition) context manager in TF 2.0

This generalizes the TF 1.x contrib summary APIs always_record_summaries(), never_record_summaries(), and record_summaries_every_n_global_steps(). The new context manager accepts a ""condition"" that can be a constant boolean, a boolean tensor value, or a callable returning such.

PiperOrigin-RevId: 233823923",summary_ops_v2.py,"@@ -45,11 +45,6 @@ from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
 
-# Dictionary mapping graph keys to a boolean Tensor (or callable returning
-# a boolean Tensor) indicating whether we should record summaries for the
-# graph identified by the key of the dictionary.
-_SHOULD_RECORD_SUMMARIES = {}
-
 # A global dictionary mapping graph keys to a list of summary writer init ops.
 _SUMMARY_WRITER_INIT_OP = {}
 
@@ -61,10 +56,8 @@ _USER_NAME_PATTERNS = re.compile(r""^[a-z]([-a-z0-9]{0,29}[a-z0-9])?$"", re.I)
 def _should_record_summaries_internal():
   """"""Returns boolean Tensor if summaries should/shouldn't be recorded, or None.
   """"""
-  global _SHOULD_RECORD_SUMMARIES
-  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  should = _SHOULD_RECORD_SUMMARIES.get(key)
-  return should() if callable(should) else should
+  condition = context.context().recording_summaries
+  return condition() if callable(condition) else condition
 
 
 def _should_record_summaries_v2():
@@ -83,32 +76,28 @@ def should_record_summaries():
   return False if result is None else result
 
 
+@tf_export(""summary.record_if"", v1=[])
 @tf_contextlib.contextmanager
-def _record_summaries(boolean=True):
+def record_if(condition):
   """"""Sets summary recording on or off per the provided boolean value.
 
   The provided value can be a python boolean, a scalar boolean Tensor, or
   a callable providing such a value; if a callable is passed it will be
-  invoked each time should_record_summaries() is called to determine whether
-  summary writing should be enabled.
+  invoked on-demand to determine whether summary writing will occur.
 
   Args:
-    boolean: can be True, False, a bool Tensor, or a callable providing such.
-      Defaults to True.
+    condition: can be True, False, a bool Tensor, or a callable providing such.
 
   Yields:
     Returns a context manager that sets this value on enter and restores the
     previous value on exit.
   """"""
-  # TODO(nickfelt): make this threadlocal
-  global _SHOULD_RECORD_SUMMARIES
-  key = ops.get_default_graph()._graph_key  # pylint: disable=protected-access
-  old = _SHOULD_RECORD_SUMMARIES.setdefault(key, None)
+  old = context.context().recording_summaries
   try:
-    _SHOULD_RECORD_SUMMARIES[key] = boolean
+    context.context().recording_summaries = condition
     yield
   finally:
-    _SHOULD_RECORD_SUMMARIES[key] = old
+    context.context().recording_summaries = old
 
 
 # TODO(apassos) consider how to handle local step here.
@@ -120,17 +109,17 @@ def record_summaries_every_n_global_steps(n, global_step=None):
     should = lambda: math_ops.equal(global_step % n, 0)
     if not context.executing_eagerly():
       should = should()
-  return _record_summaries(should)
+  return record_if(should)
 
 
 def always_record_summaries():
   """"""Sets the should_record_summaries Tensor to always true.""""""
-  return _record_summaries(True)
+  return record_if(True)
 
 
 def never_record_summaries():
   """"""Sets the should_record_summaries Tensor to always false.""""""
-  return _record_summaries(False)
+  return record_if(False)
 
 
 @tf_export(""summary.SummaryWriter"", v1=[])
",0,train
491ea166528922468d5b5b7b826f42df44e3b88f,tensorflow/tensorflow,"[XLA] Do not reserve large hash maps when there are many small computations.

PiperOrigin-RevId: 380641027
Change-Id: I51ae2adcaefbc94a139eac4a5cafb5df724cb280",hlo_instruction.cc,"@@ -3470,12 +3470,7 @@ template <typename Visitor>
 static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
                            const InternalCompareFunction* operand_order,
                            bool ignore_control_predecessors) {
-  // Calculating the instruction count within a module can be expensive on large
-  // models so only do it if the visit state is empty. This will help when the
-  // same visitor is reused across many computations of a single module.
-  if (visitor->VisitStateCapacity() == 0) {
-    visitor->ReserveVisitStates(root->GetModule()->instruction_count());
-  }
+  visitor->ReserveVisitStates(root->parent()->instruction_count());
 
   // dfs_stack holds pairs of <HloInstruction*->unique_id(), HloInstruction*>.
   //
",0,test
5bc685d7f16b0fc27b936e63fa01668e4af4034c,tensorflow/tensorflow,"[XLA] If an op has a single ""large"" operand, we want to fuse this op into some of its consumers, even if we can't fuse into all of them.

PiperOrigin-RevId: 157779106",instruction_fusion.cc,"@@ -151,7 +151,26 @@ StatusOr<bool> InstructionFusion::Run(HloModule* module) {
         return true;
       };
 
-      if (std::all_of(hlo->users().begin(), hlo->users().end(),
+      // An ""effectively unary"" operation is one that has one ""large""
+      // input with the others being negligible in terms of memory usage.
+      // We use ""has a smaller true rank than the output"" as a heuristic
+      // for ""negligible"" memory usage.
+      auto effectively_unary = [](HloInstruction* hlo) {
+        if (hlo->operands().size() == 1) {
+          return true;
+        }
+        auto output_rank = ShapeUtil::TrueRank(hlo->shape());
+        return std::count_if(
+                   hlo->operands().begin(), hlo->operands().end(),
+                   [output_rank](HloInstruction* operand) {
+                     return ((operand->opcode() != HloOpcode::kBroadcast) &&
+                             ShapeUtil::TrueRank(operand->shape()) >=
+                                 output_rank);
+                   }) <= 1;
+      };
+
+      if (effectively_unary(hlo) ||
+          std::all_of(hlo->users().begin(), hlo->users().end(),
                       user_fusable_into_hlo)) {
         all_consumers_fusable.insert(hlo);
       }
",0,test
5bc685d7f16b0fc27b936e63fa01668e4af4034c,tensorflow/tensorflow,"[XLA] If an op has a single ""large"" operand, we want to fuse this op into some of its consumers, even if we can't fuse into all of them.

PiperOrigin-RevId: 157779106",instruction_fusion_test.cc,"@@ -156,18 +156,64 @@ TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfParameterUnfused) {
 
 TEST_F(InstructionFusionTest, AvoidDuplicationIfNotAllFusable) {
   HloComputation::Builder builder(TestName());
-  auto param0 = builder.AddInstruction(HloInstruction::CreateParameter(
-      0, ShapeUtil::MakeShape(F32, {16, 16}), ""0""));
-  HloInstruction* unary1 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {}), HloOpcode::kFloor, param0));
+  auto shape = ShapeUtil::MakeShape(F32, {16, 16});
+  auto param0 =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, ""0""));
+  auto param1 =
+      builder.AddInstruction(HloInstruction::CreateParameter(1, shape, ""1""));
+  HloInstruction* binary1 = builder.AddInstruction(
+      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1));
+  builder.AddInstruction(HloInstruction::CreateSend(binary1, 0));
+  HloInstruction* unary = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kAbs, binary1));
+
+  auto module = MakeUnique<HloModule>(TestName());
+  auto computation = module->AddEntryComputation(builder.Build());
+  EXPECT_EQ(unary, computation->root_instruction());
+  EXPECT_FALSE(
+      InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true)
+          .Run(module.get())
+          .ValueOrDie());
+}
+
+TEST_F(InstructionFusionTest, AllowUnaryDuplication) {
+  HloComputation::Builder builder(TestName());
+  auto shape = ShapeUtil::MakeShape(F32, {16, 16});
+  auto param0 =
+      builder.AddInstruction(HloInstruction::CreateParameter(0, shape, ""0""));
+  HloInstruction* unary1 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kFloor, param0));
   builder.AddInstruction(HloInstruction::CreateSend(unary1, 0));
-  HloInstruction* unary2 = builder.AddInstruction(HloInstruction::CreateUnary(
-      ShapeUtil::MakeShape(S32, {}), HloOpcode::kAbs, unary1));
+  HloInstruction* unary2 = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kAbs, unary1));
 
   auto module = MakeUnique<HloModule>(TestName());
   auto computation = module->AddEntryComputation(builder.Build());
   EXPECT_EQ(unary2, computation->root_instruction());
-  EXPECT_FALSE(
+  EXPECT_TRUE(
+      InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true)
+          .Run(module.get())
+          .ValueOrDie());
+}
+
+TEST_F(InstructionFusionTest, AllowEffectiveUnaryDuplication) {
+  auto shape = ShapeUtil::MakeShape(F32, {16, 16});
+  auto small_shape = ShapeUtil::MakeShape(F32, {16});
+  HloComputation::Builder builder(TestName());
+  auto param0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, small_shape, ""0""));
+  auto param1 =
+      builder.AddInstruction(HloInstruction::CreateParameter(1, shape, ""1""));
+  HloInstruction* binary1 = builder.AddInstruction(
+      HloInstruction::CreateBinary(shape, HloOpcode::kAdd, param0, param1));
+  builder.AddInstruction(HloInstruction::CreateSend(binary1, 0));
+  HloInstruction* unary = builder.AddInstruction(
+      HloInstruction::CreateUnary(shape, HloOpcode::kAbs, binary1));
+
+  auto module = MakeUnique<HloModule>(TestName());
+  auto computation = module->AddEntryComputation(builder.Build());
+  EXPECT_EQ(unary, computation->root_instruction());
+  EXPECT_TRUE(
       InstructionFusion(InstructionFusion::IsExpensive, /*may_duplicate=*/true)
           .Run(module.get())
           .ValueOrDie());
",0,test
d1198909057c635de2bae3e1c4fb8505466ee325,tensorflow/tensorflow,"Naming for valid scope name in eager mode.

PiperOrigin-RevId: 223392695",data_flow_ops.py,"@@ -171,7 +171,10 @@ class QueueBase(object):
       self._names = None
     self._queue_ref = queue_ref
     if context.executing_eagerly():
-      self._name = context.context().scope_name
+      if context.context().scope_name:
+        self._name = context.context().scope_name
+      else:
+        self._name = ""Empty""
       self._resource_deleter = resource_variable_ops.EagerResourceDeleter(
           queue_ref, None)
     else:
",0,train
df94f63281a40acdd695a9a28862905af976adfd,tensorflow/tensorflow,"Minor: fix typo in Defun doc.
Change: 124731079",function.py,"@@ -428,7 +428,7 @@ class Defun(object):
   argument of the function to decorate, with the expected type of the argument
   as value.
 
-  For example if the function to decorate accepts to `tf.float32` arguments
+  For example if the function to decorate accepts two `tf.float32` arguments
   named `x` and `y`, call the decorator with:
 
       @Defun(tf.float32, tf.float32)
",0,train
0ae47a7f3a3cbfb47a432741d525bafc50c6b68a,tensorflow/tensorflow,"Fix unit test for matrix square root: Don't try to take the matrix square root of a matrix for which the square root may not exist.

PiperOrigin-RevId: 223078107",matrix_square_root_op_test.py,"@@ -102,13 +102,13 @@ class SquareRootOpTest(test.TestCase):
       self.evaluate(gen_linalg_ops.matrix_square_root(tensor))
 
   def testConcurrentExecutesWithoutError(self):
-    self.skipTest(""Triggers assert in matrix_sqrt_quasi_triangular_diagonal"")
-
     with test_util.use_gpu():
       matrix1 = random_ops.random_normal([5, 5], seed=42)
       matrix2 = random_ops.random_normal([5, 5], seed=42)
-      sqrt1 = gen_linalg_ops.matrix_square_root(matrix1)
-      sqrt2 = gen_linalg_ops.matrix_square_root(matrix2)
+      square1 = math_ops.matmul(matrix1, matrix1)
+      square2 = math_ops.matmul(matrix2, matrix2)
+      sqrt1 = gen_linalg_ops.matrix_square_root(square1)
+      sqrt2 = gen_linalg_ops.matrix_square_root(square2)
       all_ops = [sqrt1, sqrt2]
       sqrt = self.evaluate(all_ops)
       self.assertAllEqual(sqrt[0], sqrt[1])
",0,train
bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation
Change: 123382264",softmax_op.cc,"@@ -40,6 +40,9 @@ struct SoftmaxFunctor<CPUDevice, T> {
 };
 }  // namespace functor
 
+REGISTER_KERNEL_BUILDER(
+    Name(""Softmax"").Device(DEVICE_CPU).TypeConstraint<Eigen::half>(""T""),
+    SoftmaxOp<CPUDevice, Eigen::half>);
 REGISTER_KERNEL_BUILDER(Name(""Softmax"")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<float>(""T""),
@@ -48,24 +51,30 @@ REGISTER_KERNEL_BUILDER(Name(""Softmax"")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<double>(""T""),
                         SoftmaxOp<CPUDevice, double>);
-REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"")
-                            .Device(DEVICE_CPU)
-                            .TypeConstraint<float>(""T""),
-                        SoftmaxOp<CPUDevice, float>);
+REGISTER_KERNEL_BUILDER(
+    Name(""LogSoftmax"").Device(DEVICE_CPU).TypeConstraint<Eigen::half>(""T""),
+    SoftmaxOp<CPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(
+    Name(""LogSoftmax"").Device(DEVICE_CPU).TypeConstraint<float>(""T""),
+    SoftmaxOp<CPUDevice, float>);
 REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<double>(""T""),
                         SoftmaxOp<CPUDevice, double>);
 
 #if GOOGLE_CUDA
-REGISTER_KERNEL_BUILDER(Name(""Softmax"")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>(""T""),
-                        SoftmaxOp<GPUDevice, float>);
-REGISTER_KERNEL_BUILDER(Name(""LogSoftmax"")
-                            .Device(DEVICE_GPU)
-                            .TypeConstraint<float>(""T""),
-                        SoftmaxOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(
+    Name(""Softmax"").Device(DEVICE_GPU).TypeConstraint<Eigen::half>(""T""),
+    SoftmaxOp<GPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(
+    Name(""Softmax"").Device(DEVICE_GPU).TypeConstraint<float>(""T""),
+    SoftmaxOp<GPUDevice, float>);
+REGISTER_KERNEL_BUILDER(
+    Name(""LogSoftmax"").Device(DEVICE_GPU).TypeConstraint<Eigen::half>(""T""),
+    SoftmaxOp<GPUDevice, Eigen::half>);
+REGISTER_KERNEL_BUILDER(
+    Name(""LogSoftmax"").Device(DEVICE_GPU).TypeConstraint<float>(""T""),
+    SoftmaxOp<GPUDevice, float>);
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
",0,test
bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation
Change: 123382264",softmax_op_gpu.cu.cc,"@@ -39,6 +39,7 @@ struct SoftmaxFunctor<GPUDevice, T> {
 }  // end namespace functor
 
 // Instantiate the GPU implementation for float.
+template struct functor::SoftmaxFunctor<GPUDevice, Eigen::half>;
 template struct functor::SoftmaxFunctor<GPUDevice, float>;
 
 }  // end namespace tensorflow
",0,test
bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation
Change: 123382264",nn_ops.cc,"@@ -856,7 +856,7 @@ backprops: The gradients: `gradients / (1 + abs(-features)) ** 2`.
 REGISTER_OP(""Softmax"")
     .Input(""logits: T"")
     .Output(""softmax: T"")
-    .Attr(""T: {float, double}"")
+    .Attr(""T: {half, float, double}"")
     .Doc(R""doc(
 Computes softmax activations.
 
@@ -873,7 +873,7 @@ softmax: Same shape as `logits`.
 REGISTER_OP(""LogSoftmax"")
     .Input(""logits: T"")
     .Output(""logsoftmax: T"")
-    .Attr(""T: {float, double}"")
+    .Attr(""T: {half, float, double}"")
     .Doc(R""doc(
 Computes log softmax activations.
 
",0,test
bb0190f6c26bf11f601102dfe2166a68a7833020,tensorflow/tensorflow,"Added support for fp16 to the softmax operation
Change: 123382264",softmax_op_test.py,"@@ -50,13 +50,13 @@ class SoftmaxTest(tf.test.TestCase):
       else:
         tf_softmax = tf.nn.softmax(np_features, name=name)
       out = tf_softmax.eval()
-    self.assertAllClose(np_softmax, out)
+    self.assertAllCloseAccordingToType(np_softmax, out)
     self.assertShapeEqual(np_softmax, tf_softmax)
     if not log:
       # Bonus check: the softmaxes should add to one in each
       # batch element.
-      self.assertAllClose(np.ones(out.shape[0]),
-                          np.sum(out, axis=1))
+      self.assertAllCloseAccordingToType(np.ones(out.shape[0]),
+                                         np.sum(out, axis=1))
 
   def _testAll(self, features):
     self._testSoftmax(features, use_gpu=False)
@@ -118,6 +118,10 @@ class SoftmaxTest(tf.test.TestCase):
     self._testAll(
         np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float32))
 
+  def testHalf(self):
+    self._testAll(
+        np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float16))
+
   def testDouble(self):
     self._testSoftmax(
         np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64),
",0,test
667f63f9a473150caef5ba9c2445dfbd4d3cf3e2,tensorflow/tensorflow,"[tf:tfrt] Check returned memrefs/tensors alignment

PiperOrigin-RevId: 410496254
Change-Id: Ie707a0be520aeb725a5cfc48bef59dae2595941c",tf_cpurt.h,"@@ -142,10 +142,11 @@ struct ConvertTensor {
 
     // Incorrect alignment will lead to a segfault in the downstream Tensorflow
     // kernels, check it before returning to the runtime.
+    // TODO(ezhulenev): Downgrade CHECKs to DCHECKs.
     if (internal::IsStaticStorageDuration(memref)) {
-      DCHECK(tensor.IsAligned()) << ""global memref is not aligned"";
+      CHECK(tensor.IsAligned()) << ""global memref is not aligned"";
     } else {
-      DCHECK(tensor.IsAligned()) << ""allocated memref is not aligned"";
+      CHECK(tensor.IsAligned()) << ""allocated memref is not aligned"";
     }
 
     return tensor;
",0,test
5574d6041a5a5d91c4be3449d7a456a146da4c0e,tensorflow/tensorflow,"Enrich update ops from inputs

PiperOrigin-RevId: 204223077",training.py,"@@ -599,7 +599,7 @@ class Model(Network):
         # Unconditional updates
         updates += self.get_updates_for(None)
         # Conditional updates relevant to this model
-        updates += self.get_updates_for(self._feed_inputs)
+        updates += self.get_updates_for(self.inputs)
         # Stateful metrics updates
         updates += self.metrics_updates
         # Gets loss and metrics. Updates weights at each call.
",0,train
5574d6041a5a5d91c4be3449d7a456a146da4c0e,tensorflow/tensorflow,"Enrich update ops from inputs

PiperOrigin-RevId: 204223077",models_test.py,"@@ -37,6 +37,7 @@ class TestModelCloning(test.TestCase):
 
       model = keras.models.Sequential()
       model.add(keras.layers.Dense(4, input_shape=(4,)))
+      model.add(keras.layers.BatchNormalization())
       model.add(keras.layers.Dropout(0.5))
       model.add(keras.layers.Dense(4))
 
@@ -46,6 +47,8 @@ class TestModelCloning(test.TestCase):
     with self.test_session():
       # With placeholder creation
       new_model = keras.models.clone_model(model)
+      # update ops from batch norm need to be included
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch(val_a, val_out)
 
@@ -53,6 +56,7 @@ class TestModelCloning(test.TestCase):
       input_a = keras.Input(shape=(4,))
       new_model = keras.models.clone_model(
           model, input_tensors=input_a)
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch(val_a, val_out)
 
@@ -60,6 +64,7 @@ class TestModelCloning(test.TestCase):
       input_a = keras.backend.variable(val_a)
       new_model = keras.models.clone_model(
           model, input_tensors=input_a)
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch(None, val_out)
 
@@ -76,6 +81,7 @@ class TestModelCloning(test.TestCase):
 
       x_a = dense_1(input_a)
       x_a = keras.layers.Dropout(0.5)(x_a)
+      x_a = keras.layers.BatchNormalization()(x_a)
       x_b = dense_1(input_b)
       x_a = dense_2(x_a)
       outputs = keras.layers.add([x_a, x_b])
@@ -87,6 +93,7 @@ class TestModelCloning(test.TestCase):
     with self.test_session():
       # With placeholder creation
       new_model = keras.models.clone_model(model)
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch([val_a, val_b], val_out)
 
@@ -95,6 +102,7 @@ class TestModelCloning(test.TestCase):
       input_b = keras.Input(shape=(4,), name='b')
       new_model = keras.models.clone_model(
           model, input_tensors=[input_a, input_b])
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch([val_a, val_b], val_out)
 
@@ -103,6 +111,7 @@ class TestModelCloning(test.TestCase):
       input_b = keras.backend.variable(val_b)
       new_model = keras.models.clone_model(
           model, input_tensors=[input_a, input_b])
+      self.assertEquals(len(new_model.get_updates_for(new_model.inputs)), 2)
       new_model.compile('rmsprop', 'mse')
       new_model.train_on_batch(None, val_out)
 
",0,train
f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propagate attrs from the forward function call
to its corresponding backward function call.
Change: 137480188",function_test.py,"@@ -166,6 +166,32 @@ class FunctionTest(tf.test.TestCase):
       self.assertEqual(x.get_shape(), dx.get_shape())
       self.assertEqual(y.get_shape(), dy.get_shape())
 
+  def testSymGradAttr(self):
+    @function.Defun(noinline=True)
+    def Foo(x):
+      return x * 2
+
+    g = tf.Graph()
+    with g.as_default():
+      x = tf.constant(3.0)
+      y = Foo(x)
+      dx, = tf.gradients(y, [x])
+
+    self.assertTrue(y.op.node_def.attr[""_noinline""].b)
+    self.assertTrue(dx.op.node_def.attr['f'].func.attr['_noinline'].b)
+
+    cfg = tf.ConfigProto(graph_options=tf.GraphOptions(
+        optimizer_options=tf.OptimizerOptions(
+            opt_level=tf.OptimizerOptions.L0,
+            do_common_subexpression_elimination=True,
+            do_function_inlining=True,
+            do_constant_folding=True)))
+
+    with self.test_session(graph=g, config=cfg):
+      self.assertAllClose(y.eval(), 6.)
+      self.assertAllClose(dx.eval(), 2.)
+
+
   def testZNoDepOnY(self):
 
     @function.Defun(tf.float32, tf.float32)
",0,train
f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propagate attrs from the forward function call
to its corresponding backward function call.
Change: 137480188",op_def_library.py,"@@ -695,7 +695,9 @@ class OpDefLibrary(object):
           attr_value.list.tensor.extend(
               [_MakeTensor(x, key) for x in value])
         elif attr_def.type == ""func"":
-          if isinstance(value, compat.bytes_or_text_types):
+          if isinstance(value, attr_value_pb2.NameAttrList):
+            attr_value.func.CopyFrom(value)
+          elif isinstance(value, compat.bytes_or_text_types):
             attr_value.func.name = value
           else:
             value.add_to_graph(ops.get_default_graph())
",0,train
f2e46bddc9639b643829778011111932e49b6241,tensorflow/tensorflow,"Propagate attrs from the forward function call
to its corresponding backward function call.
Change: 137480188",gradients.py,"@@ -26,6 +26,7 @@ import numpy as np
 import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.core.framework import attr_value_pb2
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -89,10 +90,8 @@ def _IndexedSlicesToTensor(value, dtype=None, name=None, as_ref=False):
     warnings.warn(
         ""Converting sparse IndexedSlices to a dense Tensor of unknown shape. ""
         ""This may consume a large amount of memory."")
-  return math_ops.unsorted_segment_sum(value.values,
-                                       value.indices,
-                                       value.dense_shape[0],
-                                       name=name)
+  return math_ops.unsorted_segment_sum(
+      value.values, value.indices, value.dense_shape[0], name=name)
 
 
 ops.register_tensor_conversion_function(ops.IndexedSlices,
@@ -224,8 +223,8 @@ def _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops):
     if grad_y is None:
       with _maybe_colocate_with(y.op, colocate_gradients_with_ops):
         grad_ys[i] = array_ops.fill(
-            array_ops.shape(y),
-            constant_op.constant(1, dtype=y.dtype))
+            array_ops.shape(y), constant_op.constant(
+                1, dtype=y.dtype))
     else:
       if grad_y.dtype != y.dtype:
         raise ValueError(""Y and ys_grad must be of the same type, ""
@@ -304,6 +303,20 @@ def _maybe_colocate_with(op, colocate_gradients_with_ops):
     yield
 
 
+def _SymGrad(op, out_grads):
+  """"""Backprop through a function call node op given its outputs' gradients.""""""
+  f_in = [x for x in op.inputs] + out_grads
+  f_types = [x.dtype for x in op.inputs]
+  f = attr_value_pb2.NameAttrList()
+  f.name = op.type
+  for k in op.node_def.attr:
+    f.attr[k].CopyFrom(op.node_def.attr[k])
+  # pylint: disable=protected-access
+  in_grads = functional_ops._symbolic_gradient(input=f_in, Tout=f_types, f=f)
+  # pylint: enable=protected-access
+  return in_grads
+
+
 def gradients(ys,
               xs,
               grad_ys=None,
@@ -376,8 +389,8 @@ def gradients(ys,
     # to the xs.
     to_ops = [t.op for t in ys]
     from_ops = [t.op for t in xs]
-    pending_count, loop_state = _PendingCount(ops.get_default_graph(),
-                                              to_ops, from_ops,
+    pending_count, loop_state = _PendingCount(ops.get_default_graph(), to_ops,
+                                              from_ops,
                                               colocate_gradients_with_ops)
 
     # Iterate over the collected ops.
@@ -451,8 +464,8 @@ def gradients(ys,
           # output, it means that the cost does not depend on output[i],
           # therefore dC/doutput[i] is 0.
           for i, out_grad in enumerate(out_grads):
-            if (not isinstance(out_grad, ops.Tensor)
-                and not out_grad) and _IsTrainable(op.outputs[i]):
+            if (not isinstance(out_grad, ops.Tensor) and
+                not out_grad) and _IsTrainable(op.outputs[i]):
               # Only floating-point outputs get a zero gradient. Gradient
               # functions should ignore the gradient for other outputs.
               if loop_state:
@@ -466,16 +479,12 @@ def gradients(ys,
               if grad_fn:
                 # If grad_fn was found, do not use SymbolicGradient even for
                 # functions.
-                in_grads = _AsList(grad_fn(op, *out_grads))
+                in_grads = grad_fn(op, *out_grads)
               else:
                 # For function call ops, we add a 'SymbolicGradient'
                 # node to the graph to compute gradients.
-                f_in = [x for x in op.inputs] + out_grads
-                f_types = [x.dtype for x in op.inputs]
-                # pylint: disable=protected-access
-                in_grads = _AsList(functional_ops._symbolic_gradient(
-                    f_in, f_types, op.type))
-                # pylint: enable=protected-access
+                in_grads = _SymGrad(op, out_grads)
+              in_grads = _AsList(in_grads)
               _VerifyGeneratedGradients(in_grads, op)
               if gate_gradients and len(
                   [x for x in in_grads if x is not None]) > 1:
@@ -595,8 +604,9 @@ def _HandleNestedIndexedSlices(grad):
   else:
     assert isinstance(grad.values, ops.IndexedSlices)
     g = _HandleNestedIndexedSlices(grad.values)
-    return ops.IndexedSlices(
-        g.values, array_ops.gather(grad.indices, g.indices), g.dense_shape)
+    return ops.IndexedSlices(g.values,
+                             array_ops.gather(grad.indices, g.indices),
+                             g.dense_shape)
 
 
 def _AccumulatorShape(inputs):
@@ -610,6 +620,7 @@ def _AccumulatorShape(inputs):
 def _LogOpGradients(op, out_grads, in_grads):
   """"""Log the in and out grads of an op.""""""
   logging.vlog(1, ""Gradient for '"" + op.name + ""'"")
+
   def _FilterGrad(x):
     if x is None:
       return False
@@ -617,6 +628,7 @@ def _LogOpGradients(op, out_grads, in_grads):
       return bool(x)
     else:
       return True
+
   logging.vlog(1, ""  in  --> %s"",
                "", "".join([x.name for x in out_grads if _FilterGrad(x)]))
   logging.vlog(1, ""  out --> %s"",
@@ -636,8 +648,10 @@ def _MultiDeviceAddN(tensor_list):
   # TODO(sjhwang): Create hierarchical aggregation tree as pbar's suggestion.
   # E.g., aggregate per GPU, then per task, and so on.
   summands = []
+
   def DeviceKey(dev):
     return """" if dev is None else dev
+
   for dev in sorted(six.iterkeys(tensors_on_device), key=DeviceKey):
     tensors = tensors_on_device[dev]
     with ops.colocate_with(tensors[0].op, ignore_existing=True):
@@ -689,11 +703,12 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
   """"""
   if aggregation_method is None:
     aggregation_method = AggregationMethod.DEFAULT
-  if aggregation_method not in [AggregationMethod.ADD_N,
-                                AggregationMethod.EXPERIMENTAL_TREE,
-                                AggregationMethod.EXPERIMENTAL_ACCUMULATE_N]:
-    raise ValueError(
-        ""Invalid aggregation_method specified %s."" % aggregation_method)
+  if aggregation_method not in [
+      AggregationMethod.ADD_N, AggregationMethod.EXPERIMENTAL_TREE,
+      AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
+  ]:
+    raise ValueError(""Invalid aggregation_method specified %s."" %
+                     aggregation_method)
   out_grads = _GetGrads(grads, op)
   for i, out_grad in enumerate(out_grads):
     if loop_state:
@@ -701,9 +716,10 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
         assert control_flow_ops.IsLoopSwitch(op)
         continue
     # Grads have to be Tensors or IndexedSlices
-    if (isinstance(out_grad, collections.Sequence) and
-        not all([isinstance(g, (ops.Tensor, ops.IndexedSlices))
-                 for g in out_grad if g is not None])):
+    if (isinstance(out_grad, collections.Sequence) and not all([
+        isinstance(g, (ops.Tensor, ops.IndexedSlices)) for g in out_grad
+        if g is not None
+    ])):
       raise TypeError(""gradients have to be either all Tensors ""
                       ""or all IndexedSlices"")
     # Aggregate multiple gradients, and convert [] to None.
@@ -725,9 +741,10 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
           # 2 grads then we fall through to the ""tree"" case below.
           used = ""accumulate_n""
           out_grads[i] = math_ops.accumulate_n(out_grad)
-        elif aggregation_method in [AggregationMethod.EXPERIMENTAL_TREE,
-                                    AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
-                                   ]:
+        elif aggregation_method in [
+            AggregationMethod.EXPERIMENTAL_TREE,
+            AggregationMethod.EXPERIMENTAL_ACCUMULATE_N
+        ]:
           # Aggregate all gradients by doing pairwise sums: this may
           # reduce performance, but it can improve memory because the
           # gradients can be released earlier.
@@ -744,18 +761,18 @@ def _AggregatedGrads(grads, op, loop_state, aggregation_method=None):
         else:
           used = ""add_n""
           out_grads[i] = _MultiDeviceAddN(out_grad)
-        logging.vlog(2, ""  _AggregatedGrads %d x %s using %s"", len(out_grad),
-                     tensor_shape, used)
+        logging.vlog(2, ""  _AggregatedGrads %d x %s using %s"",
+                     len(out_grad), tensor_shape, used)
       else:
-        out_grad = math_ops._as_indexed_slices_list([g for g in out_grad
-                                                     if g is not None])
+        out_grad = math_ops._as_indexed_slices_list(
+            [g for g in out_grad if g is not None])
         out_grad = [_HandleNestedIndexedSlices(x) for x in out_grad]
         # Form IndexedSlices out of the concatenated values and
         # indices.
         out_grads[i] = ops.IndexedSlices(
             array_ops.concat(0, [x.values for x in out_grad]),
-            array_ops.concat(0, [x.indices
-                                 for x in out_grad]), out_grad[0].dense_shape)
+            array_ops.concat(0, [x.indices for x in out_grad]),
+            out_grad[0].dense_shape)
     else:
       out_grads[i] = []
   return out_grads
@@ -805,9 +822,10 @@ def _hessian_vector_product(ys, xs, v):
   grads = gradients(ys, xs)
 
   assert len(grads) == length
-  elemwise_products = [math_ops.mul(grad_elem, array_ops.stop_gradient(v_elem))
-                       for grad_elem, v_elem in zip(grads, v)
-                       if grad_elem is not None]
+  elemwise_products = [
+      math_ops.mul(grad_elem, array_ops.stop_gradient(v_elem))
+      for grad_elem, v_elem in zip(grads, v) if grad_elem is not None
+  ]
 
   # Second backprop
   return gradients(elemwise_products, xs)
",0,train
455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,op_level_cost_estimator.cc,"@@ -582,6 +582,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
   elementwise_ops_.emplace(""Prod"", EIGEN_COST(scalar_product_op<float>));
   elementwise_ops_.emplace(""QuantizeAndDequantizeV2"",
                            quantize_and_dequantize_v2_cost);
+  elementwise_ops_.emplace(""QuantizeAndDequantizeV4"",
+                           quantize_and_dequantize_v2_cost);
   elementwise_ops_.emplace(""QuantizedSigmoid"",
                            EIGEN_COST(scalar_logistic_op<float>));
   elementwise_ops_.emplace(""QuantizeV2"", quantize_v2_cost);
",0,test
455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,generic_layout_optimizer_transposer.cc,"@@ -2027,6 +2027,7 @@ bool IsDefaultLayoutAgnosticOp(const NodeDef& node) {
                                             ""PreventGradient"",
                                             ""QuantizeAndDequantizeV2"",
                                             ""QuantizeAndDequantizeV3"",
+                                            ""QuantizeAndDequantizeV4"",
                                             ""Real"",
                                             ""Reciprocal"",
                                             ""Relu"",
",0,test
455db2a13c5c5d738f240c11531c4d198605efb9,tensorflow/tensorflow,[Grappler] Add support for QuantizeAndDequantizeV4,layout_optimizer.cc,"@@ -170,6 +170,7 @@ std::set<string> GetOpsFormatAgnostic() {
                                           ""Polygamma"",
                                           ""QuantizeAndDequantizeV2"",
                                           ""QuantizeAndDequantizeV3"",
+                                          ""QuantizeAndDequantizeV4"",
                                           ""Pow"",
                                           ""Real"",
                                           ""RealDiv"",
",0,test
d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files
Change: 148335792",cli_shared.py,"@@ -26,6 +26,7 @@ from tensorflow.python.debug.cli import tensor_format
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import variables
 
+RL = debugger_cli_common.RichLine
 
 # Default threshold number of elements above which ellipses will be used
 # when printing the value of the tensor.
@@ -152,9 +153,8 @@ def error(msg):
       for screen output.
   """"""
 
-  full_msg = ""ERROR: "" + msg
-  return debugger_cli_common.RichTextLines(
-      [full_msg], font_attr_segs={0: [(0, len(full_msg), ""red"")]})
+  return debugger_cli_common.rich_text_lines_from_rich_line_list([
+      RL(""ERROR: "" + msg, ""red"")])
 
 
 def _get_fetch_name(fetch):
@@ -214,16 +214,16 @@ def _recommend_command(command, description, indent=2, create_link=False):
   """"""
 
   indent_str = "" "" * indent
-  lines = [indent_str + command + "":"", indent_str + ""  "" + description]
 
   if create_link:
-    font_attr_segs = {
-        0: [(indent, indent + len(command), [
-            debugger_cli_common.MenuItem("""", command), ""bold""])]}
+    font_attr = [debugger_cli_common.MenuItem("""", command), ""bold""]
   else:
-    font_attr_segs = {0: [(indent, indent + len(command), ""bold"")]}
+    font_attr = ""bold""
 
-  return debugger_cli_common.RichTextLines(lines, font_attr_segs=font_attr_segs)
+  lines = [RL(indent_str) + RL(command, font_attr) + "":"",
+           indent_str + ""  "" + description]
+
+  return debugger_cli_common.rich_text_lines_from_rich_line_list(lines)
 
 
 def get_tfdbg_logo():
@@ -308,23 +308,19 @@ def get_run_start_intro(run_call_count,
           ""Keep executing run() calls until a dumped tensor passes a given, ""
           ""registered filter (conditional breakpoint mode)""))
 
-  more_font_attr_segs = {}
   more_lines = [""    Registered filter(s):""]
   if tensor_filters:
     filter_names = []
     for filter_name in tensor_filters:
       filter_names.append(filter_name)
-      more_lines.append(""        * "" + filter_name)
       command_menu_node = debugger_cli_common.MenuItem(
           """", ""run -f %s"" % filter_name)
-      more_font_attr_segs[len(more_lines) - 1] = [
-          (10, len(more_lines[-1]), command_menu_node)]
+      more_lines.append(RL(""        * "") + RL(filter_name, command_menu_node))
   else:
     more_lines.append(""        (None)"")
 
   out.extend(
-      debugger_cli_common.RichTextLines(
-          more_lines, font_attr_segs=more_font_attr_segs))
+      debugger_cli_common.rich_text_lines_from_rich_line_list(more_lines))
 
   out.extend(
       _recommend_command(
@@ -334,11 +330,10 @@ def get_run_start_intro(run_call_count,
           ""inspect/modify their values"", create_link=True))
 
   out.append("""")
-  suggest_help = ""For more details, see help.""
-  out.append(
-      suggest_help,
-      font_attr_segs=[(len(suggest_help) - 5, len(suggest_help) - 1,
-                       debugger_cli_common.MenuItem("""", ""help""))])
+
+  out.append_rich_line(RL(""For more details, see "") +
+                       RL(""help."", debugger_cli_common.MenuItem("""", ""help"")) +
+                       ""."")
   out.append("""")
 
   # Make main menu for the run-start intro.
@@ -407,14 +402,12 @@ def get_error_intro(tf_error):
 
   intro_lines = [
       ""--------------------------------------"",
-      ""!!! An error occurred during the run !!!"",
+      RL(""!!! An error occurred during the run !!!"", ""blink""),
       """",
       ""You may use the following commands to debug:"",
   ]
-  intro_font_attr_segs = {1: [(0, len(intro_lines[1]), ""blink"")]}
 
-  out = debugger_cli_common.RichTextLines(
-      intro_lines, font_attr_segs=intro_font_attr_segs)
+  out = debugger_cli_common.rich_text_lines_from_rich_line_list(intro_lines)
 
   out.extend(
       _recommend_command(""ni -a -d -t %s"" % op_name,
",0,train
d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files
Change: 148335792",debugger_cli_common.py,"@@ -23,6 +23,7 @@ import re
 import sre_constants
 import traceback
 
+import six
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.platform import gfile
@@ -89,7 +90,7 @@ class RichLine(object):
         attributes applied to the corresponding substrings.
     """"""
     ret = RichLine()
-    if isinstance(other, str):
+    if isinstance(other, six.string_types):
       ret.text = self.text + other
       ret.font_attr_segs = self.font_attr_segs[:]
       return ret
@@ -105,19 +106,23 @@ class RichLine(object):
 
 
 def rich_text_lines_from_rich_line_list(rich_text_list):
-  """"""Convert a list of RichLine objects to a RichTextLines object.
+  """"""Convert a list of RichLine objects or strings to a RichTextLines object.
 
   Args:
-    rich_text_list: a list of RichLine objects
+    rich_text_list: a list of RichLine objects or strings
 
   Returns:
     A corresponding RichTextLines object.
   """"""
-  lines = [rl.text for rl in rich_text_list]
+  lines = []
   font_attr_segs = {}
   for i, rl in enumerate(rich_text_list):
-    if rl.font_attr_segs:
-      font_attr_segs[i] = rl.font_attr_segs
+    if isinstance(rl, RichLine):
+      lines.append(rl.text)
+      if rl.font_attr_segs:
+        font_attr_segs[i] = rl.font_attr_segs
+    else:
+      lines.append(rl)
   return RichTextLines(lines, font_attr_segs)
 
 
@@ -314,6 +319,9 @@ class RichTextLines(object):
     if font_attr_segs:
       self._font_attr_segs[len(self._lines) - 1] = font_attr_segs
 
+  def append_rich_line(self, rich_line):
+    self.append(rich_line.text, rich_line.font_attr_segs)
+
   def prepend(self, line, font_attr_segs=None):
     """"""Prepend (i.e., add to the front) a single line of text.
 
",0,train
d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files
Change: 148335792",debugger_cli_common_test.py,"@@ -77,6 +77,17 @@ class RichTextLinesTest(test_util.TensorFlowTestCase):
     self.assertEqual(1, len(screen_output.font_attr_segs[0]))
     self.assertEqual(1, len(screen_output.annotations))
 
+  def testRichLinesAppendRichLine(self):
+    rtl = debugger_cli_common.RichTextLines(
+        ""Roses are red"",
+        font_attr_segs={0: [(0, 5, ""red"")]})
+    rtl.append_rich_line(debugger_cli_common.RichLine(""Violets are "") +
+                         debugger_cli_common.RichLine(""blue"", ""blue""))
+    self.assertEqual(2, len(rtl.lines))
+    self.assertEqual(2, len(rtl.font_attr_segs))
+    self.assertEqual(1, len(rtl.font_attr_segs[0]))
+    self.assertEqual(1, len(rtl.font_attr_segs[1]))
+
   def testRichTextLinesConstructorIncomplete(self):
     # Test RichTextLines constructor, with incomplete keyword arguments.
     screen_output = debugger_cli_common.RichTextLines(
",0,train
d194dd4b823a1e74efd47a5b4215d839858244e9,tensorflow/tensorflow,"Use RichLine instead of explicit font_attr_segs in two more files
Change: 148335792",stepper_cli.py,"@@ -247,42 +247,31 @@ class NodeStepperCLI(object):
     ]
 
     lines = []
-    font_attr_segs = {}
     if verbose:
       lines.extend(
           [""Topologically-sorted transitive input(s) and fetch(es):"", """"])
 
-    line_counter = len(lines)
     for i, element_name in enumerate(self._sorted_nodes):
       if i < index_range[0] or i >= index_range[1]:
         continue
 
-      font_attr_segs[line_counter] = []
-
       # TODO(cais): Use fixed-width text to show node index.
-      node_prefix = ""(%d / %d)"" % (i + 1, len(self._sorted_nodes))
       if i == self._next:
-        node_prefix = ""  "" + self.NEXT_NODE_POINTER_STR + node_prefix
-        font_attr_segs[line_counter].append((0, 3, ""bold""))
+        node_prefix = RL(""  "") + RL(self.NEXT_NODE_POINTER_STR, ""bold"")
       else:
-        node_prefix = ""     "" + node_prefix
+        node_prefix = RL(""     "")
 
-      node_prefix += ""  [""
-      labels, label_font_attr_segs = self._get_status_labels(
+      node_prefix += ""(%d / %d)"" % (i + 1, len(self._sorted_nodes)) + ""  [""
+      node_prefix += self._get_status_labels(
           element_name,
           handle_node_names,
           intermediate_tensor_names,
           override_names,
-          dirty_variable_names,
-          len(node_prefix))
-      node_prefix += labels
-      font_attr_segs[line_counter].extend(label_font_attr_segs)
+          dirty_variable_names)
 
       lines.append(node_prefix + ""] "" + element_name)
-      line_counter += 1
 
-    output = debugger_cli_common.RichTextLines(
-        lines, font_attr_segs=font_attr_segs)
+    output = debugger_cli_common.rich_text_lines_from_rich_line_list(lines)
 
     if verbose:
       output.extend(self._node_status_label_legend())
@@ -294,8 +283,7 @@ class NodeStepperCLI(object):
                          handle_node_names,
                          intermediate_tensor_names,
                          override_names,
-                         dirty_variable_names,
-                         offset):
+                         dirty_variable_names):
     """"""Get a string of status labels for a graph element.
 
     A status label indicates that a node has a certain state in this
@@ -313,15 +301,13 @@ class NodeStepperCLI(object):
       override_names: (list of str) Names of the tensors of which the values
         are overridden.
       dirty_variable_names: (list of str) Names of the dirty variables.
-      offset: (int) Initial offset of the font attribute segments.
 
     Returns:
-      (str) The string made of status labels that currently apply to the graph
-        element.
-      (list of tuples) The font attribute segments, with offset applied.
+      (RichLine) The rich text string of status labels that currently apply to
+        the graph element.
     """"""
 
-    status = RL("" "" * offset)
+    status = RL()
 
     node_name = element_name.split("":"")[0]
     status += (RL(self.STATE_IS_PLACEHOLDER,
@@ -350,9 +336,7 @@ class NodeStepperCLI(object):
                   self._STATE_COLORS[self.STATE_DIRTY_VARIABLE])
                if element_name in dirty_variable_names else "" "")
 
-    # TODO(ebreck) Return status here, once the caller is updated with the
-    # RichLine API.
-    return status.text[offset:], status.font_attr_segs
+    return status
 
   def _node_status_label_legend(self):
     """"""Get legend for node-status labels.
@@ -362,8 +346,8 @@ class NodeStepperCLI(object):
     """"""
 
     return debugger_cli_common.rich_text_lines_from_rich_line_list([
-        RL(""""),
-        RL(""Legend:""),
+        """",
+        ""Legend:"",
         (RL(""  "") +
          RL(self.STATE_IS_PLACEHOLDER,
             self._STATE_COLORS[self.STATE_IS_PLACEHOLDER]) +
@@ -444,18 +428,18 @@ class NodeStepperCLI(object):
     """"""
     feed_types = self._node_stepper.last_feed_types()
 
-    out = debugger_cli_common.RichTextLines([""Stepper used feeds:""])
+    out = [""Stepper used feeds:""]
     if feed_types:
       for feed_name in feed_types:
         feed_info = RL(""  %s : "" % feed_name)
         feed_info += RL(feed_types[feed_name],
                         self._FEED_COLORS[feed_types[feed_name]])
-        out.append(feed_info.text, font_attr_segs=feed_info.font_attr_segs)
+        out.append(feed_info)
     else:
       out.append(""  (No feeds)"")
     out.append("""")
 
-    return out
+    return debugger_cli_common.rich_text_lines_from_rich_line_list(out)
 
   def _report_last_updated(self):
     """"""Generate a report of the variables updated in the last cont/step call.
@@ -472,8 +456,8 @@ class NodeStepperCLI(object):
     rich_lines = [RL(""Updated:"", self._UPDATED_ATTRIBUTE)]
     sorted_last_updated = sorted(list(last_updated))
     for updated in sorted_last_updated:
-      rich_lines.append(RL(""  %s"" % updated))
-    rich_lines.append(RL(""""))
+      rich_lines.append(""  %s"" % updated)
+    rich_lines.append("""")
     return debugger_cli_common.rich_text_lines_from_rich_line_list(rich_lines)
 
   def step(self, args, screen_info=None):
",0,train
ac01d27997e73942c2e598b4f203c84f756c35c3,tensorflow/tensorflow,"Translation to LLVM: check the validity of module-level Ops

Translation to LLVM expects the entry module to have only specific types of ops
that correspond to LLVM IR entities allowed in a module. Currently those are
restricted to functions and globals. Introduce an additional check at the
module level. Inside individual functions, the check for supported Ops is
already performed, but it accepts all LLVM dialect Ops and wouldn't be
immediately applicable at the module level.

PiperOrigin-RevId: 274058651",ModuleTranslation.h,"@@ -51,7 +51,11 @@ class ModuleTranslation {
 public:
   template <typename T = ModuleTranslation>
   static std::unique_ptr<llvm::Module> translateModule(ModuleOp m) {
+    if (failed(checkSupportedModuleOps(m)))
+      return nullptr;
     auto llvmModule = prepareLLVMModule(m);
+    if (!llvmModule)
+      return nullptr;
 
     T translator(m);
     translator.llvmModule = std::move(llvmModule);
@@ -74,6 +78,9 @@ protected:
   static std::unique_ptr<llvm::Module> prepareLLVMModule(ModuleOp m);
 
 private:
+  /// Check whether the module contains only supported ops directly in its body.
+  static LogicalResult checkSupportedModuleOps(ModuleOp m);
+
   LogicalResult convertFunctions();
   void convertGlobals();
   LogicalResult convertOneFunction(LLVMFuncOp func);
",0,train
09fa4a4e355171fa30f5793ff9eb1b61a4e34ed0,tensorflow/tensorflow,"Fix ConvBackpropComputeDimensionsV2() interface.

PiperOrigin-RevId: 171165222",conv_grad_ops.h,"@@ -248,7 +248,7 @@ Status ConvBackpropComputeDimensions(StringPiece label, int num_spatial_dims,
 Status ConvBackpropComputeDimensionsV2(
     StringPiece label, int num_spatial_dims, const TensorShape& input_shape,
     const TensorShape& filter_shape, const TensorShape& out_backprop_shape,
-    const std::vector<int32>& dilations, const std::vector<int32>& strides,
+    const gtl::ArraySlice<int32>& dilations, const std::vector<int32>& strides,
     Padding padding, TensorFormat data_format, ConvBackpropDimensions* dims);
 }  // namespace tensorflow
 
",0,train
cf4eaffb3ddcedb723355f01c5fec2cdc020a40b,tensorflow/tensorflow,"Add support for equal() and not_equal() to tf.contrib.util.constant_value().

PiperOrigin-RevId: 166920279",tensor_util.py,"@@ -689,6 +689,22 @@ def _ConstantValue(tensor, partial):
       return np.full(fill_shape.as_list(), fill_value, dtype=fill_value.dtype)
     else:
       return None
+  elif tensor.op.type == ""Equal"":
+    value1 = constant_value(tensor.op.inputs[0])
+    if value1 is None:
+      return None
+    value2 = constant_value(tensor.op.inputs[1])
+    if value2 is None:
+      return None
+    return np.equal(value1, value2)
+  elif tensor.op.type == ""NotEqual"":
+    value1 = constant_value(tensor.op.inputs[0])
+    if value1 is None:
+      return None
+    value2 = constant_value(tensor.op.inputs[1])
+    if value2 is None:
+      return None
+    return np.not_equal(value1, value2)
   else:
     return None
 
",0,train
cf4eaffb3ddcedb723355f01c5fec2cdc020a40b,tensorflow/tensorflow,"Add support for equal() and not_equal() to tf.contrib.util.constant_value().

PiperOrigin-RevId: 166920279",tensor_util_test.py,"@@ -800,6 +800,36 @@ class ConstantValueTest(test.TestCase):
     self.assertAllClose(input_, c_val[0])
     self.assertIsNone(c_val[1])
 
+  def testEqual(self):
+    # Scalar inputs.
+    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(1))
+    self.assertEqual(tensor_util.constant_value(tf_val), True)
+
+    tf_val = math_ops.equal(constant_op.constant(1), constant_op.constant(0))
+    self.assertEqual(tensor_util.constant_value(tf_val), False)
+
+    # Shaped inputs with broadcast semantics.
+    tf_val = math_ops.equal(constant_op.constant([[0, 1]]),
+                            constant_op.constant([[0], [1]]))
+    c_val = tensor_util.constant_value(tf_val)
+    self.assertAllEqual(c_val, [[True, False], [False, True]])
+
+  def testNotEqual(self):
+    # Scalar inputs.
+    tf_val = math_ops.not_equal(constant_op.constant(1),
+                                constant_op.constant(1))
+    self.assertEqual(tensor_util.constant_value(tf_val), False)
+
+    tf_val = math_ops.not_equal(constant_op.constant(1),
+                                constant_op.constant(0))
+    self.assertEqual(tensor_util.constant_value(tf_val), True)
+
+    # Shaped inputs with broadcast semantics.
+    tf_val = math_ops.not_equal(constant_op.constant([[0, 1]]),
+                                constant_op.constant([[0], [1]]))
+    c_val = tensor_util.constant_value(tf_val)
+    self.assertAllEqual(c_val, [[False, True], [True, False]])
+
 
 class ConstantValueAsShapeTest(test.TestCase):
 
",0,train
23a3e222562cba97b4b03ccf8d4027a91d179051,tensorflow/tensorflow,"GPU registration for resource scatter add

PiperOrigin-RevId: 162289810",resource_variable_ops.cc,"@@ -445,6 +445,17 @@ class ResourceScatterUpdateOp : public OpKernel {
 
 TF_CALL_NUMBER_TYPES(REGISTER_SCATTER_ARITHEMTIC_CPU);
 
+// Registers GPU kernels.
+#if GOOGLE_CUDA
+#define REGISTER_SCATTER_ARITHEMTIC_GPU(type) \
+  REGISTER_SCATTER_ARITHEMTIC(type, GPU);
+
+#define REGISTER_SCATTER_UPDATE_GPU(type) REGISTER_SCATTER_UPDATE(type, GPU);
+
+TF_CALL_GPU_NUMBER_TYPES_NO_HALF(REGISTER_SCATTER_ARITHEMTIC_GPU);
+
+#endif  // GOOGLE_CUDA
+
 #undef REGISTER_SCATTER_ARITHEMTIC
 #undef REGISTER_SCATTER_ARITHEMTIC_CPU
 #undef REGISTER_SCATTER_KERNEL
",0,train
23a3e222562cba97b4b03ccf8d4027a91d179051,tensorflow/tensorflow,"GPU registration for resource scatter add

PiperOrigin-RevId: 162289810",resource_variable_ops_test.py,"@@ -93,7 +93,7 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
       self.assertEqual(read.eval(), 2)
 
   def testScatterAdd(self):
-    with self.test_session():
+    with self.test_session(use_gpu=True):
       handle = resource_variable_ops.var_handle_op(
           dtype=dtypes.int32, shape=[1, 1])
       resource_variable_ops.assign_variable_op(
",0,train
d0c470be2e573e9f82d54d13807726b0a8cca89d,tensorflow/tensorflow,"Use uint32 for tid.

PiperOrigin-RevId: 297901590
Change-Id: Id5d03f41f0442f8c7a18b668b7267e6d8c727583",traceme_recorder.cc,"@@ -196,12 +196,12 @@ class TraceMeRecorder::ThreadLocalRecorder {
   return singleton;
 }
 
-void TraceMeRecorder::RegisterThread(int32 tid, ThreadLocalRecorder* thread) {
+void TraceMeRecorder::RegisterThread(uint32 tid, ThreadLocalRecorder* thread) {
   mutex_lock lock(mutex_);
   threads_.emplace(tid, thread);
 }
 
-void TraceMeRecorder::UnregisterThread(int32 tid) {
+void TraceMeRecorder::UnregisterThread(uint32 tid) {
   mutex_lock lock(mutex_);
   auto it = threads_.find(tid);
   if (it != threads_.end()) {
",0,train
d0c470be2e573e9f82d54d13807726b0a8cca89d,tensorflow/tensorflow,"Use uint32 for tid.

PiperOrigin-RevId: 297901590
Change-Id: Id5d03f41f0442f8c7a18b668b7267e6d8c727583",traceme_recorder.h,"@@ -59,7 +59,7 @@ class TraceMeRecorder {
     uint64 end_time;    // 0 = missing
   };
   struct ThreadInfo {
-    int32 tid;
+    uint32 tid;
     string name;
   };
   struct ThreadEvents {
@@ -101,8 +101,8 @@ class TraceMeRecorder {
 
   TF_DISALLOW_COPY_AND_ASSIGN(TraceMeRecorder);
 
-  void RegisterThread(int32 tid, ThreadLocalRecorder* thread);
-  void UnregisterThread(int32 tid);
+  void RegisterThread(uint32 tid, ThreadLocalRecorder* thread);
+  void UnregisterThread(uint32 tid);
 
   bool StartRecording(int level);
   Events StopRecording();
@@ -113,7 +113,7 @@ class TraceMeRecorder {
   mutex mutex_;
   // Map of the static container instances (thread_local storage) for each
   // thread. While active, a ThreadLocalRecorder stores trace events.
-  absl::flat_hash_map<int32, ThreadLocalRecorder*> threads_ GUARDED_BY(mutex_);
+  absl::flat_hash_map<uint32, ThreadLocalRecorder*> threads_ GUARDED_BY(mutex_);
   // Events from threads that died during recording.
   TraceMeRecorder::Events orphaned_events_ GUARDED_BY(mutex_);
 };
",0,train
0e61131b5a20916e2445821e8b18f1416b375dcf,tensorflow/tensorflow,"Log the start of filling up the shuffle buffer

When debugging performance issues we only saw 2 logs, the intermediate
state and the end. However, we did not know when the shuffle buffer
processing started. The updated logging should be clearer and will
record both the start and end.

PiperOrigin-RevId: 240624588",shuffle_dataset_op.cc,"@@ -129,6 +129,10 @@ class ShuffleDatasetOpBase : public UnaryDatasetOpKernel {
                 ctx, this->prefix(), &input_impl_));
           }
           if (!end_of_input_sequence) {
+            if (num_elements_ == 0) {
+              VLOG(1) << ""Starting to fill up shuffle buffer of size: ""
+                      << this->dataset()->buffer_size_;
+            }
             this->RecordBufferEnqueue(ctx, input_element);
             buffer_[slices_.back()->end % this->dataset()->buffer_size_] =
                 std::move(input_element);
",0,train
0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328

PiperOrigin-RevId: 199809082",algebraic_simplifier_test.cc,"@@ -1714,7 +1714,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopPad) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(), param);
 }
@@ -1759,7 +1759,7 @@ TEST_F(AlgebraicSimplifierTest, NegativePadding) {
   EXPECT_THAT(computation->root_instruction(), op::Pad(param, zero));
   EXPECT_TRUE(has_negative_padding(pad));
 
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(), op::Slice(op::Pad(param, zero)));
   EXPECT_FALSE(
@@ -1781,7 +1781,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopReshape) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(), param);
 }
@@ -1804,7 +1804,7 @@ TEST_F(AlgebraicSimplifierTest, RemoveNoopSlice) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(), param);
 }
@@ -1932,7 +1932,8 @@ TEST_F(AlgebraicSimplifierTest, ConvertConvToMatmul) {
     b.AddInstruction(HloInstruction::CreateConvolve(out_shape, input, filter,
                                                     window, dnums));
 
-    auto module = CreateNewModule();
+    // TODO(b/80488902): verify this module.
+    auto module = HloTestBase::CreateNewModule();
     auto* computation = module->AddEntryComputation(b.Build());
 
     AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/true,
@@ -2060,7 +2061,7 @@ TEST_F(AlgebraicSimplifierTest, MaxMinToClamp) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(),
               op::Clamp(max_value, param0, min_value));
@@ -2090,7 +2091,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxToClamp) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(),
               op::Clamp(max_value, param0, min_value));
@@ -2121,7 +2122,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxWithBroadcastToClamp) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(),
               op::Clamp(max_value, param0, min_value));
@@ -2151,7 +2152,7 @@ TEST_F(AlgebraicSimplifierTest, MinMaxNotToClamp) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_FALSE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(),
               op::Minimum(op::Maximum(param0, max_value), min_value));
@@ -2184,7 +2185,7 @@ TEST_F(AlgebraicSimplifierTest, MinEquationWithMaxNotToClamp) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  EXPECT_FALSE(simplifier.Run(module.get()).ValueOrDie());
+  EXPECT_FALSE(simplifier.Run(module).ValueOrDie());
 
   EXPECT_THAT(computation->root_instruction(),
               op::Minimum(op::Add(op::Maximum(param0, max_value), max_value),
@@ -2200,10 +2201,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) {
       HloInstruction::CreateParameter(0, r0f32, ""scalar_param""));
 
   Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6, 7});
-  HloInstruction* broadcast =
-      builder.AddInstruction(HloInstruction::CreateBroadcast(
-          broadcast_shape, scalar_param,
-          AsInt64Slice(broadcast_shape.dimensions())));
+  HloInstruction* broadcast = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(broadcast_shape, scalar_param, {}));
 
   Shape slice_shape = ShapeUtil::MakeShape(F32, {2, 2, 3, 3});
   HloInstruction* slice = builder.AddInstruction(HloInstruction::CreateSlice(
@@ -2219,10 +2218,10 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToSlice) {
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
 
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   // Running simplification again should not result in any further changes.
-  ASSERT_FALSE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_FALSE(simplifier.Run(module).ValueOrDie());
 
   root = computation->root_instruction();
   EXPECT_THAT(root, op::Broadcast(scalar_param));
@@ -2237,10 +2236,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) {
       HloInstruction::CreateConstant(Literal::CreateR0<float>(42.0f)));
 
   Shape broadcast_shape = ShapeUtil::MakeShape(F32, {4, 5, 6});
-  HloInstruction* broadcast =
-      builder.AddInstruction(HloInstruction::CreateBroadcast(
-          broadcast_shape, forty_two,
-          AsInt64Slice(broadcast_shape.dimensions())));
+  HloInstruction* broadcast = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(broadcast_shape, forty_two, {}));
 
   HloInstruction* transpose =
       builder.AddInstruction(HloInstruction::CreateTranspose(
@@ -2259,7 +2256,7 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   root = computation->root_instruction();
   EXPECT_THAT(root, op::Broadcast(forty_two));
@@ -2268,7 +2265,8 @@ TEST_F(AlgebraicSimplifierTest, ScalarBroadcastToTransposeReshape) {
 
 // Test that ReduceWindow(Pad(op, x), y) can simplify to ReduceWindow(op, x).
 TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) {
-  auto module = CreateNewModule();
+  // TODO(b/80488902): verify this module.
+  auto module = HloTestBase::CreateNewModule();
   HloComputation::Builder builder(TestName());
 
   // Create operand to the pad.
@@ -2349,7 +2347,8 @@ TEST_F(AlgebraicSimplifierTest, FoldPadIntoReduceWindow) {
 // Test that ReduceWindow(Convert(Pad(op, x)), y) can simplify to
 // ReduceWindow(Convert(op), x).
 TEST_F(AlgebraicSimplifierTest, FoldConvertedPadIntoReduceWindow) {
-  auto module = CreateNewModule();
+  // TODO(b/80488902): verify this module.
+  auto module = HloTestBase::CreateNewModule();
   HloComputation::Builder builder(TestName());
 
   // Create operand to the pad.
@@ -2444,7 +2443,7 @@ TEST_F(AlgebraicSimplifierTest, ReversalOfTrivialDimensionsToBitcast) {
 
   AlgebraicSimplifier simplifier(/*is_layout_sensitive=*/false,
                                  non_bitcasting_callback());
-  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  ASSERT_TRUE(simplifier.Run(module).ValueOrDie());
 
   HloInstruction* root = computation->root_instruction();
   EXPECT_EQ(a, root);
",0,train
0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328

PiperOrigin-RevId: 199809082",hlo_test_base.h,"@@ -66,6 +66,15 @@ namespace xla {
 //
 // For a more detailed example, see ""../tests/sample_text_test.cc"".
 class HloTestBase : public ::testing::Test {
+ public:
+  // Creates a new HLO module for a test. The module created will have
+  // TestName() for its name; it will also automatically populate its debug
+  // options from command-line flags. If you want a fresh HloModule object and
+  // then add HloComputations to it, it's recommended to use this method in your
+  // tests.
+  static std::unique_ptr<HloModule> CreateNewModule(
+      const string& name = TestName());
+
  protected:
   // This uses the interpreter backend as the reference backend and
   // automatically finds another supported backend as the test backend. If the
@@ -80,14 +89,6 @@ class HloTestBase : public ::testing::Test {
 
   ~HloTestBase() override {}
 
-  // Creates a new HLO module for a test. The module created will have
-  // TestName() for its name; it will also automatically populate its debug
-  // options from command-line flags. If you want a fresh HloModule object and
-  // then add HloComputations to it, it's recommended to use this method in your
-  // tests.
-  static std::unique_ptr<HloModule> CreateNewModule(
-      const string& name = TestName());
-
   // Populates debug options from command-line flags and adjusts the options for
   // testing. It is recommended to use this when you need to pass in
   // DebugOptions, e.g. when creating a module from a string or a file.
",0,train
0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328

PiperOrigin-RevId: 199809082",hlo_verified_test_base.cc,"@@ -41,14 +41,17 @@ void HloVerifiedTestBase::TearDown() {
       << ""TearDown called more than once; it should be called exactly once."";
   tear_down_called_ = true;
   if (module_) {
-    VerifyModule();
+    VerifyModule(module_.get());
+  }
+  for (int i = 0; i < modules_.size(); ++i) {
+    VerifyModule(modules_.at(i).get());
   }
   HloTestBase::TearDown();
 }
 
-void HloVerifiedTestBase::VerifyModule() {
-  HloVerifier verifier;
-  xla::StatusOr<bool> mutated = verifier.Run(module_.get());
+void HloVerifiedTestBase::VerifyModule(HloModule* module) {
+  HloVerifier verifier(/*allow_mixed_precision=*/true);
+  xla::StatusOr<bool> mutated = verifier.Run(module);
   if (!mutated.ok()) {
     ADD_FAILURE() << ""HloVerifier failed: "" << mutated.status();
   } else {
@@ -59,15 +62,20 @@ void HloVerifiedTestBase::VerifyModule() {
 
 HloModule& HloVerifiedTestBase::module() {
   if (!module_) {
-    module_ = CreateNewModule();
+    module_ = HloTestBase::CreateNewModule();
   }
   return *module_;
 }
 
+HloModule* HloVerifiedTestBase::CreateNewModule(const string& name) {
+  modules_.emplace_back(HloTestBase::CreateNewModule());
+  return modules_.back().get();
+}
+
 void HloVerifiedTestBase::ParseAndVerifyModule(
     tensorflow::StringPiece hlo_text) {
   CHECK(!module_) << ""Called ParseModule when test already has a module."";
   TF_ASSERT_OK_AND_ASSIGN(module_, ParseHloString(hlo_text));
-  VerifyModule();
+  VerifyModule(module_.get());
 }
 }  // namespace xla
",0,train
0ef76693fdab2a4d1a4923444a2593f79a6b7873,tensorflow/tensorflow,"Automated g4 rollback of changelist 199308328

PiperOrigin-RevId: 199809082",hlo_verified_test_base.h,"@@ -52,11 +52,23 @@ class HloVerifiedTestBase : public HloTestBase {
     shape_verifier_ = std::move(shape_verifier);
   }
 
+  // Creates a new module for a test, and stores it in modules_ so it can be
+  // verified. Intentionally hides HloTestBase::CreateNewModule, to prevent
+  // creation of unverified modules.
+  HloModule* CreateNewModule(const string& name = TestName());
+
+  // It is confusing to store modules created by module() and CreateNewModule()
+  // in different fields, but it allows us to migrate tests to
+  // HloVerifiedTestBase more easily, so it's a win because we can verify more
+  // modules. See b/80488902.
  private:
-  std::unique_ptr<HloModule> module_;  // Lazily populated. Access via module().
+  // Lazily populated. Access via module().
+  std::unique_ptr<HloModule> module_;
+  // Populated by calls to CreateNewModule.
+  std::vector<std::unique_ptr<HloModule>> modules_;
   std::unique_ptr<ShapeVerifier> shape_verifier_;
   bool tear_down_called_ = false;
-  void VerifyModule();
+  static void VerifyModule(HloModule* module);
 };
 
 }  // namespace xla
",0,train
c446422e3857344d9b94a1521ff86734b700f1ae,tensorflow/tensorflow,"Fix bug in and speed up ConstantFolding::CreateNodeDef():
  * Fix bug trying to store more than kint32max values in a repeated proto field.
  * Speed up populating compressed format. Example: tensorflow/python/kernel_tests/large_concat_op_test with size = 2**29+6 goes from ~30 seconds to ~15 seconds. The fraction of time spent in ConstantFolding::CreateNodeDef() goes down from about 35% to about 12%.

PiperOrigin-RevId: 184693749",constant_folding.cc,"@@ -808,20 +808,26 @@ NodeDef ConstantFolding::CreateNodeDef(const string& name,
   // Use the packed representation whenever possible to avoid generating large
   // graphdefs. Moreover, avoid repeating the last values if they're equal.
   if (tensor->NumElements() > 4) {
-#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME)         \
-  optimized = true;                                          \
-  TYPE last = tensor->flat<TYPE>()(0);                       \
-  int last_index = 0;                                        \
-  for (int i = 0; i < tensor->NumElements(); ++i) {          \
-    TYPE cur = tensor->flat<TYPE>()(i);                      \
-    t->add_##NAME##_val(cur);                                \
-    if (cur != last) {                                       \
-      last = cur;                                            \
-      last_index = i;                                        \
-    }                                                        \
-  }                                                          \
-  /* Remove all identical trailing values to save memory. */ \
-  t->mutable_##NAME##_val()->Truncate(last_index + 1);
+#define POPULATE_TENSOR_PROTO(tensor, t, TYPE, NAME)                \
+  const TYPE* val_ptr = tensor->flat<TYPE>().data();                \
+  TYPE last = *val_ptr;                                             \
+  int64 last_index = 0;                                             \
+  for (int64 i = 0; i < tensor->NumElements(); ++i) {               \
+    TYPE cur = *val_ptr++;                                          \
+    if (cur != last) {                                              \
+      last = cur;                                                   \
+      last_index = i;                                               \
+    }                                                               \
+  }                                                                 \
+  if (last_index < kint32max) {                                     \
+    optimized = true;                                               \
+    t->mutable_##NAME##_val()->Reserve(last_index + 1);             \
+    t->mutable_##NAME##_val()->AddNAlreadyReserved(last_index + 1); \
+    val_ptr = tensor->flat<TYPE>().data();                          \
+    for (int64 i = 0; i <= last_index; ++i) {                       \
+      t->set_##NAME##_val(i, *val_ptr++);                           \
+    }                                                               \
+  }
 
     if (tensor->dtype() == DT_FLOAT) {
       POPULATE_TENSOR_PROTO(tensor, t, float, float)
",0,train
ec3edaf0277041350ad312e477db48266cbd860f,tensorflow/tensorflow,Remove non-working code path for odd row size.,ir_emitter_unnested.cc,"@@ -2042,73 +2042,23 @@ void IrEmitterUnnested::EmitTile(
           };
 
           char * env_if = getenv(""RED_IF"");
-          int red_if = 1;
+          int red_if = 0;
           if (env_if) {
             red_if = atoi(env_if);
             printf(""RED_IF2 = %d %s\n"", red_if, env_if);
           }
-          if (red_if == 1) {
+          if (red_if == 1 || x_tile_fits) {
             std::cout << ""IF_NB 1: "" << std::endl;
             unroll(!x_tile_fits, x_num_steps, vec_stride);
-          } else if (red_if == 2) {
+          } else {
             std::cout << ""IF_NB 2"" << std::endl;
             ksl->If(loop_name + ""_is_full_tile"",
-                    //b->CreateICmpULT(last_element, tile_width),
-                    // If (the thread fully unrolled) {no condition path} else {condition path}
+                    // if (block fully fit) {fast path} else {slow path}
+                    // tile_width is always exact. For the last block,
+                    // it will be the exact number of elements left.
                     b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeFor(2)), tile_width),
                     [&] {unroll(false, x_num_steps, vec_stride);},
                     [&] {unroll(true, x_num_steps, vec_stride);});
-          } else {
-            std::cout << ""IF_NB 3"" << std::endl;
-            //b->CreateICmpULT(start_offset_x+j * step_x * vec_stride + i, tile_width)
-            int last_block_left_element = mapping_scheme.GetDimsInElems()[2] % x_num_steps;
-            std::cout << ""MAPPING "" << mapping_scheme.GetDimsInElems()[0] << "" ""
-                      << mapping_scheme.GetDimsInElems()[1] << "" ""
-                      << mapping_scheme.GetDimsInElems()[2] << std::endl;
-            std::cout << ""LAST_BLOCK x_num_steps "" << x_num_steps
-                      << "" last_block"" << last_block_left_element << std::endl;
-            // NB block per reduction.
-            int nb_block = CeilOfRatio(mapping_scheme.GetDimsInElems()[2],
-                                       tile_size_x);
-            std::cout << ""NB_BLOCK"" << nb_block << std::endl;
-            if (x_tile_fits) {
-              // All threads will completly unroll
-              unroll(false, x_num_steps, vec_stride);
-            } else if(nb_block == 1) {
-              // No thread will completly unroll.
-              // TODO: unroll by the right amount
-              unroll(true, x_num_steps, vec_stride);
-            } else {
-              // For some blocks, all threads will will completly unroll.
-              // For other blocks, some of its threads will completly unroll, others will partially and some won't be used.
-              // So do an if(thread fully unroll) {code with no if between elements} else {code with if between each elements}
-              // TODO: in the else part, unroll without if but with the right number of elements left.
-
-              llvm::Value* block_id = gpu::EmitCallToTargetIntrinsic(
-                  gpu::TargetIntrinsicID::kBlockIdx, {}, {}, &b_);
-              llvm::Value* last_element = b_.CreateAdd(constant(x_num_steps * tile_size_x),
-                                                       start_offset_x, ""last_element"");
-              int x_num_steps_partial = mapping_scheme.GetDimsInElems()[2] % tile_size_x;
-              x_num_steps_partial *= 2;
-              //x_num_steps_partial = x_num_steps;
-              ksl->If(loop_name + ""_is_full_tile"",
-                      // Test if all the elements of this thread is withing tile.
-                      b_.CreateICmpULT(last_element, tile_width),
-                      // Not the last block, so unroll without ifs.
-                      [&] {unroll(false, x_num_steps, vec_stride);},
-                      // The last block isn't completly unrolled.
-
-                      // TODO: unroll the right size. Take care
-                      // vec_stride must match the above unroll for
-                      // now.
-                      // TODO: after unroll of the right size, remove the IFs.
-                      // ONGOING, try to make it work with less
-                      // then unroll x_num_steps
-
-                       [&] {unroll(true, x_num_steps, vec_stride);}); // works
-              //[&] {unroll(true, x_num_steps, x_num_steps);});
-              //[&] {unroll(true, x_num_steps_partial, vec_stride);});
-            }
           }
         }});
 }
",0,train
01c56613967c6cf12dbea7256342b75ba58087ab,tensorflow/tensorflow,"Correct the punctuation in the deprecation message.

PiperOrigin-RevId: 243893700",normalization.py,"@@ -170,8 +170,8 @@ class BatchNormalization(keras_layers.BatchNormalization, base.Layer):
 @deprecation.deprecated(
     date=None, instructions='Use keras.layers.BatchNormalization instead.  In '
     'particular, `tf.control_dependencies(tf.GraphKeys.UPDATE_OPS)` should not '
-    'be used (consult the `tf.keras.layers.batch_normalization`).'
-    'documentation.')
+    'be used (consult the `tf.keras.layers.batch_normalization` '
+    'documentation).')
 @tf_export(v1=['layers.batch_normalization'])
 def batch_normalization(inputs,
                         axis=-1,
",0,train
6c7e0cc4831b892b7fe5f91b7297f534ea99940b,tensorflow/tensorflow,Fixed typos in GradientTape warning message,backprop.py,"@@ -926,14 +926,14 @@ class GradientTape(object):
       else:
         logging.log_first_n(logging.WARN,
                             ""Calling GradientTape.gradient on a persistent ""
-                            ""tape inside it's context is significantly less ""
+                            ""tape inside its context is significantly less ""
                             ""efficient than calling it outside the context (it ""
                             ""causes the gradient ops to be recorded on the ""
                             ""tape, leading to increased CPU and memory usage). ""
                             ""Only call GradientTape.gradient inside the ""
                             ""context if you actually want to trace the ""
                             ""gradient in order to compute higher order ""
-                            ""derrivatives."", 1)
+                            ""derivatives."", 1)
 
     flat_targets = []
     for t in nest.flatten(target):
",0,train
8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes

called_computations for a fusion node should only include the fusion
computation that it calls.

PiperOrigin-RevId: 167149669",hlo_instruction.cc,"@@ -793,13 +793,6 @@ HloInstruction* HloInstruction::CloneAndFuseInternal(
     }
   }
 
-  for (HloComputation* computation :
-       instruction_to_fuse->called_computations()) {
-    if (std::find(called_computations_.begin(), called_computations_.end(),
-                  computation) == called_computations_.end()) {
-      called_computations_.push_back(computation);
-    }
-  }
   VLOG(2) << ""New clone:\n"" << clone->ToString();
   return clone;
 }
",0,train
8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes

called_computations for a fusion node should only include the fusion
computation that it calls.

PiperOrigin-RevId: 167149669",hlo_instruction.h,"@@ -797,8 +797,7 @@ class HloInstruction {
       const Shape& shape,
       tensorflow::gtl::ArraySlice<HloInstruction*> operands);
 
-  // Returns the computations this instruction calls (if any). This includes
-  // computations called by fused instructions inside of a fusion instruction.
+  // Returns the computations this instruction directly calls (if any).
   const std::vector<HloComputation*>& called_computations() const {
     return called_computations_;
   }
",0,train
8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes

called_computations for a fusion node should only include the fusion
computation that it calls.

PiperOrigin-RevId: 167149669",hlo_instruction_test.cc,"@@ -758,16 +758,13 @@ TEST_F(HloInstructionTest, FusionOpWithCalledComputations) {
   auto* fusion = computation->CreateFusionInstruction(
       {map_3_y}, HloInstruction::FusionKind::kLoop);
   auto* fused_computation = fusion->fused_instructions_computation();
-  EXPECT_THAT(fusion->called_computations(),
-              ElementsAre(fused_computation, computation_y));
+  EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation));
 
   fusion->FuseInstruction(map_2_x);
-  EXPECT_THAT(fusion->called_computations(),
-              ElementsAre(fused_computation, computation_y, computation_x));
+  EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation));
 
   fusion->FuseInstruction(map_1_x);
-  EXPECT_THAT(fusion->called_computations(),
-              ElementsAre(fused_computation, computation_y, computation_x));
+  EXPECT_THAT(fusion->called_computations(), ElementsAre(fused_computation));
 }
 
 TEST_F(HloInstructionTest, ComplexFusionOp) {
",0,train
8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes

called_computations for a fusion node should only include the fusion
computation that it calls.

PiperOrigin-RevId: 167149669",hlo_rematerialization.cc,"@@ -1248,7 +1248,8 @@ StatusOr<bool> HloRematerialization::Run(
                                 sequence->at(node.computation())));
         }
         return Status::OK();
-      }));
+      },
+      /*visit_unreachable_nodes=*/false));
 
   // The peak memory usage of the module equals the peak memory use of the entry
   // computation plus the output size of the computation. This is because the
",0,train
8b20ddf3e0eedb52a7ae0f10a55658e64efc4d1a,tensorflow/tensorflow,"[XLA] Sanity check the list of called computations for fusion nodes

called_computations for a fusion node should only include the fusion
computation that it calls.

PiperOrigin-RevId: 167149669",hlo_verifier.cc,"@@ -280,6 +280,14 @@ class ShapeVerifier : public DfsHloVisitor {
   const std::function<int64(const Shape&)> shape_size_fn_;
 };
 
+string ComputationsToString(
+    tensorflow::gtl::ArraySlice<HloComputation*> computations) {
+  return tensorflow::str_util::Join(
+      computations, "","", [](string* s, const HloComputation* computation) {
+        s->append(computation->name());
+      });
+}
+
 }  // namespace
 
 StatusOr<bool> HloVerifier::Run(HloModule* module) {
@@ -290,6 +298,17 @@ StatusOr<bool> HloVerifier::Run(HloModule* module) {
     for (const auto& instruction : computation->instructions()) {
       TF_RET_CHECK(instruction->parent() == computation.get());
       if (instruction->opcode() == HloOpcode::kFusion) {
+        TF_RET_CHECK(
+            ContainersEqual(instruction->called_computations(),
+                            {instruction->fused_instructions_computation()}))
+            << ""Fusion HLO calls computations other than the ""
+               ""fused_instructions_computation: ""
+            << instruction->ToString()
+            << "" instruction->fused_instructions_computation(): ""
+            << instruction->fused_instructions_computation()->ToString()
+            << "" instruction->called_computations(): ""
+            << ComputationsToString(instruction->called_computations());
+
         for (const auto& fused : instruction->fused_instructions()) {
           TF_RET_CHECK(fused->parent() ==
                        instruction->fused_instructions_computation())
",0,train
da0f64b27e619d3ba509dcd6ce1d29fe29f374e7,tensorflow/tensorflow,"Fix tensorshape for static sized tensorarray

PiperOrigin-RevId: 265702387",tensor_array_ops_test.py,"@@ -1365,7 +1365,7 @@ class TensorArrayTest(test.TestCase):
       x = constant_op.constant([1.0, 2.0, 3.0])
       ta = ta.write(0, x)
       t = ta.stack()
-      self.assertEqual(t.shape.as_list(), [None, 3])
+      self.assertEqual(t.shape.as_list(), [3, 3])
       return t
 
     ta_stack()
@@ -1790,6 +1790,11 @@ class TensorArrayTest(test.TestCase):
         dtypes.float32, size=0, element_shape=(5, None), dynamic_size=True)
     self.assertEqual([None, 5, None], ta.stack().shape.as_list())
 
+  def testStackShapeOnStaticSize(self):
+    ta = tensor_array_ops.TensorArray(dtypes.float32, size=42)
+    ta = ta.write(0, [0])
+    self.assertEqual([42, 1], ta.stack().shape.as_list())
+
 
 class TensorArrayBenchmark(test.Benchmark):
 
",0,train
da0f64b27e619d3ba509dcd6ce1d29fe29f374e7,tensorflow/tensorflow,"Fix tensorshape for static sized tensorarray

PiperOrigin-RevId: 265702387",tensor_array_ops.py,"@@ -137,6 +137,7 @@ class _GraphTensorArray(object):
     # shape equality.
     self._element_shape = [tensor_shape.as_shape(element_shape)]
     self._infer_shape = infer_shape
+    self._size = size
     with ops.name_scope(name, ""TensorArray"", [handle, size, flow]) as scope:
       if handle is not None:
         self._handle = handle
@@ -281,7 +282,12 @@ class _GraphTensorArray(object):
     """"""See TensorArray.""""""
     with ops.colocate_with(self._handle):
       with ops.name_scope(name, ""TensorArrayStack"", [self._handle]):
-        return self.gather(math_ops.range(0, self.size()), name=name)
+        value = self.gather(math_ops.range(0, self.size()), name=name)
+        if (self.element_shape and not self._dynamic_size and
+            self._size is not None):
+          value.set_shape([tensor_util.constant_value(self._size)] +
+                          self.element_shape.dims)
+        return value
 
   def gather(self, indices, name=None):
     """"""See TensorArray.""""""
@@ -365,8 +371,11 @@ class _GraphTensorArray(object):
 
   def size(self, name=None):
     """"""See TensorArray.""""""
-    return gen_data_flow_ops.tensor_array_size_v3(
-        handle=self._handle, flow_in=self.flow, name=name)
+    if not self._dynamic_size and self._size is not None:
+      return ops.convert_to_tensor(self._size, dtype=dtypes.int32)
+    else:
+      return gen_data_flow_ops.tensor_array_size_v3(
+          handle=self._handle, flow_in=self.flow, name=name)
 
   @tf_should_use.should_use_result
   def close(self, name=None):
@@ -427,6 +436,7 @@ class _GraphTensorArrayV2(object):
     del colocate_with_first_write_call
 
     self._dynamic_size = dynamic_size
+    self._size = size
 
     if (flow is not None and
         (not isinstance(flow, ops.Tensor) or flow.dtype != dtypes.variant)):
@@ -536,9 +546,15 @@ class _GraphTensorArrayV2(object):
   def stack(self, name=None):
     """"""See TensorArray.""""""
     with ops.name_scope(name, ""TensorArrayV2Stack"", [self._flow]):
+      # TODO(b/139941163): remove constant_value after changing num_elements to regular input
+      if not self._dynamic_size and self._size is not None:
+        ta_size = tensor_util.constant_value(self._size)
+      else:
+        ta_size = -1
       value = list_ops.tensor_list_stack(
           input_handle=self._flow,
           element_dtype=self._dtype,
+          num_elements=ta_size,
           element_shape=self.element_shape)
       return value
 
@@ -619,7 +635,10 @@ class _GraphTensorArrayV2(object):
 
   def size(self, name=None):
     """"""See TensorArray.""""""
-    return list_ops.tensor_list_length(input_handle=self._flow, name=name)
+    if not self._dynamic_size and self._size is not None:
+      return ops.convert_to_tensor(self._size, dtype=dtypes.int32)
+    else:
+      return list_ops.tensor_list_length(input_handle=self._flow, name=name)
 
   @tf_should_use.should_use_result
   def close(self, name=None):
@@ -1227,6 +1246,7 @@ def build_ta_with_new_flow(old_ta, flow):
       colocate_with_first_write_call=impl._colocate_with_first_write_call)
   new_impl = new_ta._implementation
   new_impl._dynamic_size = impl._dynamic_size
+  new_impl._size = impl._size
   new_impl._colocate_with = impl._colocate_with
   new_impl._element_shape = impl._element_shape  # Share _element_shape.
   return new_ta
",0,train
ffc651af58ebacdf3ddbe9537efda694c71a64f3,tensorflow/tensorflow,"Update LogToSTDErr for TF Lite usage

PiperOrigin-RevId: 192379483",arg_max_test.cc,"@@ -100,8 +100,7 @@ TEST(ArgMaxOpTest, GetMaxArgOutput64) {
 }  // namespace tflite
 
 int main(int argc, char** argv) {
-  // On Linux, add: FLAGS_logtostderr = true;
-  FLAGS_logtostderr = true;
+  ::tflite::LogToStderr();
   ::testing::InitGoogleTest(&argc, argv);
   return RUN_ALL_TESTS();
 }
",0,test
9274bcebb31322370139467039034f8ff852b004,tensorflow/tensorflow,"Internal fixes to sample_distorted_bounding_box.

PiperOrigin-RevId: 268719203",sample_distorted_bounding_box_op.cc,"@@ -126,11 +126,19 @@ bool GenerateRandomCrop(int original_width, int original_height,
   int height = static_cast<int>(lrintf(std::sqrt(min_area / aspect_ratio)));
   int max_height = static_cast<int>(lrintf(std::sqrt(max_area / aspect_ratio)));
 
+  // TODO(b/140767341): Rewrite the generation logic to be more tolerant
+  // of floating point behavior.
   if (lrintf(max_height * aspect_ratio) > original_width) {
     // We must find the smallest max_height satisfying
     // round(max_height * aspect_ratio) <= original_width:
     const float kEps = 0.0000001;
     max_height = static_cast<int>((original_width + 0.5 - kEps) / aspect_ratio);
+    // If due some precision issues, we still cannot guarantee
+    // round(max_height * aspect_ratio) <= original_width, subtract 1 from
+    // max height.
+    if (lrintf(max_height * aspect_ratio) > original_width) {
+      max_height -= 1;
+    }
   }
 
   if (max_height > original_height) {
",0,train
333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`.

PiperOrigin-RevId: 396428995
Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",load_test.py,"@@ -31,7 +31,6 @@ import weakref
 from absl.testing import parameterized
 import numpy as np
 
-from google.protobuf import wrappers_pb2
 
 from tensorflow.python.client import session as session_lib
 from tensorflow.python.compat import compat
@@ -69,7 +68,6 @@ from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.saved_model import load
 from tensorflow.python.saved_model import load_options
-from tensorflow.python.saved_model import registration
 from tensorflow.python.saved_model import save
 from tensorflow.python.saved_model import save_options
 from tensorflow.python.saved_model import tag_constants
@@ -2130,36 +2128,6 @@ class LoadTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllClose(grads, expected_grads)
 
-  def test_load_registered(self, cycles):
-
-    @registration.register_serializable(name=f""Module{cycles}"")
-    class Module(tracking.AutoTrackable):
-
-      def __init__(self, name=""module""):
-        self.v = variables.Variable(1.)
-        self.name = name
-
-      def _serialize_to_proto(self, **unused_kwargs):
-        return wrappers_pb2.StringValue(value=self.name)
-
-      @classmethod
-      def _deserialize_from_proto(cls, proto, **unused_kwargs):
-        if proto.Is(wrappers_pb2.StringValue.DESCRIPTOR):
-          unpacked = wrappers_pb2.StringValue()
-          proto.Unpack(unpacked)
-          return cls(name=unpacked.value)
-        raise AssertionError(
-            ""Did not receive proto of correct type during deserialization. ""
-            f""Expected type {wrappers_pb2.StringValue.DESCRIPTOR.full_name}, ""
-            f""got {proto.TypeName()}"")
-
-    m = Module(""a"")
-    m.v.assign(5)
-    loaded = cycle(m, cycles)
-    self.assertIsInstance(loaded, Module)
-    self.assertEqual(5, loaded.v.numpy())
-    self.assertEqual(""a"", loaded.name)
-
 
 class SingleCycleTests(test.TestCase, parameterized.TestCase):
 
",0,test
333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`.

PiperOrigin-RevId: 396428995
Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",registration_saving_test.py,"@@ -0,0 +1,111 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Tests saving with registered Trackable classes and checkpoint functions.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+from absl.testing import parameterized
+
+from google.protobuf import wrappers_pb2
+from tensorflow.python.eager import test
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
+from tensorflow.python.saved_model import load
+from tensorflow.python.saved_model import registration
+from tensorflow.python.saved_model import save
+from tensorflow.python.training.tracking import tracking
+
+
+def cycle(obj, cycles, signatures=None, options=None):
+  to_save = obj
+  for _ in range(cycles):
+    path = tempfile.mkdtemp(prefix=test.get_temp_dir())
+    # If available, we'll run the save and restore preferring the GPU. This
+    # just makes sure we aren't throwing errors and have enough
+    # device(""CPU"") blocks to satisfy the placer.
+    with test_util.use_gpu():
+      save.save(to_save, path, signatures, options=options)
+      loaded = load.load(path)
+      signatures = loaded.signatures
+    to_save = loaded
+  return loaded
+
+
+@parameterized.named_parameters(
+    dict(testcase_name=""ReloadOnce"", cycles=1),
+    dict(testcase_name=""ReloadTwice"", cycles=2),
+    dict(testcase_name=""ReloadThrice"", cycles=3)
+)
+class SavedModelTest(test.TestCase, parameterized.TestCase):
+
+  def test_save_and_load(self, cycles):
+
+    @registration.register_serializable(name=f""SaveAndLoad{cycles}"")
+    class Module(tracking.AutoTrackable):
+
+      def __init__(self, name=""module""):
+        self.v = variables.Variable(1.)
+        self.name = name
+
+      def _serialize_to_proto(self, **unused_kwargs):
+        return wrappers_pb2.StringValue(value=self.name)
+
+      @classmethod
+      def _deserialize_from_proto(cls, proto, **unused_kwargs):
+        if proto.Is(wrappers_pb2.StringValue.DESCRIPTOR):
+          unpacked = wrappers_pb2.StringValue()
+          proto.Unpack(unpacked)
+          return cls(name=unpacked.value)
+        raise AssertionError(
+            ""Did not receive proto of correct type during deserialization. ""
+            f""Expected type {wrappers_pb2.StringValue.DESCRIPTOR.full_name}, ""
+            f""got {proto.TypeName()}"")
+
+    m = Module(""a"")
+    m.v.assign(5)
+    loaded = cycle(m, cycles)
+    self.assertIsInstance(loaded, Module)
+    self.assertEqual(5, loaded.v.numpy())
+    self.assertEqual(""a"", loaded.name)
+
+  def test_none_proto(self, cycles):
+
+    @registration.register_serializable(name=f""NoneProto{cycles}"")
+    class Module(tracking.AutoTrackable):
+
+      def __init__(self, name=""module""):
+        self.v = variables.Variable(1.)
+        self.name = name
+
+      # Leave _serialize_to_proto as the default (returns `None`).
+
+      @classmethod
+      def _deserialize_from_proto(cls, proto, **unused_kwargs):
+        self.assertEqual(proto.ByteSize(), 0)
+        return cls(""deserialized"")
+
+    m = Module(""a"")
+    m.v.assign(5)
+    loaded = cycle(m, cycles)
+    self.assertIsInstance(loaded, Module)
+    self.assertEqual(5, loaded.v.numpy())
+    self.assertEqual(""deserialized"", loaded.name)
+
+
+if __name__ == ""__main__"":
+  test.main()
",0,test
333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`.

PiperOrigin-RevId: 396428995
Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",registration_test.py,"@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-""""""Trackable class registration tests.""""""
+""""""Trackable class registration tests.
+
+For integrated tests, see registration_saving_test.py.
+""""""
 
 from __future__ import absolute_import
 from __future__ import division
@@ -20,7 +23,7 @@ from __future__ import print_function
 
 from absl.testing import parameterized
 
-from tensorflow.python.platform import test
+from tensorflow.python.eager import test
 from tensorflow.python.saved_model import registration
 from tensorflow.python.training.tracking import base
 
",0,test
333abd19d9c49393abb2466401b040cc417801d8,tensorflow/tensorflow,"Allow users to register objects that return `None` in `_serialize_to_proto`.

PiperOrigin-RevId: 396428995
Change-Id: I54d87e48e23a899c570c4cf5a91b630f2d8215d7",save.py,"@@ -1003,7 +1003,9 @@ def _write_object_proto(obj, proto, asset_file_def_index, function_name_map):
   registered_name = registration.get_registered_name(obj)
   if registered_name:
     proto.registered_name = registered_name
-    proto.serialized_user_proto.Pack(obj._serialize_to_proto())  # pylint: disable=protected-access
+    serialized_user_proto = obj._serialize_to_proto()  # pylint: disable=protected-access
+    if serialized_user_proto is not None:
+      proto.serialized_user_proto.Pack(serialized_user_proto)
 
 
 def _export_debug_info(exported_graph, export_dir):
",0,test
2f2b41a42d80c57cee5171beb89675f0875546d3,tensorflow/tensorflow,"Improved the performance of the batch normalization gradient computation by
leveraging index lists whenever possible.
Change: 110482898",batch_norm_op.h,"@@ -86,9 +86,20 @@ struct BatchNormGrad {
     const int rest_size = input.size() / depth;
 
     typedef typename TTypes<T>::ConstVec::Index Index;
+
     Eigen::DSizes<Index, 2> rest_by_depth(rest_size, depth);
+#if !defined(EIGEN_HAS_INDEX_LIST)
     Eigen::DSizes<Index, 2> rest_by_one(rest_size, 1);
     Eigen::DSizes<Index, 2> one_by_depth(1, depth);
+    Eigen::array<Index, 1> reduction_axis;
+    reduction_axis[0] = 0;  // Reduces on first dimension.
+#else
+    Eigen::IndexList<Index, Eigen::type2index<1> > rest_by_one;
+    rest_by_one.set(0, rest_size);
+    Eigen::IndexList<Eigen::type2index<1>, Index> one_by_depth;
+    one_by_depth.set(1, depth);
+    Eigen::IndexList<Eigen::type2index<0> > reduction_axis;
+#endif
 
     // db = out_backprop
     //
@@ -100,9 +111,6 @@ struct BatchNormGrad {
     // dm = sum_over_rest(out_backprop * gamma) * (-1 / rsqrt(v + epsilon))
     //
     // dx = out_backprop * (gamma * rsqrt(v + epsilon))
-    Eigen::array<Index, 1> reduction_axis;
-    reduction_axis[0] = 0;  // Reduces on first dimension.
-
     db.device(d) = out_backprop.reshape(rest_by_depth).sum(reduction_axis);
 
     // scratch1 = rsqrt(v + epsilon)
",0,train
c2c4c208679305d6d538255be569a2822f1c920f,tensorflow/tensorflow,"Delete scope argument (#6332)

* _linear scope bug: use the scope if provided

* Remove the scope argument

* Remove obsolete scope arg from _linear calls",rnn_cell_impl.py,"@@ -177,7 +177,7 @@ class BasicRNNCell(RNNCell):
     """"""Most basic RNN: output = new_state = act(W * input + U * state + B).""""""
     with vs.variable_scope(scope or ""basic_rnn_cell""):
       output = self._activation(
-          _linear([inputs, state], self._num_units, True, scope=scope))
+          _linear([inputs, state], self._num_units, True))
     return output, output
 
 
@@ -205,14 +205,13 @@ class GRUCell(RNNCell):
         # We start with bias of 1.0 to not reset and not update.
         r, u = array_ops.split(
             value=_linear(
-                [inputs, state], 2 * self._num_units, True, 1.0, scope=scope),
+                [inputs, state], 2 * self._num_units, True, 1.0),
             num_or_size_splits=2,
             axis=1)
         r, u = sigmoid(r), sigmoid(u)
       with vs.variable_scope(""candidate""):
         c = self._activation(_linear([inputs, r * state],
-                                     self._num_units, True,
-                                     scope=scope))
+                                     self._num_units, True))
       new_h = u * state + (1 - u) * c
     return new_h, new_h
 
@@ -292,7 +291,7 @@ class BasicLSTMCell(RNNCell):
         c, h = state
       else:
         c, h = array_ops.split(value=state, num_or_size_splits=2, axis=1)
-      concat = _linear([inputs, h], 4 * self._num_units, True, scope=scope)
+      concat = _linear([inputs, h], 4 * self._num_units, True)
 
       # i = input_gate, j = new_input, f = forget_gate, o = output_gate
       i, j, f, o = array_ops.split(value=concat, num_or_size_splits=4, axis=1)
@@ -450,8 +449,7 @@ class LSTMCell(RNNCell):
             partitioned_variables.fixed_size_partitioner(
                 self._num_unit_shards))
       # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True,
-                            scope=scope)
+      lstm_matrix = _linear([inputs, m_prev], 4 * self._num_units, bias=True)
       i, j, f, o = array_ops.split(
           value=lstm_matrix, num_or_size_splits=4, axis=1)
 
@@ -490,7 +488,7 @@ class LSTMCell(RNNCell):
             proj_scope.set_partitioner(
                 partitioned_variables.fixed_size_partitioner(
                     self._num_proj_shards))
-          m = _linear(m, self._num_proj, bias=False, scope=scope)
+          m = _linear(m, self._num_proj, bias=False)
 
         if self._proj_clip is not None:
           # pylint: disable=invalid-unary-operand-type
@@ -542,7 +540,7 @@ class OutputProjectionWrapper(RNNCell):
     output, res_state = self._cell(inputs, state)
     # Default scope: ""OutputProjectionWrapper""
     with vs.variable_scope(scope or ""output_projection_wrapper""):
-      projected = _linear(output, self._output_size, True, scope=scope)
+      projected = _linear(output, self._output_size, True)
     return projected, res_state
 
 
@@ -584,7 +582,7 @@ class InputProjectionWrapper(RNNCell):
     """"""Run the input projection and then the cell.""""""
     # Default scope: ""InputProjectionWrapper""
     with vs.variable_scope(scope or ""input_projection_wrapper""):
-      projected = _linear(inputs, self._num_proj, True, scope=scope)
+      projected = _linear(inputs, self._num_proj, True)
     return self._cell(projected, state)
 
 
@@ -820,7 +818,7 @@ class _SlimRNNCell(RNNCell):
     return output, state
 
 
-def _linear(args, output_size, bias, bias_start=0.0, scope=None):
+def _linear(args, output_size, bias, bias_start=0.0):
   """"""Linear map: sum_i(args[i] * W[i]), where W[i] is a variable.
 
   Args:
@@ -828,7 +826,6 @@ def _linear(args, output_size, bias, bias_start=0.0, scope=None):
     output_size: int, second dimension of W[i].
     bias: boolean, whether to add a bias term or not.
     bias_start: starting value to initialize the bias; 0 by default.
-    scope: (optional) Variable scope to create parameters in.
 
   Returns:
     A 2D Tensor with shape [batch x output_size] equal to
",0,train
432852375ec07cde915246c841d18d3993236f17,tensorflow/tensorflow,"Lazily allocate the referenced tensors set in UniqueTensorReferences.
Since the common case is that no elements will be put into the
set, this helps with both CPU cost and reduces the cache-thrashing
footprint of constructing a UniqueTensorReferences object (which is
done on every op in setting up the OpKernelContext).
Change: 113377001",unique_tensor_references.h,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_reference.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
+#include ""tensorflow/core/platform/macros.h""
 
 namespace tensorflow {
 
@@ -32,7 +33,7 @@ namespace tensorflow {
 // references switches to using an unordered set.
 class UniqueTensorReferences {
  public:
-  UniqueTensorReferences() : frozen_(false) {}
+  UniqueTensorReferences() : frozen_(false), referenced_tensors_set_(nullptr) {}
 
   ~UniqueTensorReferences() {
     if (!frozen_) {
@@ -44,6 +45,7 @@ class UniqueTensorReferences {
         tensor.Unref();
       }
     }
+    delete referenced_tensors_set_;
   }
 
   // Adds a reference to tensor if its buffer is not already referenced.
@@ -51,11 +53,11 @@ class UniqueTensorReferences {
     DCHECK(!frozen_);
     // Do nothing if the tensor has a null buffer.
     if (tensor.IsInitialized()) {
-      if (referenced_tensors_set_.size() > 0) {
+      if (referenced_tensors_set_ != nullptr) {
         // There are enough tensors that we are using a hash set to
         // de-duplicate.
         const TensorReference tensor_ref(tensor);
-        if (!referenced_tensors_set_.insert(tensor_ref).second) {
+        if (!referenced_tensors_set_->insert(tensor_ref).second) {
           // The tensor was a duplicate, so discard the reference.
           tensor_ref.Unref();
         }
@@ -70,12 +72,13 @@ class UniqueTensorReferences {
         if (kInVector == referenced_tensors_vector_.size()) {
           // There are too many tensors to keep using the N^2 algorithm
           // so start de-duplicating using a set.
-          DCHECK_EQ(0, referenced_tensors_set_.size());
           // Transfer the refs from the vector to the set.
-          referenced_tensors_set_.reserve(kInVector);
-          referenced_tensors_set_.insert(referenced_tensors_vector_.begin(),
-                                         referenced_tensors_vector_.end());
-          DCHECK_EQ(kInVector, referenced_tensors_set_.size());
+          DCHECK(referenced_tensors_set_ == nullptr);
+          referenced_tensors_set_ = new ReferencedTensorsSet;
+          referenced_tensors_set_->reserve(kInVector);
+          referenced_tensors_set_->insert(referenced_tensors_vector_.begin(),
+                                          referenced_tensors_vector_.end());
+          DCHECK_EQ(kInVector, referenced_tensors_set_->size());
           referenced_tensors_vector_.clear();
         }
       }
@@ -87,13 +90,15 @@ class UniqueTensorReferences {
   void FreezeAndReturnReferences(TensorReferenceVector* out_vector) {
     // Prevent any further additions.
     frozen_ = true;
-    if (referenced_tensors_set_.size() > 0) {
+    if (referenced_tensors_set_ != nullptr) {
       DCHECK(referenced_tensors_vector_.empty());
-      out_vector->reserve(referenced_tensors_set_.size());
-      for (const auto& ref : referenced_tensors_set_) {
+      out_vector->reserve(referenced_tensors_set_->size());
+      for (const auto& ref : *referenced_tensors_set_) {
         out_vector->push_back(ref);
       }
-      referenced_tensors_set_.clear();
+      referenced_tensors_set_->clear();
+      delete referenced_tensors_set_;
+      referenced_tensors_set_ = nullptr;
     } else {
       out_vector->reserve(referenced_tensors_vector_.size());
       for (const auto& ref : referenced_tensors_vector_) {
@@ -123,9 +128,16 @@ class UniqueTensorReferences {
 
   bool frozen_;
   TensorReferenceVector referenced_tensors_vector_;
-  std::unordered_set<TensorReference, TensorReferenceHashFn,
-                     TensorReferenceEqualFn>
-      referenced_tensors_set_;
+
+  typedef std::unordered_set<TensorReference, TensorReferenceHashFn,
+                             TensorReferenceEqualFn>
+      ReferencedTensorsSet;
+  // Lazily allocated hash set for when the number of tensors becomes too large.
+  // If this is non-NULL, then we use the hash set, otherwise, we use the
+  // referenced_tensors_vector_ (and do O(N^2) work per insertion).
+  ReferencedTensorsSet* referenced_tensors_set_;
+
+  TF_DISALLOW_COPY_AND_ASSIGN(UniqueTensorReferences);
 };
 
 }  // end namespace tensorflow
",0,train
a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function

This colocates the gradient functions with the input tensors.
Otherwise they would be executed on the device that is current when
calling GradientTape.gradient() which breaks splitting a large model
across multiple GPUs.

Fixes #33688",backprop.py,"@@ -116,13 +116,14 @@ class _MockOp(object):
     )
 
 
-def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs,
+def _gradient_function(op_name, attr_tuple, device, num_inputs, inputs, outputs,
                        out_grads, skip_input_indices):
   """"""Calls the gradient function of the op.
 
   Args:
     op_name: the name of the op to be differentiated.
     attr_tuple: the attrs, as a tuple.
+    device: the device of the op.
     num_inputs: the number of inputs to the op.
     inputs: inputs to the original operation.
     outputs: outputs to the original operation.
@@ -138,7 +139,8 @@ def _gradient_function(op_name, attr_tuple, num_inputs, inputs, outputs,
   if grad_fn is None:
     return [None] * num_inputs
 
-  return grad_fn(mock_op, *out_grads)
+  with ops.device(device):
+    return grad_fn(mock_op, *out_grads)
 
 
 pywrap_tensorflow.TFE_Py_RegisterGradientFunction(_gradient_function)
",0,train
a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function

This colocates the gradient functions with the input tensors.
Otherwise they would be executed on the device that is current when
calling GradientTape.gradient() which breaks splitting a large model
across multiple GPUs.

Fixes #33688",device_placement_test.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
 from tensorflow.python.eager import remote
@@ -27,6 +28,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import variables
 
 
 class SoftDevicePlacementTest(test.TestCase):
@@ -86,6 +88,23 @@ class SoftDevicePlacementTest(test.TestCase):
     # We don't support nested device placement right now.
     self.assertIn('GPU:0', c.device)
 
+  @test_util.run_gpu_only
+  def testGradientPlacement(self):
+    with ops.device('GPU:0'):
+      x = variables.Variable(1.0)
+    with ops.device('CPU:0'):
+      y = variables.Variable(1.0)
+
+    with backprop.GradientTape() as tape:
+      with ops.device('GPU:0'):
+        x1 = constant_op.constant(2.0) * x
+      with ops.device('CPU:0'):
+        y1 = constant_op.constant(2.0) * y
+      z = x1 + y1
+    grads = tape.gradient(z, [x, y])
+    self.assertIn('GPU:0', grads[0].device)
+    self.assertIn('CPU:0', grads[1].device)
+
 
 class ClusterPlacementTest(test.TestCase):
 
",0,train
a64ff0f2cda9d4e35ea450d4e945009a90ddee9a,tensorflow/tensorflow,"Add device annotations to gradient_function

This colocates the gradient functions with the input tensors.
Otherwise they would be executed on the device that is current when
calling GradientTape.gradient() which breaks splitting a large model
across multiple GPUs.

Fixes #33688",pywrap_tfe_src.cc,"@@ -3007,6 +3007,22 @@ PyObject* CopySequenceSettingIndicesToNull(
   return result;
 }
 
+PyObject* DeviceFromTensorSeq(PyObject* seq) {
+  for (Py_ssize_t i = 0; i < PySequence_Size(seq); i++) {
+    PyObject* item = PySequence_ITEM(seq, i);
+    PyObject* dev = PyObject_GetAttrString(item, ""device"");
+    Py_DECREF(item);
+    if (dev) {
+      const char* devStr = TFE_GetPythonString(dev);
+      if (devStr && !string(devStr).empty()) {
+        return dev;
+      }
+      Py_DECREF(dev);
+    }
+  }
+  return Py_None;
+}
+
 PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
                          PyObject* results) {
   std::vector<tensorflow::int64> input_ids = MakeTensorIDList(inputs);
@@ -3033,6 +3049,11 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
 
   string c_op_name = TFE_GetPythonString(op_name);
 
+  PyObject* device = DeviceFromTensorSeq(results);
+  if (device == Py_None) {
+    device = DeviceFromTensorSeq(inputs);
+  }
+
   PyObject* op_outputs;
   bool op_outputs_tuple_created = false;
   std::pair<bool, tensorflow::gtl::FlatSet<int>>* outputs_not_required;
@@ -3091,14 +3112,15 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
 
   TapeSetRecordOperation(
       op_name, inputs, results, input_ids, input_dtypes,
-      [op_name, attrs, num_inputs, op_inputs, op_outputs]() {
+      [op_name, attrs, device, num_inputs, op_inputs, op_outputs]() {
         Py_INCREF(op_name);
         Py_INCREF(attrs);
+        Py_INCREF(device);
         Py_INCREF(num_inputs);
         Py_INCREF(op_inputs);
         Py_INCREF(op_outputs);
         PyBackwardFunction* function = new PyBackwardFunction(
-            [op_name, attrs, num_inputs, op_inputs, op_outputs](
+            [op_name, attrs, device, num_inputs, op_inputs, op_outputs](
                 PyObject* output_grads,
                 const std::vector<tensorflow::int64>& unneeded_gradients) {
               if (PyErr_Occurred()) {
@@ -3118,8 +3140,8 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
                 skip_input_indices.reset(Py_None);
               }
               tensorflow::Safe_PyObjectPtr callback_args(Py_BuildValue(
-                  ""OOOOOOO"", op_name, attrs, num_inputs, op_inputs, op_outputs,
-                  output_grads, skip_input_indices.get()));
+                  ""OOOOOOOO"", op_name, attrs, device, num_inputs, op_inputs,
+                  op_outputs, output_grads, skip_input_indices.get()));
 
               tensorflow::Safe_PyObjectPtr result(
                   PyObject_CallObject(gradient_function, callback_args.get()));
@@ -3130,10 +3152,11 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
             });
         return function;
       },
-      [op_name, attrs, num_inputs, op_inputs,
+      [op_name, attrs, device, num_inputs, op_inputs,
        op_outputs](PyBackwardFunction* backward_function) {
         Py_DECREF(op_name);
         Py_DECREF(attrs);
+        Py_DECREF(device);
         Py_DECREF(num_inputs);
         Py_DECREF(op_inputs);
         Py_DECREF(op_outputs);
@@ -3143,6 +3166,7 @@ PyObject* RecordGradient(PyObject* op_name, PyObject* inputs, PyObject* attrs,
       forward_function);
 
   Py_DECREF(num_inputs);
+  Py_DECREF(device);
   if (op_outputs_tuple_created) Py_DECREF(op_outputs);
   if (op_inputs_tuple_created) Py_DECREF(op_inputs);
 
",0,train
6d10ef2c2ecd30bc61e125da41b80c1c02ac4cbd,tensorflow/tensorflow,fix v1 test,gradients_test.py,"@@ -1028,7 +1028,10 @@ class GetDependentVariablesTest(test_util.TensorFlowTestCase):
     with context.graph_mode():
       init = constant_op.constant(100.0)
       var = variable_scope.variable(init, name='a/replica_1')
-      var._handle = array_ops.identity(var, name='a')
+      if isinstance(var, variables.RefVariable):
+        var._variable = array_ops.identity(var, name='a')
+      else:
+        var._handle = array_ops.identity(var, name='a')
       var2 = custom_gradient.get_variable_by_name('a')
       self.assertEqual(var2.name, var.name)
   
",0,train
a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants

This allows LLVM (and the linker) to merge or pool the constants. In many cases
LLVM can derive unnamed_addr by itself, but not if it's a constant being passed
to a runtime function.

PiperOrigin-RevId: 236298140",ir_emitter.cc,"@@ -189,6 +189,7 @@ llvm::Constant* IrEmitter::EmitGlobalForLiteral(const Literal& literal) {
       /*Initializer=*/initializer,
       /*Name=*/"""");
   result_global->setAlignment(MinimumAlignmentForShape(literal.shape()));
+  result_global->setUnnamedAddr(llvm::GlobalVariable::UnnamedAddr::Global);
   return llvm::ConstantExpr::getBitCast(
       result_global, IrShapeType(literal.shape())->getPointerTo());
 }
",0,train
a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants

This allows LLVM (and the linker) to merge or pool the constants. In many cases
LLVM can derive unnamed_addr by itself, but not if it's a constant being passed
to a runtime function.

PiperOrigin-RevId: 236298140",cpu_external_constants_test.cc,"@@ -56,8 +56,8 @@ class CpuExternalConstantsTest : public CpuCodegenTest {
 
 TEST_F(CpuExternalConstantsTest, Basic) {
   TestWithArray(/*rows=*/1024, /*cols=*/1024, R""(
-CHECK-NOT: @constant_global_0 = external constant [1024 x [1024 x float]], align 16
-CHECK: @0 = private constant [4194304 x i8] {{.*}}, align 16
+CHECK-NOT: @constant_global_0 = external unnamed_addr constant [1024 x [1024 x float]], align 16
+CHECK: @0 = private unnamed_addr constant [4194304 x i8] {{.*}}, align 16
 )"");
 }
 
@@ -65,8 +65,8 @@ TEST_F(CpuExternalConstantsTest, BasicNegative) {
   // The constant array in this test case is small enough that there is no need
   // to externalize it.
   TestWithArray(/*rows=*/4, /*cols=*/4, R""(
-CHECK-NOT: @constant_global_0 = external constant [16 x float], align 8
-CHECK: @0 = private constant [64 x i8] {{.*}}, align 8
+CHECK-NOT: @constant_global_0 = external unnamed_addr constant [16 x float], align 8
+CHECK: @0 = private unnamed_addr constant [64 x i8] {{.*}}, align 8
 )"");
 }
 }  // namespace
",0,train
a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants

This allows LLVM (and the linker) to merge or pool the constants. In many cases
LLVM can derive unnamed_addr by itself, but not if it's a constant being passed
to a runtime function.

PiperOrigin-RevId: 236298140",cpu_literal_caching_test.cc,"@@ -56,8 +56,8 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK: private constant [48 x i8]
-CHECK-NOT: private constant [48 x i8]
+CHECK: private unnamed_addr constant [48 x i8]
+CHECK-NOT: private unnamed_addr constant [48 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
@@ -102,10 +102,10 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK-DAG: private constant [4 x i8]
-CHECK-DAG: private constant [8 x i8]
-CHECK-NOT: private constant [4 x i8]
-CHECK-NOT: private constant [8 x i8]
+CHECK-DAG: private unnamed_addr constant [4 x i8]
+CHECK-DAG: private unnamed_addr constant [8 x i8]
+CHECK-NOT: private unnamed_addr constant [4 x i8]
+CHECK-NOT: private unnamed_addr constant [8 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
",0,train
a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants

This allows LLVM (and the linker) to merge or pool the constants. In many cases
LLVM can derive unnamed_addr by itself, but not if it's a constant being passed
to a runtime function.

PiperOrigin-RevId: 236298140",cpu_outfeed_test.cc,"@@ -38,7 +38,7 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK: private constant [48 x i8]
+CHECK: private unnamed_addr constant [48 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
",0,train
a9b386407034529769a7fc6f95e7aaf4c1c0a15f,tensorflow/tensorflow,"[XLA:CPU] Always set the unnamed_addr bit on constants

This allows LLVM (and the linker) to merge or pool the constants. In many cases
LLVM can derive unnamed_addr by itself, but not if it's a constant being passed
to a runtime function.

PiperOrigin-RevId: 236298140",fused_ir_emitter.cc,"@@ -82,6 +82,7 @@ Status FusedIrEmitter::HandleConstant(HloInstruction* constant) {
         /*Linkage=*/llvm::GlobalValue::PrivateLinkage,
         /*Initializer=*/initializer,
         /*Name=*/"""");
+    global->setUnnamedAddr(llvm::GlobalVariable::UnnamedAddr::Global);
     llvm::Constant* shape_constant = llvm::ConstantExpr::getBitCast(
         global,
         llvm_ir::ShapeToIrType(literal.shape(), module_)->getPointerTo());
",0,train
1d02bb4cbaa9308dd7b4ad21ff1c74dd4134f920,tensorflow/tensorflow,Fixed indentation in test_ps_session_config,run_config_test.py,"@@ -1197,8 +1197,8 @@ class RunConfigSessionConfigTest(test.TestCase):
     }
     run_config = _create_run_config_with_cluster_spec(tf_config)
     self._assert_equal_session_config(
-      run_config.session_config,
-      ['/job:ps', '/job:worker', '/job:chief', '/job:master'])
+        run_config.session_config,
+        ['/job:ps', '/job:worker', '/job:chief', '/job:master'])
 
   def test_evaluator_session_config(self):
     tf_config = {
",0,train
be5d98a8bd541bdc45a9c727a2b0a7195c85f739,tensorflow/tensorflow,"Adds integration tests for DNNClassifier.

PiperOrigin-RevId: 157592010",dnn_test.py,"@@ -695,6 +695,169 @@ class DNNRegressorIntegrationTest(test.TestCase):
         batch_size=batch_size)
 
 
+class DNNClassifierIntegrationTest(test.TestCase):
+
+  def setUp(self):
+    self._model_dir = tempfile.mkdtemp()
+
+  def tearDown(self):
+    if self._model_dir:
+      shutil.rmtree(self._model_dir)
+
+  def _test_complete_flow(
+      self, train_input_fn, eval_input_fn, predict_input_fn, input_dimension,
+      n_classes, batch_size):
+    feature_columns = [
+        feature_column.numeric_column('x', shape=(input_dimension,))]
+    est = dnn.DNNClassifier(
+        hidden_units=(2, 2),
+        feature_columns=feature_columns,
+        n_classes=n_classes,
+        model_dir=self._model_dir)
+
+    # TRAIN
+    num_steps = 10
+    est.train(train_input_fn, steps=num_steps)
+
+    # EVALUATE
+    scores = est.evaluate(eval_input_fn)
+    self.assertEqual(num_steps, scores[ops.GraphKeys.GLOBAL_STEP])
+    self.assertIn('loss', six.iterkeys(scores))
+
+    # PREDICT
+    predicted_proba = np.array([
+        x[prediction_keys.PredictionKeys.PROBABILITIES]
+        for x in est.predict(predict_input_fn)
+    ])
+    self.assertAllEqual((batch_size, n_classes), predicted_proba.shape)
+
+    # EXPORT
+    feature_spec = feature_column.make_parse_example_spec(feature_columns)
+    serving_input_receiver_fn = export.build_parsing_serving_input_receiver_fn(
+        feature_spec)
+    export_dir = est.export_savedmodel(tempfile.mkdtemp(),
+                                       serving_input_receiver_fn)
+    self.assertTrue(gfile.Exists(export_dir))
+
+  def test_numpy_input_fn(self):
+    """"""Tests complete flow with numpy_input_fn.""""""
+    n_classes = 2
+    input_dimension = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    x_data = data.reshape(batch_size, input_dimension)
+    y_data = np.reshape(data[:batch_size], (batch_size, 1))
+    # learn y = x
+    train_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        y=y_data,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = numpy_io.numpy_input_fn(
+        x={'x': x_data},
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+  def test_pandas_input_fn(self):
+    """"""Tests complete flow with pandas_input_fn.""""""
+    if not HAS_PANDAS:
+      return
+    input_dimension = 1
+    n_classes = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size, dtype=np.float32)
+    x = pd.DataFrame({'x': data})
+    y = pd.Series(data)
+    train_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        num_epochs=None,
+        shuffle=True)
+    eval_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        y=y,
+        batch_size=batch_size,
+        shuffle=False)
+    predict_input_fn = pandas_io.pandas_input_fn(
+        x=x,
+        batch_size=batch_size,
+        shuffle=False)
+
+    self._test_complete_flow(
+        train_input_fn=train_input_fn,
+        eval_input_fn=eval_input_fn,
+        predict_input_fn=predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+  def test_input_fn_from_parse_example(self):
+    """"""Tests complete flow with input_fn constructed from parse_example.""""""
+    input_dimension = 2
+    n_classes = 2
+    batch_size = 10
+    data = np.linspace(0., 2., batch_size * input_dimension, dtype=np.float32)
+    data = data.reshape(batch_size, input_dimension)
+
+    serialized_examples = []
+    for datum in data:
+      example = example_pb2.Example(features=feature_pb2.Features(
+          feature={
+              'x': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum)),
+              'y': feature_pb2.Feature(
+                  float_list=feature_pb2.FloatList(value=datum[:1])),
+          }))
+      serialized_examples.append(example.SerializeToString())
+
+    feature_spec = {
+        'x': parsing_ops.FixedLenFeature([input_dimension], dtypes.float32),
+        'y': parsing_ops.FixedLenFeature([1], dtypes.float32),
+    }
+    def _train_input_fn():
+      feature_map = parsing_ops.parse_example(serialized_examples, feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _eval_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      labels = features.pop('y')
+      return features, labels
+    def _predict_input_fn():
+      feature_map = parsing_ops.parse_example(
+          input_lib.limit_epochs(serialized_examples, num_epochs=1),
+          feature_spec)
+      features = _queue_parsed_features(feature_map)
+      features.pop('y')
+      return features, None
+
+    self._test_complete_flow(
+        train_input_fn=_train_input_fn,
+        eval_input_fn=_eval_input_fn,
+        predict_input_fn=_predict_input_fn,
+        input_dimension=input_dimension,
+        n_classes=n_classes,
+        batch_size=batch_size)
+
+
 def _full_var_name(var_name):
   return '%s/part_0:0' % var_name
 
",0,train
966ed1cafc770e81e6a56be3f5715e0fe257b742,tensorflow/tensorflow,Use provided host name/ip instead of localhost if possible,grpc_server_lib.cc,"@@ -132,8 +132,9 @@ GrpcServer::~GrpcServer() {
 void GrpcServer::MaybeMutateBuilder(::grpc::ServerBuilder* builder) {}
 
 // Look up the port that has been requested for this task in `server_def`.
-Status GrpcServer::GetPort(const ServerDef& server_def, int* port) const {
+Status GrpcServer::GetHostAndPort(const ServerDef& server_def, string* host_name, int* port) const {
   *port = -1;
+  *host_name = ""localhost"";
   for (const auto& job : server_def.cluster().job()) {
     if (job.name() == server_def.job_name()) {
       auto iter = job.tasks().find(server_def.task_index());
@@ -153,6 +154,10 @@ Status GrpcServer::GetPort(const ServerDef& server_def, int* port) const {
               ""Could not parse port for local server from \"""", iter->second,
               ""\""."");
         }
+
+        if (colon_index != string::npos && !iter->second.substr(0, colon_index).empty()) {
+          *host_name = iter->second.substr(0, colon_index);
+        }
       }
       break;
     }
@@ -175,7 +180,9 @@ Status GrpcServer::Init(const GrpcServerOptions& opts) {
   // otherwise if 'task_index=-1' the program will abort.
 
   int requested_port;
-  TF_RETURN_IF_ERROR(GetPort(server_def_, &requested_port));
+  string host_name;
+  TF_RETURN_IF_ERROR(GetHostAndPort(server_def_, &host_name, &requested_port));
+  host_name_ = host_name;
 
   SessionOptions sess_opts;
   ConfigProto config = server_def_.default_session_config();
@@ -325,7 +332,7 @@ Status GrpcServer::ParseChannelSpec(const WorkerCacheFactoryOptions& options,
                                        task.second);
       }
       if (job.name() == *options.job_name && task.first == options.task_index) {
-        host_port = strings::StrCat(""localhost:"", bound_port_);
+        host_port = strings::StrCat(host_name_, "":"", bound_port_);
       } else {
         host_port = task.second;
       }
@@ -478,7 +485,7 @@ Status GrpcServer::Join() {
 }
 
 const string GrpcServer::target() const {
-  return strings::StrCat(""grpc://localhost:"", bound_port_);
+  return strings::StrCat(""grpc://"", host_name_, "":"", bound_port_);
 }
 
 std::shared_ptr<::grpc::ServerCredentials> GrpcServer::GetServerCredentials(
",0,train
966ed1cafc770e81e6a56be3f5715e0fe257b742,tensorflow/tensorflow,Use provided host name/ip instead of localhost if possible,grpc_server_lib.h,"@@ -104,7 +104,7 @@ class GrpcServer : public ServerInterface {
   Status UpdateServerDef(const ServerDef& server_def);
 
  protected:
-  virtual Status GetPort(const ServerDef& server_def, int* port) const;
+  virtual Status GetHostAndPort(const ServerDef& server_def, string* host_name, int* port) const;
   Status Init(const GrpcServerOptions& opts = GrpcServerOptions());
 
   // A subclass can override this method to support secure credentials.
@@ -136,6 +136,9 @@ class GrpcServer : public ServerInterface {
   // The port to which this server is bound.
   int bound_port_ = 0;
 
+  // The host name of this server
+  string host_name_;
+
   // Guards server configuration, server, and state.
   mutex mu_;
 
",0,train
0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder.

PiperOrigin-RevId: 190312254",xla_builder.cc,"@@ -284,10 +284,12 @@ XlaOp XlaBuilder::Mul(const XlaOp& lhs, const XlaOp& rhs,
 }
 
 XlaOp XlaBuilder::ConstantLiteral(const Literal& literal) {
-  HloInstructionProto instr;
-  *instr.mutable_shape() = literal.shape();
-  *instr.mutable_literal() = literal.ToProto();
-  return AddInstruction(std::move(instr), HloOpcode::kConstant);
+  return NoteErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    HloInstructionProto instr;
+    *instr.mutable_shape() = literal.shape();
+    *instr.mutable_literal() = literal.ToProto();
+    return AddInstruction(std::move(instr), HloOpcode::kConstant);
+  }());
 }
 
 XlaOp XlaBuilder::Call(const XlaComputation& computation,
@@ -794,8 +796,9 @@ XlaOp XlaBuilder::Recv(const Shape& shape, const ChannelHandle& handle) {
   return UnimplementedOp();
 }
 
-XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode,
-                                 tensorflow::gtl::ArraySlice<XlaOp> operands) {
+StatusOr<XlaOp> XlaBuilder::AddInstruction(
+    HloInstructionProto&& instr, HloOpcode opcode,
+    tensorflow::gtl::ArraySlice<XlaOp> operands) {
   const int64 handle = instructions_.size();
   instr.set_id(handle);
   instr.set_opcode(HloOpcodeString(opcode));
@@ -806,6 +809,10 @@ XlaOp XlaBuilder::AddInstruction(HloInstructionProto&& instr, HloOpcode opcode,
     instr.set_name(StrCat(instr.name(), ""."", handle));
   }
   for (const auto& operand : operands) {
+    TF_RET_CHECK(operand.builder_ != nullptr);
+    TF_RET_CHECK(operand.builder_ == this)
+        << ""Do not add XlaOp from builder "" << operand.builder_->name()
+        << "" to builder "" << this->name();
     instr.add_operand_ids(operand.handle());
     // TODO(b/74197823): Set metadata and sharding.
   }
",0,train
0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder.

PiperOrigin-RevId: 190312254",xla_builder.h,"@@ -706,8 +706,9 @@ class XlaBuilder {
   StatusOr<ProgramShape> GetProgramShape();
 
  private:
-  XlaOp AddInstruction(HloInstructionProto&& instr, HloOpcode opcode,
-                       tensorflow::gtl::ArraySlice<XlaOp> operands = {});
+  StatusOr<XlaOp> AddInstruction(
+      HloInstructionProto&& instr, HloOpcode opcode,
+      tensorflow::gtl::ArraySlice<XlaOp> operands = {});
 
   // Notes that the error occurred by:
   // * storing it internally and capturing a backtrace if it's the first error
",0,train
0a335dc4fd8cae06d331589eab5858fd0a3ffc73,tensorflow/tensorflow,"[XLA] Prevent using XlaOp from the wrong XlaBuilder.

PiperOrigin-RevId: 190312254",xla_builder_test.cc,"@@ -179,5 +179,16 @@ TEST_F(XlaBuilderTest, BinopHasInDimAndDegenerateBroadcast) {
                             op::Broadcast(op::Reshape(op::Parameter(1)))));
 }
 
+TEST_F(XlaBuilderTest, OperandFromWrongBuilder) {
+  XlaBuilder b1(""b1"");
+  auto p0 = b1.Parameter(0, ShapeUtil::MakeShape(F32, {}), ""p0"");
+  XlaBuilder builder(""main"");
+  builder.Add(p0, p0);
+  auto statusor = builder.Build();
+  ASSERT_FALSE(statusor.ok());
+  EXPECT_THAT(statusor.status().error_message(),
+              HasSubstr(""Do not add XlaOp from builder b1 to builder main""));
+}
+
 }  // namespace
 }  // namespace xla
",0,train
10522e2c58649fd8588ec32f8e11d25f18d271e4,tensorflow/tensorflow,"if condition modified

if condition modified to not use the else part",image_ops_impl.py,"@@ -1737,9 +1737,7 @@ def convert_image_dtype(image, dtype, saturate=False, name=None):
   """"""
   image = ops.convert_to_tensor(image, name='image')
   dtype = tf.dtypes.as_dtype(dtype)
-  if dtype.is_floating or dtype.is_integer:
-    pass
-  else:
+  if not dtype.is_floating and not dtype.is_integer:
     raise AttributeError(""dtype must be either floating point or integer"")
   if dtype == image.dtype:
     return array_ops.identity(image, name=name)
",0,train
c5b8e150645e5c2178b65477ad575c35279124d8,tensorflow/tensorflow,"Fix reference count leak with scatter_nd

Fixes #27288

PiperOrigin-RevId: 250102793",scatter_nd_op.cc,"@@ -273,6 +273,7 @@ class ScatterNdUpdateOp : public OpKernel {
     if (dtype_ == DT_RESOURCE) {
       Var* v;
       OP_REQUIRES_OK(c, LookupResource(c, HandleFromInput(c, 0), &v));
+      core::ScopedUnref scoped_unref(v);
       Tensor* t = v->tensor();
       params = *t;
       params_shape = params.shape();
",0,test
0e3574d39c66d937fa9f9d2e25554aab0066f250,tensorflow/tensorflow,"Add rank check to Sub op delegation to NNAPI

PiperOrigin-RevId: 307821863
Change-Id: Ib98448d67e9948576e6c9fb43a98d364ab434e37",nnapi_delegate.cc,"@@ -1799,7 +1799,7 @@ bool NNAPIDelegateKernel::Validate(
              "" NNAPI only support float tanh."", &val_ctx);
     } break;
     case kTfLiteBuiltinSub: {
-      ExpectMaxOpVersion(version, 2, &val_ctx);
+      ExpectMaxOpVersion(version, 3, &val_ctx);
       const TfLiteType input_type =
           context->tensors[node->inputs->data[0]].type;
       Expect((android_sdk_version >= kMinSdkVersionForNNAPI11 &&
@@ -1808,6 +1808,13 @@ bool NNAPIDelegateKernel::Validate(
                   IsQuantized(input_type)),
              NNAPIValidationFailureType::kUnsupportedInputType,
              ""NNAPI only support float sub."", &val_ctx);
+      const int input0_rank =
+          context->tensors[node->inputs->data[0]].dims->size;
+      const int input1_rank =
+          context->tensors[node->inputs->data[1]].dims->size;
+      Expect(input0_rank <= 4 && input1_rank <= 4,
+             NNAPIValidationFailureType::kUnsupportedOperandRank,
+             ""Input rank must be <= 4"", &val_ctx);
     } break;
     case kTfLiteBuiltinDiv: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2327,7 +2334,7 @@ bool NNAPIDelegateKernel::Validate(
                            ""Unsupported operation type."", &val_ctx);
   }
   return val_ctx.is_valid;
-}
+}  // NOLINT(readability/fn_size)
 
 TfLiteStatus NNAPIDelegateKernel::Map(
     TfLiteContext* context, int builtin_code, int version,
",0,test
175e49a0df73d6256146152591bf599bd3a9734b,tensorflow/tensorflow,[CostModel] Add TF_ASSERT_OK,costmodel_test.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/cost_graph.pb.h""
 #include ""tensorflow/core/framework/step_stats.pb.h""
 #include ""tensorflow/core/graph/graph.h""
+#include ""tensorflow/core/lib/core/status_test_util.h""
 #include ""tensorflow/core/platform/protobuf.h""
 #include ""tensorflow/core/platform/test.h""
 #include ""tensorflow/core/util/dump_graph.h""
@@ -83,8 +84,10 @@ TEST(CostModelTest, GlobalId) {
   CostModelManager cost_model_manager;
   collector.BuildCostModel(&cost_model_manager, device_map);
   CostGraphDef cost_graph_def;
-  cost_model_manager.AddToCostGraphDef(graph1.get(), &cost_graph_def);
-  cost_model_manager.AddToCostGraphDef(graph2.get(), &cost_graph_def);
+  TF_ASSERT_OK(
+      cost_model_manager.AddToCostGraphDef(graph1.get(), &cost_graph_def));
+  TF_ASSERT_OK(
+      cost_model_manager.AddToCostGraphDef(graph2.get(), &cost_graph_def));
   ASSERT_EQ(cost_graph_def.node_size(), 12);
   absl::flat_hash_map<int32, const CostGraphDef::Node> ids;
   for (auto node : cost_graph_def.node()) {
",0,train
3df3f818da59771113d9b2b24cd06072ea948dc3,tensorflow/tensorflow,"Use 'JNI_FALSE' rather than 'false' for Java boolean value.

PiperOrigin-RevId: 383832302
Change-Id: Ifefbe56bbd44b9814f323fde560b46081bca3fe1",nativeinterpreterwrapper_jni.cc,"@@ -254,7 +254,7 @@ Java_org_tensorflow_lite_NativeInterpreterWrapper_hasUnresolvedFlexOp(
     JNIEnv* env, jclass clazz, jlong handle) {
 #if TFLITE_DISABLE_SELECT_JAVA_APIS
   TFLITE_LOG(tflite::TFLITE_LOG_WARNING, ""Not supported: hasUnresolvedFlexOp"");
-  return false;
+  return JNI_FALSE;
 #else
   Interpreter* interpreter = convertLongToInterpreter(env, handle);
   if (interpreter == nullptr) return JNI_FALSE;
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",check_numerics_callback_test.py,"@@ -94,10 +94,16 @@ class CheckNumericsCallbackTest(test_util.TensorFlowTestCase):
 
     dataset = dataset_ops.Dataset.from_tensor_slices(tensor).batch(2).map(
         map_fn)
-    iterator = dataset_ops.make_one_shot_iterator(dataset)
 
-    self.assertAllClose(self.evaluate(iterator.get_next()), np.log([1.25, 2]))
-    self.assertAllClose(self.evaluate(iterator.get_next()), np.log([3.25, 5]))
+    @def_function.function
+    def get_batches():
+      iterator = iter(dataset)
+      return [next(iterator), next(iterator)]
+
+    batches = self.evaluate(get_batches())
+    self.assertLen(batches, 2)
+    self.assertAllClose(batches[0], np.log([1.25, 2]))
+    self.assertAllClose(batches[1], np.log([3.25, 5]))
 
 
 class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase):
@@ -267,6 +273,23 @@ class CheckNumericsCallbackUnhealthyTest(test_util.TensorFlowTestCase):
     self.assertTrue(re.search(r""Stack trace of op's creation"", message))
     self.assertIn(""accum.assign(accum * 2.0)"", message)
 
+  @test_util.run_in_graph_and_eager_modes
+  def testNanInConstIsCaptured(self):
+    check_numerics_callback.enable_check_numerics()
+    v = variables.Variable(3.0, dtype=dtypes.float32)
+    @def_function.function
+    def add_a_bad_constant(x):
+      c = constant_op.constant(np.nan)
+      return x + c
+    if not context.executing_eagerly():
+      self.evaluate(v.initializer)
+    message = self._assertRaisesInvalidArgumentErrorAndGetMessage(
+        lambda: self.evaluate(add_a_bad_constant(v)))
+    self.assertTrue(re.search(r""graph op.*\""Const\"""", message))
+    self.assertTrue(re.search(r""dtype:.*float32"", message))
+    self.assertTrue(re.search(r""shape:.*\(\)"", message))
+    self.assertTrue(re.search(r""Graph name:.*add_a_bad_constant"", message))
+
   @test_util.run_in_graph_and_eager_modes
   def testCatchInfinityInDatasetMapFunction(self):
     """"""Test that callback catches NaN in a tf.dataset map function.""""""
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",debug_events_monitors_test.py,"@@ -173,7 +173,8 @@ class DebugEventsMonitorTest(dumping_callback_test_lib.DumpingCallbackTestBase,
         self.assertLen(traces[1].debug_tensor_value, 11)
         self.assertLen(traces[2].debug_tensor_value, 11)
       elif tensor_debug_mode == ""FULL_TENSOR"":
-        self.assertLen(traces, 4)  # [Placeholder:0, Unique:0, Unique:1, Sum:0].
+        # [Placeholder:0, Unique:0, Unique:1, Const:0, Sum:0].
+        self.assertLen(traces, 5)
         self.assertEqual(traces[0].op_type, ""Placeholder"")
         self.assertEqual(traces[0].output_slot, 0)
         self.assertIsNone(traces[0].debug_tensor_value)
@@ -192,11 +193,16 @@ class DebugEventsMonitorTest(dumping_callback_test_lib.DumpingCallbackTestBase,
         self.assertAllEqual(
             reader.graph_execution_trace_to_tensor_value(traces[2]),
             [0, 1, 2, 3, 0])
-        self.assertEqual(traces[3].op_type, ""Sum"")
+        self.assertEqual(traces[3].op_type, ""Const"")
         self.assertEqual(traces[3].output_slot, 0)
         self.assertIsNone(traces[3].debug_tensor_value)
         self.assertAllClose(
-            reader.graph_execution_trace_to_tensor_value(traces[3]), 17.)
+            reader.graph_execution_trace_to_tensor_value(traces[3]), [0])
+        self.assertEqual(traces[4].op_type, ""Sum"")
+        self.assertEqual(traces[4].output_slot, 0)
+        self.assertIsNone(traces[4].debug_tensor_value)
+        self.assertAllClose(
+            reader.graph_execution_trace_to_tensor_value(traces[4]), 17.)
 
 
 class AlertDataObjectsTest(test_util.TensorFlowTestCase):
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",dumping_callback.py,"@@ -292,7 +292,12 @@ class _DumpingCallback(object):
       # TODO(cais): Evaluate performance optimization options. For the
       # `NO_TENSOR` debug mode, an alternative is to add `debug_tensor` as a
       # control dependency of `tensor.op` without an additional identity op.
-      if tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR:
+      if (tensor_debug_mode == debug_event_pb2.TensorDebugMode.FULL_TENSOR and
+          op_type != ""Const""):
+        # NOTE(b/153716279): Under v1 graph mode, overriding the output tensor
+        # of Const ops can lead to downstream errors related to shapes. We opt
+        # to use an identity op to avoid this issue at the cost of slightly
+        # larger graph size.
         return debug_tensor
       else:
         identity = array_ops.identity(tensor)
@@ -530,8 +535,8 @@ class _DumpingCallback(object):
       is_v1_graph_mode = not ops.executing_eagerly_outside_functions()
       context_id = self._get_context_id(graph)  # Innermost context ID.
       output_tensor_ids = self._get_symbolic_tensor_ids(len(outputs))
-      if op_type in (""Placeholder"", ""PlaceholderWithDefault""):
-        # In some cases, the op name of a Placeholder op in a graph
+      if op_type in (""Const"", ""Placeholder"", ""PlaceholderWithDefault""):
+        # In some cases, the op name of a Const or Placeholder op in a graph
         # can be duplicate (e.g., with the name ""resource"").
         # When this happens, we give the op a debugger-generated name
         # in order to prevent problems and check failures down the pipe.
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",dumping_callback_test.py,"@@ -289,7 +289,8 @@ class DumpingCallbackTest(
     with debug_events_reader.DebugDataReader(self.dump_root) as reader:
       reader.update()
       graph_exec_traces = reader.graph_execution_traces()
-      executed_op_types = [trace.op_type for trace in graph_exec_traces]
+      executed_op_types = [trace.op_type for trace in graph_exec_traces
+                           if trace.op_type != ""Const""]
       self.assertCountEqual(
           executed_op_types,
           [""Placeholder"", ""Placeholder"", ""AddV2"", ""Sub"", ""RealDiv""])
@@ -344,6 +345,46 @@ class DumpingCallbackTest(
           self.assertAllClose(trace.debug_tensor_value,
                               [tensor_id, 19, 1, 8, 8, 0, 0, 0, 0, 0])
 
+  @parameterized.named_parameters(
+      (""CurtHealth"", ""CURT_HEALTH""),
+      (""FullTensor"", ""FULL_TENSOR""),
+  )
+  @test_util.run_in_graph_and_eager_modes
+  def testConstTensorsAreCaptured(self, tensor_debug_mode):
+    writer = dumping_callback.enable_dump_debug_info(
+        self.dump_root, tensor_debug_mode=tensor_debug_mode)
+    @def_function.function
+    def times_two_plus_three(x):
+      return x * constant_op.constant(2.0) + constant_op.constant(3.0)
+    self.assertAllEqual(
+        self.evaluate(times_two_plus_three(10.0)), 23.0)
+    writer.FlushNonExecutionFiles()
+    writer.FlushExecutionFiles()
+
+    with debug_events_reader.DebugDataReader(self.dump_root) as reader:
+      reader.update()
+      const_traces = [trace for trace in reader.graph_execution_traces()
+                      if trace.op_type == ""Const""]
+      self.assertGreaterEqual(len(const_traces), 3)
+      if tensor_debug_mode == ""CURT_HEALTH"":
+        # Under CURT_HEALTH, each debug tensor value has the form
+        # [tensor_id, has_inf_or_nan].
+        self.assertLen(const_traces[0].debug_tensor_value, 2)
+        self.assertEqual(const_traces[0].debug_tensor_value[1], 0)
+        self.assertLen(const_traces[1].debug_tensor_value, 2)
+        self.assertEqual(const_traces[1].debug_tensor_value[1], 0)
+        self.assertLen(const_traces[2].debug_tensor_value, 2)
+        self.assertEqual(const_traces[2].debug_tensor_value[1], 0)
+      else:  # FULL_TENSOR.
+        const_tensor_values = [
+            reader.graph_execution_trace_to_tensor_value(const_trace)
+            for const_trace in const_traces]
+        # Avoid making assertion on the particular order of the debug tensors
+        # for the three Consts because it may be indeterminate.
+        self.assertIn(10.0, const_tensor_values)
+        self.assertIn(2.0, const_tensor_values)
+        self.assertIn(3.0, const_tensor_values)
+
   @parameterized.named_parameters(
       (""Shape"", ""SHAPE""),
   )
@@ -367,7 +408,8 @@ class DumpingCallbackTest(
     with debug_events_reader.DebugDataReader(self.dump_root) as reader:
       reader.update()
       graph_exec_traces = reader.graph_execution_traces()
-      executed_op_types = [trace.op_type for trace in graph_exec_traces]
+      executed_op_types = [trace.op_type for trace in graph_exec_traces
+                           if trace.op_type != ""Const""]
       self.assertEqual(
           executed_op_types,
           [""Placeholder"", ""Placeholder"", ""LogicalAnd"", ""LogicalNot""])
@@ -489,7 +531,8 @@ class DumpingCallbackTest(
         _, stack_frames = reader.read_graph_op_creation_stack_trace(op_digest)
         self._verifyStackFrames(stack_frames)
 
-      graph_exec_traces = reader.graph_execution_traces()
+      graph_exec_traces = [trace for trace in reader.graph_execution_traces()
+                           if trace.op_type != ""Const""]
       executed_op_types = [digest.op_type for digest in graph_exec_traces]
       self.assertEqual(
           executed_op_types,
@@ -902,10 +945,10 @@ class DumpingCallbackTest(
       reader.update()
       graph_exec_digests = reader.graph_execution_traces(digest=True)
       executed_op_types = [digest.op_type for digest in graph_exec_digests
-                           if digest.op_type != ""Placeholder""]
+                           if digest.op_type not in (""Const"", ""Placeholder"")]
       tensor_values = [reader.graph_execution_trace_to_tensor_value(digest)
                        for digest in graph_exec_digests
-                       if digest.op_type != ""Placeholder""]
+                       if digest.op_type not in (""Const"", ""Placeholder"")]
 
       if tensor_dtypes == [dtypes.float32] and not op_regex:
         self.assertEqual(executed_op_types, [""Unique"", ""Sum""])
@@ -1003,7 +1046,8 @@ class DumpingCallbackTest(
           self.assertAllClose(tensor_values, [8.0])
 
       graph_exec_traces = reader.graph_execution_traces()
-      executed_op_types = [trace.op_type for trace in graph_exec_traces]
+      executed_op_types = [trace.op_type for trace in graph_exec_traces
+                           if trace.op_type != ""Const""]
       if tensor_debug_mode != ""CURT_HEALTH"":
         # Less outputs a boolean tensor, which is not tracked under CURT_HEALTH.
         # The Less op should have been executed 5 times.
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",constant_op.py,"@@ -28,6 +28,7 @@ from tensorflow.core.framework import types_pb2
 from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import op_callbacks
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
@@ -299,11 +300,17 @@ def _constant_impl(
           value, dtype=dtype, shape=shape, verify_shape=verify_shape,
           allow_broadcast=allow_broadcast))
   dtype_value = attr_value_pb2.AttrValue(type=tensor_value.tensor.dtype)
+  attrs = {""value"": tensor_value, ""dtype"": dtype_value}
   const_tensor = g._create_op_internal(  # pylint: disable=protected-access
-      ""Const"", [], [dtype_value.type],
-      attrs={""value"": tensor_value,
-             ""dtype"": dtype_value},
-      name=name).outputs[0]
+      ""Const"", [], [dtype_value.type], attrs=attrs, name=name).outputs[0]
+
+  if op_callbacks.should_invoke_op_callbacks():
+    # TODO(b/147670703): Once the special-op creation code paths
+    # are unified. Remove this `if` block.
+    callback_outputs = op_callbacks.invoke_op_callbacks(
+        ""Const"", tuple(), attrs, (const_tensor,), op_name=name, graph=g)
+    if callback_outputs is not None:
+      const_tensor, = callback_outputs
   return const_tensor
 
 
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",op_callbacks_test.py,"@@ -109,7 +109,8 @@ class _NumpyFunctionCallback(object):
         if compat.as_bytes(op_type) in (_ENTER_OP, _EXIT_OP, _IF_OP, _MERGE_OP,
                                         _NEXT_ITERATION_OP, _STATELESS_IF_OP,
                                         _SWITCH_OP, _WHILE_OP, _IDENTITY_OP,
-                                        _VAR_HANDLE_OP, _PLACEHOLDER_OP):
+                                        _VAR_HANDLE_OP, _PLACEHOLDER_OP,
+                                        _CONSTANT_OP):
           # TODO(cais): Overriding the output of StatelessIf, If and While ops
           # currently fails with error. Investigate (b/139668453).
           # Avoid instrumenting Identity ops as well, as they are inserted
@@ -724,7 +725,7 @@ class OpCallbacksTest(test_util.TensorFlowTestCase):
   def testOverrideDTypeInFuncGraph(self):
     def to_float64(op_type, inputs, attrs, outputs, op_name=None, graph=None):
       del inputs, attrs, op_name, graph  # Unused.
-      if op_type == ""Placeholder"":
+      if op_type in (""Const"", ""Placeholder""):
         return outputs
       else:
         return [math_ops.cast(output, dtypes.float64) for output in outputs]
@@ -751,6 +752,17 @@ class OpCallbacksTest(test_util.TensorFlowTestCase):
     self.assertIsNone(w)
     self.assertEqual(instrument.eager_op_types, [_ADD_OP])
 
+  def testOpCallbackCapturesConstTensors(self):
+    instrument = _NumpyFunctionCallback()
+    op_callbacks.add_op_callback(instrument.callback)
+
+    @def_function.function
+    def times_two_plus_three(x):
+      return x * 2.0 + 3.0
+
+    self.assertAllClose(times_two_plus_three(constant_op.constant(10.0)), 23.0)
+    self.assertEqual(instrument.graph_op_types.count(b""Const""), 2)
+
   @test_util.run_in_graph_and_eager_modes
   def testOpCallbackWorksWithGradientTape(self):
     instrument = _NumpyFunctionCallback()
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",tensor_util.py,"@@ -791,6 +791,10 @@ def _ConstantValue(tensor, partial):
     return np.not_equal(value1, value2)
   elif tensor.op.type == ""StopGradient"":
     return constant_value(tensor.op.inputs[0], partial)
+  elif tensor.op.type == ""Identity"":
+    return constant_value(tensor.op.inputs[0], partial)
+  elif tensor.op.type in (""CheckNumericsV2"", ""DebugIdentityV2""):
+    return constant_value(tensor.op.inputs[0], partial)
   else:
     return None
 
",0,train
e6f22ee5f4d483c1b05fbdaf8e8f5d55033f2bdb,tensorflow/tensorflow,"[tfdbg2] Ensure Const ops in graphs are captured by op_callbacks

Details of the changes:
- In the Python API of tensorflow, Const ops are created by calling
  `_create_op_internal()` from constant_op.py. This differs from how most other ops
  are created, and is similar to Placeholder ops, which are already instrumented
  by tfdbg2's op_callbacks. In this CL, we add an op_callback hook to the code in
  constant_op.py that makes that call, allowing instrumentation of Const ops.
- In `_ConstantValue()` in tensor_util.py, add a special case for `CheckNumericsV2` op,
  so the `constant_value()` does not treat the `CheckNumericsV2` op as the constant
  tensor value. Similarly, add special cases for `Identity` and `DebugIdentityV2`.
- In `dumping_callback_test.py`, replace use of a deprecated Dataset API
  (`make_one_shot_iterator()`) with non-deprecated API (`iter()` and `next()`)
- Make other necessary changes to tfdbg2's tests to accommodate the Const ops
  which were previously not instrumented, but are now.
- Increase the shard_count of learning/brain/python/debug/tpu_callbacks_test.py to 6
  to avoid timeouts given the increased number of instrumented ops.

PiperOrigin-RevId: 307723353
Change-Id: Iecdbfcb439f6e04fc12c1503ad5339d42703e8bc",confusion_matrix_test.py,"@@ -188,7 +188,7 @@ class ConfusionMatrixTest(test.TestCase):
   def testLabelsTooLarge(self):
     labels = np.asarray([1, 1, 0, 3, 5], dtype=np.int32)
     predictions = np.asarray([2, 1, 0, 2, 2], dtype=np.int32)
-    with self.assertRaisesOpError(""`labels`.*x < y""):
+    with self.assertRaisesOpError(""`labels`.*out of bound""):
       self._testConfMatrix(
           labels=labels, predictions=predictions, num_classes=3, truth=None)
 
@@ -203,7 +203,7 @@ class ConfusionMatrixTest(test.TestCase):
   def testPredictionsTooLarge(self):
     labels = np.asarray([1, 1, 0, 2, 2], dtype=np.int32)
     predictions = np.asarray([2, 1, 0, 3, 5], dtype=np.int32)
-    with self.assertRaisesOpError(""`predictions`.*x < y""):
+    with self.assertRaisesOpError(""`predictions`.*out of bound""):
       self._testConfMatrix(
           labels=labels, predictions=predictions, num_classes=3, truth=None)
 
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",arg_op.cc,"@@ -13,12 +13,14 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/compiler/tf2xla/literal_util.h""
 #include ""tensorflow/compiler/tf2xla/type_util.h""
 #include ""tensorflow/compiler/tf2xla/xla_compilation_device.h""
 #include ""tensorflow/compiler/tf2xla/xla_compiler.h""
 #include ""tensorflow/compiler/tf2xla/xla_helpers.h""
 #include ""tensorflow/compiler/tf2xla/xla_op_kernel.h""
 #include ""tensorflow/compiler/tf2xla/xla_op_registry.h""
+#include ""tensorflow/compiler/xla/client/xla_builder.h""
 #include ""tensorflow/core/framework/kernel_def_builder.h""
 #include ""tensorflow/core/lib/core/errors.h""
 
@@ -60,6 +62,17 @@ class XlaArgOp : public XlaOpKernel {
                 errors::InvalidArgument(""Invalid/missing argument expression""));
     if (ctx->expected_output_dtype(0) == DT_VARIANT) {
       ctx->SetTensorListOutput(0, arg.handle());
+    } else if (arg.value_bound().has_value()) {
+      // The argument has a bound attached to it, call SetBound op on the
+      // argument.
+      xla::XlaBuilder* builder = ctx->builder();
+      auto input_op = arg.AsXlaOp(builder);
+      xla::Literal bound = HostTensorToLiteral(*arg.value_bound()).ValueOrDie();
+      ctx->SetOutput(
+          0, xla::CustomCall(builder, ""SetBound"", {input_op},
+                             builder->GetShape(input_op).ValueOrDie(), """",
+                             false, {}, &bound));
+      return;
     } else {
       ctx->SetOutputExpression(0, arg);
     }
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",if_while_utils.cc,"@@ -16,6 +16,8 @@ limitations under the License.
 #include ""tensorflow/compiler/tf2xla/kernels/if_while_utils.h""
 
 #include ""tensorflow/compiler/tf2xla/const_analysis.h""
+#include ""tensorflow/compiler/tf2xla/literal_util.h""
+#include ""tensorflow/compiler/xla/literal.h""
 
 namespace tensorflow {
 
@@ -38,11 +40,28 @@ absl::InlinedVector<int, 5> ConvertCompileTimeConstArgumentsToConst(
       xla::StatusOr<absl::optional<Tensor>> maybe_constant =
           expression.ResolveConstant(ctx->compiler()->client());
       if (maybe_constant.ok() && maybe_constant.ValueOrDie().has_value()) {
-        arg->kind = XlaCompiler::Argument::kConstant;
-        arg->type = expression.dtype();
-        arg->constant_value = std::move(maybe_constant.ValueOrDie().value());
-        arg->shape = expression.GetShape().ValueOrDie();
-        resolved_constant_idxs.push_back(i);
+        xla::StatusOr<Tensor> values_are_dynamic =
+            expression.ResolveDynamism(ctx->compiler()->client());
+        bool all_values_are_static = false;
+        if (!values_are_dynamic.ok()) {
+          // Conservatively assume all values are dynamic.
+          all_values_are_static = true;
+        } else {
+          xla::Literal literal =
+              HostTensorToLiteral(values_are_dynamic.ValueOrDie()).ValueOrDie();
+          all_values_are_static = literal.IsAll(0);
+        }
+
+        if (all_values_are_static) {
+          arg->kind = XlaCompiler::Argument::kConstant;
+          arg->type = expression.dtype();
+          arg->constant_value = std::move(maybe_constant.ValueOrDie().value());
+          arg->shape = expression.GetShape().ValueOrDie();
+          resolved_constant_idxs.push_back(i);
+        } else {
+          arg->value_bound.emplace(
+              std::move(maybe_constant.ValueOrDie().value()));
+        }
       }
     }
   }
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_argument.h,"@@ -16,6 +16,8 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_
 #define TENSORFLOW_COMPILER_TF2XLA_XLA_ARGUMENT_H_
 
+#include <optional>
+
 #include ""absl/types/span.h""
 #include ""tensorflow/compiler/tf2xla/host_compute_metadata.pb.h""
 #include ""tensorflow/compiler/tf2xla/xla_resource.h""
@@ -75,6 +77,9 @@ struct XlaArgument {
   // host-memory tensor.
   Tensor constant_value;
 
+  // The upper bounds of the value.
+  absl::optional<Tensor> value_bound;
+
   // The name of this argument, used for debugging.
   string name;
 
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_compiler.cc,"@@ -1159,6 +1159,10 @@ Status XlaCompiler::BuildArguments(
               xla::Reshape(arg_handles[i], arg.DimensionSizes()), arg.type);
         } else {
           arg_expression = XlaExpression::XlaOp(arg_handles[i], arg.type);
+          if (arg.value_bound) {
+            // Propagate upper bound to arg_expression.
+            arg_expression.set_value_bound(arg.value_bound.value());
+          }
         }
         break;
       case XlaCompiler::Argument::kTensorList: {
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_expression.cc,"@@ -170,7 +170,9 @@ xla::StatusOr<absl::optional<Tensor>> XlaExpression::ResolveConstant(
 
   TF_ASSIGN_OR_RETURN(bool is_constant,
                       handle().builder()->IsConstant(handle()));
-  if (!is_constant) return {absl::nullopt};
+  if (!is_constant) {
+    return {absl::nullopt};
+  }
 
   if (!client)
     return errors::InvalidArgument(""client is required to resolve constant"");
",0,train
607ffbc56cf054a02b86d05e232cb6640e11519d,tensorflow/tensorflow,"Propagate value bounds into control flow.

If an argument has to be resolved into constant and if the input is dynamic, propagate the bound into the control flow using SetBound API.

PiperOrigin-RevId: 358048382
Change-Id: Ib49dac4f7a94206380a16d59cc2f550b21ac1b82",xla_expression.h,"@@ -94,6 +94,13 @@ class XlaExpression {
     return constant_value_;
   }
 
+  // Set the bound of the expression.
+  void set_value_bound(Tensor tensor) {
+    value_bound_.emplace(std::move(tensor));
+  }
+
+  // Returns the bound of the expression, if available.
+  absl::optional<Tensor> value_bound() const { return value_bound_; }
   XlaResource* resource() const { return resource_; }
 
   // Returns a human-readable summary of the expression.
@@ -138,6 +145,9 @@ class XlaExpression {
   // The value of the constant, if available.
   absl::optional<Tensor> constant_value_;
 
+  // The bound of the expression, if available.
+  absl::optional<Tensor> value_bound_;
+
   // The resource, if kind_ == kResource. Not owned.
   XlaResource* resource_ = nullptr;
 };
",0,train
674048cad145f8e4000aec4d1ec7f9854ad9c44c,tensorflow/tensorflow,Remove unnecessary checks,generic_layout_optimizer_transposer.cc,"@@ -1061,8 +1061,7 @@ Status DefaultLayoutAgnosticOpTransposer::TransposeNode(
                                         dst_format_3d);
   }
   if (!ShouldProcess(*context, *node) || (rank != 4 && rank != 5) ||
-      !IsFanoutPortRankN(*node, 0, rank) || !IsAfterDstToSrcTransform(*context,
-                                                                      *node)) {
+      !IsAfterDstToSrcTransform(*context, *node)) {
     if (allow_5d) {
       context->AssignDeviceAndDataFormats(context->target_device, src_format,
                                           dst_format);
",0,train
bf62fcec003636338386f5246103b90a9580181c,tensorflow/tensorflow,"Automated rollback of commit 23e33f871b2bf2879b40ebf3b883e104f30f389b. Revert #31450.

PiperOrigin-RevId: 262675086",core.py,"@@ -26,7 +26,6 @@ import warnings
 import numpy as np
 
 from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -581,29 +580,9 @@ class Flatten(Layer):
       permutation.append(1)
       inputs = array_ops.transpose(inputs, perm=permutation)
 
-    input_shape = inputs.shape
-    if input_shape[1:].is_fully_defined():
-      flattened_dim = tensor_shape.dimension_value(
-          np.prod(input_shape[1:], dtype=int))
-      # Temporary fix for integer overflow issue.
-      if flattened_dim > np.iinfo(np.int32).max:
-        shape_dtype = dtypes.int64
-      else:
-        shape_dtype = dtypes.int32
-      outputs = array_ops.reshape(
-          inputs, constant_op.constant((-1, flattened_dim), dtype=shape_dtype))
-    else:
-      batch_size = tensor_shape.dimension_value(inputs.shape[0])
-      if batch_size:
-        # Temporary fix for integer overflow issue.
-        if batch_size > np.iinfo(np.int32).max:
-          shape_dtype = dtypes.int64
-        else:
-          shape_dtype = dtypes.int32
-        outputs = array_ops.reshape(
-            inputs, constant_op.constant((batch_size, -1), dtype=shape_dtype))
-      else:
-        outputs = array_ops.reshape(inputs, (array_ops.shape(inputs)[0], -1))
+    outputs = array_ops.reshape(
+        inputs, (tensor_shape.dimension_value(inputs.shape[0]) or
+                 array_ops.shape(inputs)[0], -1))
     if not context.executing_eagerly():
       outputs.set_shape(self.compute_output_shape(inputs.shape))
     return outputs
",0,test
bf62fcec003636338386f5246103b90a9580181c,tensorflow/tensorflow,"Automated rollback of commit 23e33f871b2bf2879b40ebf3b883e104f30f389b. Revert #31450.

PiperOrigin-RevId: 262675086",core_test.py,"@@ -556,20 +556,6 @@ class FlattenTest(test.TestCase):
       self.assertEqual(list(np_output.shape), [5, 6])
       self.assertEqual(y.get_shape().as_list(), [5, None])
 
-  @test_util.run_deprecated_v1
-  def testFlattenLargeDim(self):
-    x = array_ops.placeholder(shape=(None, 21316, 21316, 80), dtype='float32')
-    y = core_layers.Flatten()(x)
-    self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80])
-
-  @test_util.run_deprecated_v1
-  def testFlattenLargeBatchDim(self):
-    batch_size = np.iinfo(np.int32).max + 10
-    x = array_ops.placeholder(
-        shape=(batch_size, None, None, 1), dtype='float32')
-    y = core_layers.Flatten()(x)
-    self.assertEqual(y.shape.as_list(), [batch_size, None])
-
 
 if __name__ == '__main__':
   test.main()
",0,test
e8d4a3d079ec9c49c75e93978c5b9a3709a623fd,tensorflow/tensorflow,"Adds numerical correctness tests for all Keras modes and model types

PiperOrigin-RevId: 225584709",correctness_test.py,"@@ -0,0 +1,147 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Tests for numerical correctness.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python import keras
+from tensorflow.python.keras import keras_parameterized
+from tensorflow.python.keras import testing_utils
+from tensorflow.python.platform import test
+
+
+class Bias(keras.layers.Layer):
+  """"""Layer that add a bias to its inputs.""""""
+
+  def build(self, input_shape):
+    self.bias = self.add_variable('bias', (1,), initializer='zeros')
+
+  def call(self, inputs):
+    return inputs + self.bias
+
+
+class MultiInputSubclassed(keras.Model):
+  """"""Subclassed Model that adds its inputs and then adds a bias.""""""
+
+  def __init__(self):
+    super(MultiInputSubclassed, self).__init__()
+    self.add = keras.layers.Add()
+    self.bias = Bias()
+
+  def call(self, inputs):
+    added = self.add(inputs)
+    return self.bias(added)
+
+
+def multi_input_functional():
+  """"""Functional Model that adds its inputs and then adds a bias.""""""
+  input_1 = keras.Input(shape=(1,))
+  input_2 = keras.Input(shape=(1,))
+  input_3 = keras.Input(shape=(1,))
+  added = keras.layers.Add()([input_1, input_2, input_3])
+  output = Bias()(added)
+  return keras.Model([input_1, input_2, input_3], output)
+
+
+@keras_parameterized.run_with_all_model_types
+@keras_parameterized.run_all_keras_modes
+class SimpleBiasTest(keras_parameterized.TestCase):
+
+  def _get_simple_bias_model(self):
+    model = testing_utils.get_model_from_layers([Bias()], input_shape=(1,))
+    model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae')
+    return model
+
+  def test_simple_bias_fit(self):
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[0.5], [2.], [3.5]])
+    model = self._get_simple_bias_model()
+
+    history = model.fit(x, y, batch_size=3, epochs=5)
+    self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6])
+
+  def test_simple_bias_evaluate(self):
+    x = np.array([[0.], [1.], [2.]])
+    y = np.array([[1.], [3.], [5.]])
+    model = self._get_simple_bias_model()
+
+    loss = model.evaluate(x, y, batch_size=1)
+    self.assertAlmostEqual(loss, 2.)
+
+  def test_simple_bias_predict(self):
+    x = np.array([[0.], [1.], [2.]])
+    model = self._get_simple_bias_model()
+
+    pred = model.predict(x, batch_size=1)
+    self.assertAllClose(x, pred)
+
+
+@keras_parameterized.run_all_keras_modes
+class MultipleInputTest(keras_parameterized.TestCase):
+
+  def _get_multiple_input_model(self, subclassed=True):
+    if subclassed:
+      model = MultiInputSubclassed()
+    else:
+      model = multi_input_functional()
+    model.compile(keras.optimizer_v2.gradient_descent.SGD(0.1), 'mae')
+    return model
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_fit(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+    y = np.array([[12.5], [16.], [19.5]])
+
+    model = self._get_multiple_input_model(subclassed)
+    history = model.fit(x, y, batch_size=3, epochs=5)
+    self.assertAllClose(history.history['loss'], [1., 0.9, 0.8, 0.7, 0.6])
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_evaluate(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+    y = np.array([[13.], [17.], [21.]])
+
+    model = self._get_multiple_input_model(subclassed)
+    loss = model.evaluate(x, y, batch_size=3)
+    self.assertAlmostEqual(loss, 2.)
+
+  @parameterized.named_parameters(('subclassed', True), ('functional', False))
+  def test_multiple_input_predict(self, subclassed):
+    x = [
+        np.array([[1.], [2.], [3.]]),
+        np.array([[4.], [5.], [6.]]),
+        np.array([[7.], [8.], [9.]])
+    ]
+
+    model = self._get_multiple_input_model(subclassed)
+    pred = model.predict(x, batch_size=1)
+    self.assertAllClose(pred, [[12.], [15.], [18.]])
+
+
+if __name__ == '__main__':
+  test.main()
",0,train
e8d4a3d079ec9c49c75e93978c5b9a3709a623fd,tensorflow/tensorflow,"Adds numerical correctness tests for all Keras modes and model types

PiperOrigin-RevId: 225584709",training_eager_test.py,"@@ -248,21 +248,6 @@ class CorrectnessTest(keras_parameterized.TestCase):
     layer(1.)  # Plain-value inputs are only valid in eager mode.
     self.assertEqual(1, len(layer.losses))
 
-  def test_predict_correctness(self):
-    i1 = keras.layers.Input(shape=(4, 5))
-    i2 = keras.layers.Input(shape=(4, 5))
-    i3 = keras.layers.Input(shape=(4, 5))
-    o = keras.layers.add([i1, i2, i3])
-    model = keras.models.Model([i1, i2, i3], o)
-    model.run_eagerly = True
-
-    x1 = np.random.random((2, 4, 5))
-    x2 = np.random.random((2, 4, 5))
-    x3 = np.random.random((2, 4, 5))
-    out = model.predict([x1, x2, x3])
-
-    self.assertAllClose(out, x1 + x2 + x3)
-
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",constant_folding.cc,"@@ -43,7 +43,7 @@ namespace tensorflow {
 namespace {
 
 bool IsConstantFoldable(const Node* n,
-                        std::function<bool(const Node*)> consider) {
+                        const std::function<bool(const Node*)>& consider) {
   if (n->op_def().is_stateful()) {
     return false;
   }
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",copy_tensor.cc,"@@ -71,7 +71,8 @@ void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
       if (ri.sender_device_type == src_device_type &&
           ri.receiver_device_type == dst_device_type) {
         ri.copy_function(send_dev_context, recv_dev_context, src, dst,
-                         src_alloc_attr, dst_alloc_attr, input, output, done);
+                         src_alloc_attr, dst_alloc_attr, input, output,
+                         std::move(done));
         return;
       }
     }
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",executor.cc,"@@ -1434,7 +1434,7 @@ void ExecutorState::RunAsync(Executor::DoneCallback done) {
   } else {
     num_outstanding_ops_ = ready.size();
     root_frame_->iterations[0]->outstanding_ops = ready.size();
-    done_cb_ = done;
+    done_cb_ = std::move(done);
     // Schedule to run all the ready ops in thread pool.
     ScheduleReady(ready, nullptr);
   }
@@ -2560,7 +2560,7 @@ bool ExecutorState::FrameState::CleanupIterations(const GraphView* gview,
 }
 
 void ExecutorImpl::RunAsync(const Args& args, DoneCallback done) {
-  (new ExecutorState(args, this))->RunAsync(done);
+  (new ExecutorState(args, this))->RunAsync(std::move(done));
 }
 
 }  // end namespace
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",function.cc,"@@ -604,7 +604,7 @@ struct CustomCreatorSingleton {
 
   void Set(CustomKernelCreator cb) {
     mutex_lock l(mu);
-    custom_creator = cb;
+    custom_creator = std::move(cb);
   }
 
   CustomKernelCreator Get() {
@@ -621,7 +621,7 @@ CustomCreatorSingleton* GetCustomCreatorSingleton() {
 }  // end namespace
 
 void RegisterDefaultCustomKernelCreator(CustomKernelCreator cb) {
-  GetCustomCreatorSingleton()->Set(cb);
+  GetCustomCreatorSingleton()->Set(std::move(cb));
 }
 
 FunctionLibraryRuntime* NewFunctionLibraryRuntime(
@@ -631,7 +631,7 @@ FunctionLibraryRuntime* NewFunctionLibraryRuntime(
     CustomKernelCreator custom_kernel_creator) {
   return new FunctionLibraryRuntimeImpl(dmgr, env, device, graph_def_version,
                                         lib_def, optimizer_options,
-                                        custom_kernel_creator);
+                                        std::move(custom_kernel_creator));
 }
 
 FunctionLibraryRuntime* NewFunctionLibraryRuntime(
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",function_test.cc,"@@ -44,7 +44,7 @@ Status GetOpSig(const string& op, const OpDef** sig) {
 void FunctionTestSchedClosure(std::function<void()> fn) {
   static thread::ThreadPool* w =
       new thread::ThreadPool(Env::Default(), ""Test"", 8);
-  w->Schedule(fn);
+  w->Schedule(std::move(fn));
 }
 
 void HasError(const Status& s, const string& substr) {
@@ -654,7 +654,8 @@ namespace {
 
 bool DoNothing(Graph* g) { return false; }
 
-string Optimize(std::function<bool(Graph* g)> pass, const FunctionDef& fdef) {
+string Optimize(const std::function<bool(Graph* g)>& pass,
+                const FunctionDef& fdef) {
   InstantiationResult result;
   InstantiateAttrValueMap empty;
   TF_CHECK_OK(InstantiateFunction(fdef, empty, GetOpSig, &result));
",0,train
7ee26f7e144849d07e985b0a1c8abf7bf36adb27,tensorflow/tensorflow,"Fix some ClangTidy warnings in third_party/tensorflow/core/common_runtime.
Change: 153861629",rendezvous_mgr.cc,"@@ -106,7 +106,7 @@ void IntraProcessRendezvous::SameWorkerRecvDone(
   CopyTensor::ViaDMA(parsed.edge_name, send_args.device_context,
                      recv_args.device_context, src_device, dst_device,
                      send_args.alloc_attrs, recv_args.alloc_attrs, &in, out,
-                     done);
+                     std::move(done));
 }
 
 void IntraProcessRendezvous::RecvAsync(const ParsedKey& parsed,
@@ -132,7 +132,8 @@ void IntraProcessRendezvous::RecvAsync(const ParsedKey& parsed,
     };
 
     if (status.ok() && in.IsInitialized()) {
-      SameWorkerRecvDone(parsed, send_args, recv_args, in, out, final_callback);
+      SameWorkerRecvDone(parsed, send_args, recv_args, in, out,
+                         std::move(final_callback));
     } else {
       final_callback(status);
     }
",0,train
771f93bd9e1a62036217e1958bb272682923d28c,tensorflow/tensorflow,"Update python package description to include python 3.9

As tf-nightly has Python 3.9 available, this PR updates
the Python package description to include a Python 3.9 entry.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",setup.py,"@@ -343,6 +343,7 @@ setup(
         'Programming Language :: Python :: 3.6',
         'Programming Language :: Python :: 3.7',
         'Programming Language :: Python :: 3.8',
+        'Programming Language :: Python :: 3.9',
         'Programming Language :: Python :: 3 :: Only',
         'Topic :: Scientific/Engineering',
         'Topic :: Scientific/Engineering :: Mathematics',
",0,test
92976ad1f05fd2f4946837700855892e47f516ac,tensorflow/tensorflow,Minor change.,mkl_reshape_op.cc,"@@ -83,7 +83,7 @@ class MklReshapeOp : public OpKernel {
     TensorShape shape;
     int64 product = 1;
     int unknown_index = -1;
-    bool sizes_has_zero_dim;
+    bool sizes_has_zero_dim = false;
     switch (sizes.dtype()) {
       case DT_INT32:
         OP_REQUIRES_OK(context,
",0,train
4e329e4dd8e59b0d96122e2a8241bda9ba80ffb4,tensorflow/tensorflow,"Changed InputColocationExemptionRegistry::ops_ to gtl::FlatSet<string>
instead of set<string>.

PiperOrigin-RevId: 268973789",input_colocation_exemption_registry.cc,"@@ -27,8 +27,6 @@ InputColocationExemptionRegistry* InputColocationExemptionRegistry::Global() {
   return registry;
 }
 
-const std::set<string>& InputColocationExemptionRegistry::Get() { return ops_; }
-
 void InputColocationExemptionRegistry::Register(const string& op) {
   auto it = ops_.find(op);
   if (it != ops_.end()) {
",0,train
4e329e4dd8e59b0d96122e2a8241bda9ba80ffb4,tensorflow/tensorflow,"Changed InputColocationExemptionRegistry::ops_ to gtl::FlatSet<string>
instead of set<string>.

PiperOrigin-RevId: 268973789",input_colocation_exemption_registry.h,"@@ -15,9 +15,9 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_INPUT_COLOCATION_EXEMPTION_REGISTRY_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_INPUT_COLOCATION_EXEMPTION_REGISTRY_H_
 
-#include <set>
 #include <string>
 
+#include ""tensorflow/core/lib/gtl/flatset.h""
 #include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
@@ -40,13 +40,13 @@ class InputColocationExemptionRegistry {
   static InputColocationExemptionRegistry* Global();
 
   // Returns the set of ops exempt from the input colocation constraints.
-  const std::set<string>& Get();
+  const gtl::FlatSet<string>& Get() { return ops_; }
 
   // Registers an op to be excluded from the input colocation constraints.
   void Register(const string& op);
 
  private:
-  std::set<string> ops_;
+  gtl::FlatSet<string> ops_;
 };
 
 namespace input_colocation_exemption_registration {
",0,train
e692dda4c8b199555e2fa32132a7784e0893c870,tensorflow/tensorflow,"Fixed a bug in CollectiveAllReduce where the variable names it sees are sometimes incomplete and thus not unique, leading to the same collective keys for different variables.

PiperOrigin-RevId: 214117466",collective_all_reduce_strategy.py,"@@ -143,8 +143,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
     def _real_mirrored_creator(devices, *args, **kwargs):
       """"""Creates one MirroredVariable on the current worker.""""""
       index = {}
+      unique_var_name = ops.get_default_graph().unique_name(
+          kwargs[""name""], mark_as_used=False).rstrip(""/"")
       collective_instance_key = self._collective_keys.get_instance_key(
-          key_id=kwargs[""name""])
+          key_id=unique_var_name)
       if ""initial_value"" not in kwargs:
         raise ValueError(""Initial value must be specified."")
       initial_value = kwargs[""initial_value""]
@@ -188,6 +190,10 @@ class CollectiveAllReduceStrategy(mirrored_strategy.MirroredStrategy):
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
             v = next_creator(*args, **kwargs)
 
+          if i == 0:
+            actual_var_name = v.name.split("":"")[0]
+            assert unique_var_name == actual_var_name, ""%r vs %r"" % (
+                unique_var_name, actual_var_name)
           assert not isinstance(v, values.DistributedVariable)
           index[d] = v
       return index
",0,train
e692dda4c8b199555e2fa32132a7784e0893c870,tensorflow/tensorflow,"Fixed a bug in CollectiveAllReduce where the variable names it sees are sometimes incomplete and thus not unique, leading to the same collective keys for different variables.

PiperOrigin-RevId: 214117466",collective_all_reduce_strategy_test.py,"@@ -26,6 +26,7 @@ from tensorflow.contrib.distribute.python import combinations
 from tensorflow.contrib.distribute.python import cross_tower_utils
 from tensorflow.contrib.distribute.python import multi_worker_test_base
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -34,9 +35,14 @@ from tensorflow.python.layers import core
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import nn
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
 from tensorflow.python.platform import test
+from tensorflow.python.training import adam
+from tensorflow.python.training import training_util
 
 
 class CollectiveAllReduceStrategyTestBase(
@@ -146,6 +152,56 @@ class CollectiveAllReduceStrategyTestBase(
       self.assertLess(error_after, error_before)
       return error_after < error_before
 
+  def _test_complex_model(self, task_type, task_id, num_gpus):
+    d, master_target = self._get_test_object(task_type, task_id, num_gpus)
+
+    def model_fn():
+      """"""Mnist model with synthetic input.""""""
+      data_format = 'channels_last'
+      input_shape = [28, 28, 1]
+      l = keras.layers
+      max_pool = l.MaxPooling2D((2, 2), (2, 2),
+                                padding='same',
+                                data_format=data_format)
+      model = keras.Sequential([
+          l.Reshape(target_shape=input_shape, input_shape=(28 * 28,)),
+          l.Conv2D(
+              32,
+              5,
+              padding='same',
+              data_format=data_format,
+              activation=nn.relu), max_pool,
+          l.Conv2D(
+              64,
+              5,
+              padding='same',
+              data_format=data_format,
+              activation=nn.relu), max_pool,
+          l.Flatten(),
+          l.Dense(1024, activation=nn.relu),
+          l.Dropout(0.4),
+          l.Dense(10)
+      ])
+      image = random_ops.random_uniform([2, 28, 28])
+      label = random_ops.random_uniform([2, 1], maxval=10, dtype=dtypes.int32)
+      logits = model(image, training=True)
+      loss = losses.sparse_softmax_cross_entropy(labels=label, logits=logits)
+      optimizer = adam.AdamOptimizer(learning_rate=1e-4)
+      train_op = optimizer.minimize(loss,
+                                    training_util.get_or_create_global_step())
+      return train_op
+
+    with ops.Graph().as_default(), \
+         self.test_session(config=self._sess_config,
+                           target=master_target) as sess:
+      with d.scope():
+        train_op = d.call_for_each_tower(model_fn)
+        train_op = d.group(d.unwrap(train_op))
+
+      sess.run(variables.global_variables_initializer())
+      sess.run(train_op)
+      return True
+
   def _test_variable_initialization(self, task_type, task_id, num_gpus):
     distribution, master_target = self._get_test_object(task_type, task_id,
                                                         num_gpus)
@@ -206,6 +262,14 @@ class DistributedCollectiveAllReduceStrategyTest(
         self._cluster_spec,
         num_gpus=num_gpus)
 
+  @combinations.generate(
+      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1))
+  def testComplexModel(self, num_gpus):
+    if context.num_gpus() < num_gpus:
+      return
+    self._run_between_graph_clients(
+        self._test_complex_model, self._cluster_spec, num_gpus=num_gpus)
+
 
 class DistributedCollectiveAllReduceStrategyTestWithChief(
     CollectiveAllReduceStrategyTestBase, parameterized.TestCase):
@@ -236,6 +300,14 @@ class DistributedCollectiveAllReduceStrategyTestWithChief(
         self._cluster_spec,
         num_gpus=num_gpus)
 
+  @combinations.generate(
+      combinations.combine(mode=['graph'], num_gpus=[0, 1, 2], required_gpus=1))
+  def testComplexModel(self, num_gpus):
+    if context.num_gpus() < num_gpus:
+      return
+    self._run_between_graph_clients(
+        self._test_complex_model, self._cluster_spec, num_gpus=num_gpus)
+
 
 class LocalCollectiveAllReduceStrategy(
     CollectiveAllReduceStrategyTestBase, parameterized.TestCase):
@@ -246,6 +318,12 @@ class LocalCollectiveAllReduceStrategy(
       return
     self._test_minimize_loss_graph(None, None, num_gpus)
 
+  def testComplexModel(self, num_gpus=2):
+    # Collective ops don't support a strategy with only one device.
+    if context.num_gpus() < num_gpus:
+      return
+    self._test_complex_model(None, None, num_gpus)
+
 
 if __name__ == '__main__':
   test.main()
",0,train
3f803a9421fddf10a30745fc145d565d9737bd40,tensorflow/tensorflow,Make add_n() handle a single IndexedSlices argument properly,math_ops.py,"@@ -2135,6 +2135,8 @@ def _as_indexed_slices_list(inputs, optimize=True):
 def add_n(inputs, name=None):
   """"""Adds all input tensors element-wise.
 
+  Converts `IndexedSlices` objects into dense tensors prior to adding.
+
   Args:
     inputs: A list of `Tensor` or `IndexedSlices` objects, each with same shape
       and type.
@@ -2157,7 +2159,7 @@ def add_n(inputs, name=None):
 
   if len(inputs) == 1:
     if isinstance(inputs[0], ops.IndexedSlices):
-      values = inputs[0].values
+      values = ops.convert_to_tensor(inputs[0])
     else:
       values = inputs[0]
     if name:
",0,train
3f803a9421fddf10a30745fc145d565d9737bd40,tensorflow/tensorflow,Make add_n() handle a single IndexedSlices argument properly,math_ops_test.py,"@@ -359,6 +359,17 @@ class AddNTest(test_util.TensorFlowTestCase):
                             [g.eval() for g in add_n_grad])
 
 
+  def testIndexedSlices(self):
+    slc = tf.IndexedSlices(array_ops.constant([1, 2], shape=[1, 2]),
+        array_ops.constant([2]), array_ops.constant([2, 2]))
+    slc_as_dense = np.array([[0, 0], [1, 2]])
+    with self.test_session(use_gpu=True):
+      # add_n currently always converts IndexedSlices to dense
+      self.assertAllEqual(slc_as_dense, math_ops.add_n([slc]).eval())
+      self.assertAllEqual(2 * slc_as_dense, math_ops.add_n([slc, slc]).eval())
+
+
+
 class DivAndModTest(test_util.TensorFlowTestCase):
   # TODO(aselle): Test more types before exposing new division operators.
 
",0,train
ed6357cbd4f6e47ab87b219a0e0840739c92c970,tensorflow/tensorflow,"Added missing definition for the Packet16q16i.
Fixed a couple of bugs in the implementation of max reductions for
avx512",PacketMathAVX2.h,"@@ -11,6 +11,13 @@ typedef struct Packet32q8i {
   Packet32q8i(__m256i val) : val(val) {}
 } Packet32q8i;
 
+typedef struct Packet16q16i {
+  __m256i val;
+  operator __m256i() const { return val; }
+  Packet16q16i();
+  Packet16q16i(__m256i val) : val(val) {}
+} Packet16q16i;
+
 typedef struct Packet32q8u {
   __m256i val;
   operator __m256i() const { return val; }
@@ -32,6 +39,13 @@ typedef struct Packet16q8u {
   Packet16q8u(__m128i val) : val(val) {}
 } Packet16q8u;
 
+typedef struct Packet8q16i {
+  __m128i val;
+  operator __m128i() const { return val; }
+  Packet8q16i();
+  Packet8q16i(__m128i val) : val(val) {}
+} Packet8q16i;
+
 typedef struct Packet8q32i {
   __m256i val;
   operator __m256i() const { return val; }
@@ -92,6 +106,28 @@ struct packet_traits<QUInt8> : default_packet_traits {
   };
 };
 template <>
+struct packet_traits<QInt16> : default_packet_traits {
+  typedef Packet16q16i type;
+  typedef Packet8q16i half;
+  enum {
+    Vectorizable = 1,
+    AlignedOnScalar = 1,
+    size = 16,
+  };
+  enum {
+    HasAdd = 0,
+    HasSub = 0,
+    HasMul = 0,
+    HasNegate = 0,
+    HasAbs = 0,
+    HasAbs2 = 0,
+    HasMin = 1,
+    HasMax = 1,
+    HasConj = 0,
+    HasSetLinear = 0
+  };
+};
+template <>
 struct packet_traits<QInt32> : default_packet_traits {
   typedef Packet8q32i type;
   typedef Packet4q32i half;
@@ -122,6 +158,12 @@ struct unpacket_traits<Packet32q8i> {
   enum { size = 32, alignment=Aligned32 };
 };
 template <>
+struct unpacket_traits<Packet16q16i> {
+  typedef QInt16 type;
+  typedef Packet8q16i half;
+  enum { size = 16, alignment=Aligned32 };
+};
+template <>
 struct unpacket_traits<Packet32q8u> {
   typedef QUInt8 type;
   typedef Packet16q8u half;
@@ -146,6 +188,11 @@ EIGEN_STRONG_INLINE Packet32q8u ploadu<Packet32q8u>(const QUInt8* from) {
       reinterpret_cast<const __m256i*>(from));
 }
 template <>
+EIGEN_STRONG_INLINE Packet16q16i ploadu<Packet16q16i>(const QInt16* from) {
+  EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
+      reinterpret_cast<const __m256i*>(from));
+}
+template <>
 EIGEN_STRONG_INLINE Packet8q32i ploadu<Packet8q32i>(const QInt32* from) {
   EIGEN_DEBUG_UNALIGNED_LOAD return _mm256_loadu_si256(
       reinterpret_cast<const __m256i*>(from));
",0,train
ed6357cbd4f6e47ab87b219a0e0840739c92c970,tensorflow/tensorflow,"Added missing definition for the Packet16q16i.
Fixed a couple of bugs in the implementation of max reductions for
avx512",PacketMathAVX512.h,"@@ -457,7 +457,7 @@ EIGEN_STRONG_INLINE QInt16 predux_max<Packet32q16i>(const Packet32q16i& a) {
   std::uint32_t w =
       pfirst(
         _mm_max_epi16(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
-  return std::min({
+  return std::max({
            static_cast<std::int16_t>(w >> 16),
            static_cast<std::int16_t>(w)
          });
@@ -493,7 +493,7 @@ EIGEN_STRONG_INLINE QUInt8 predux_max<Packet64q8u>(const Packet64q8u& a) {
   std::uint32_t w =
       pfirst(
         _mm_max_epu8(res, _mm_shuffle_epi32(res, _MM_SHUFFLE(0, 0, 0, 1))));
-  return std::min({
+  return std::max({
            static_cast<std::uint8_t>(w >> 24),
            static_cast<std::uint8_t>(w >> 16),
            static_cast<std::uint8_t>(w >> 8),
",0,train
b797bfb750504e03a38a988c44e3c52e902e87c4,tensorflow/tensorflow,"[HloOrdering] Make parameter always defined before other instructions.

- Make parameter always defined before other instructions.
- Add extra indentations to the predecessor field in ToString() method to make it clear.

PiperOrigin-RevId: 215162840",hlo_ordering.cc,"@@ -92,14 +92,18 @@ bool HloOrdering::ExecutesBefore(const HloInstruction* a,
 }
 
 bool HloOrdering::IsDefinedBefore(const HloValue& a, const HloValue& b) const {
-  // If 'b' is an entry param then 'a' cannot be defined before 'b' because 'b'
-  // is live into the module.
+  // Entry parameter should always be defined before other instructions.
   const HloModule* module = b.defining_instruction()->parent()->parent();
   if (b.defining_instruction()->parent() == module->entry_computation() &&
       b.defining_instruction()->opcode() == HloOpcode::kParameter) {
     return false;
   }
 
+  if (a.defining_instruction()->parent() == module->entry_computation() &&
+      a.defining_instruction()->opcode() == HloOpcode::kParameter) {
+    return true;
+  }
+
   // Phi values require special handling. Because XLA does not have a phi
   // instruction, the definition instruction of the phis values are
   // placeholders: either the subcomputation parameter (body or condition) or
@@ -316,7 +320,7 @@ string PredecessorHloOrdering::ToStringHelper(const string& name) const {
       for (auto predecessor : all) {
         if (predecessors_.at(computation)
                 ->IsReachable(predecessor, instruction)) {
-          pieces.push_back(absl::StrFormat(""  %s"", predecessor->name()));
+          pieces.push_back(absl::StrFormat(""    %s"", predecessor->name()));
         }
       }
     }
",0,train
b797bfb750504e03a38a988c44e3c52e902e87c4,tensorflow/tensorflow,"[HloOrdering] Make parameter always defined before other instructions.

- Make parameter always defined before other instructions.
- Add extra indentations to the predecessor field in ToString() method to make it clear.

PiperOrigin-RevId: 215162840",hlo_ordering_test.cc,"@@ -174,6 +174,26 @@ TEST_F(HloOrderingTest, InstructionsInWhileComputations) {
   EXPECT_FALSE(ordering.ExecutesBefore(body_param, cond_param));
 }
 
+TEST_F(HloOrderingTest, ParametersDefinedBeforeOthers) {
+  // Entry parameter should always be defined before other instructions.
+  auto module = CreateNewModule();
+  const Shape scalar_shape = ShapeUtil::MakeShape(xla::F32, {});
+  auto builder = HloComputation::Builder(TestName());
+  auto constant = builder.AddInstruction(
+      HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1.0)));
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, ""param""));
+  module->AddEntryComputation(builder.Build());
+  TF_ASSERT_OK_AND_ASSIGN(auto dataflow,
+                          HloDataflowAnalysis::Run(*module, /*ssa_form=*/true));
+
+  DependencyHloOrdering ordering(module.get());
+  EXPECT_TRUE(ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(param),
+                                       dataflow->GetValueDefinedAt(constant)));
+  EXPECT_TRUE(!ordering.IsDefinedBefore(dataflow->GetValueDefinedAt(constant),
+                                        dataflow->GetValueDefinedAt(param)));
+}
+
 TEST_F(HloOrderingTest, ValuesInWhileComputations) {
   // Tests the ordering of values (defined by dataflow analysis) in the body and
   // condition of a while instruction. HLO code:
",0,train
0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util.
Pooling is using this function for better performance.

PiperOrigin-RevId: 242153291",pooling.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/gpu/common/shape.h""
 #include ""tensorflow/lite/delegates/gpu/common/util.h""
 #include ""tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h""
+#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h""
 
 namespace tflite {
 namespace gpu {
@@ -40,8 +41,8 @@ std::string GetMaxPoolingCode(const HW& kernel_size) {
     constant int window_w = $0;
     constant int window_h = $1;
     struct uniforms {
-      int2 src_size;
-      int2 dst_size;
+      int4 src_size;
+      int4 dst_size;
       int2 stride;
       int2 offset;
     };
@@ -51,7 +52,8 @@ std::string GetMaxPoolingCode(const HW& kernel_size) {
                                 $$1
                                 uint3 gid[[thread_position_in_grid]]) {
       if (static_cast<int>(gid.x) >= params.dst_size.x ||
-          static_cast<int>(gid.y) >= params.dst_size.y) {
+          static_cast<int>(gid.y) >= params.dst_size.y ||
+          static_cast<int>(gid.z) >= params.dst_size.z) {
         return;
       }
 
@@ -84,8 +86,8 @@ std::string GetMaxPoolingIndicesCode(const HW& kernel_size) {
     constant int window_w = $0;
     constant int window_h = $1;
     struct uniforms {
-      int2 src_size;
-      int2 dst_size;
+      int4 src_size;
+      int4 dst_size;
       int2 stride;
       int2 offset;
     };
@@ -95,7 +97,8 @@ std::string GetMaxPoolingIndicesCode(const HW& kernel_size) {
                                 $$1
                                 uint3 gid[[thread_position_in_grid]]) {
       if (static_cast<int>(gid.x) >= params.dst_size.x ||
-          static_cast<int>(gid.y) >= params.dst_size.y) {
+          static_cast<int>(gid.y) >= params.dst_size.y ||
+          static_cast<int>(gid.z) >= params.dst_size.z) {
         return;
       }
 
@@ -147,8 +150,8 @@ std::string GetAveragePoolingCode(const HW& kernel_size) {
   constant int window_h = $1;
   constant float multiplier = $2;
   struct uniforms {
-    int2 src_size;
-    int2 dst_size;
+    int4 src_size;
+    int4 dst_size;
     int2 stride;
     int2 offset;
   };
@@ -158,7 +161,8 @@ std::string GetAveragePoolingCode(const HW& kernel_size) {
                               uint tid[[thread_index_in_threadgroup]],
                               uint3 gid[[thread_position_in_grid]]) {
     if (static_cast<int>(gid.x) >= params.dst_size.x ||
-        static_cast<int>(gid.y) >= params.dst_size.y) {
+        static_cast<int>(gid.y) >= params.dst_size.y ||
+        static_cast<int>(gid.z) >= params.dst_size.z) {
       return;
     }
 
@@ -219,8 +223,12 @@ ComputeTaskDescriptorPtr PoolingInternal(int id, ValueId input_id,
          std::vector<int> uniform_params = {
              dimension.w,
              dimension.h,
+             IntegralDivideRoundUp(dimension.c, 4),
+             dimension.w * dimension.h,
              output_dimension.w,
              output_dimension.h,
+             IntegralDivideRoundUp(dimension.c, 4),
+             output_dimension.w * output_dimension.h,
              params.strides.w,
              params.strides.h,
              params.padding.prepended.w,
@@ -230,14 +238,14 @@ ComputeTaskDescriptorPtr PoolingInternal(int id, ValueId input_id,
        }},
   };
 
-  desc->resize_function = [input_id,
-                           params](const std::map<ValueId, BHWC>& buffers) {
-    const uint3 groups_size{16, 16, 1};
-    const auto& src_shape = buffers.find(input_id)->second;
-    BHWC dst_shape = CalculateOutputShape(src_shape, params);
-    int groups_x = IntegralDivideRoundUp(dst_shape.w, groups_size.x);
-    int groups_y = IntegralDivideRoundUp(dst_shape.h, groups_size.y);
-    int groups_z = IntegralDivideRoundUp(dst_shape.c, 4);
+  desc->resize_function = [output_id](const std::map<ValueId, BHWC>& buffers) {
+    BHWC dst_shape = buffers.find(output_id)->second;
+    const uint3 grid =
+        uint3(dst_shape.w, dst_shape.h, IntegralDivideRoundUp(dst_shape.c, 4));
+    const uint3 groups_size = GetWorkGroupSizeForGrid(grid);
+    int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x);
+    int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y);
+    int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z);
     return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z});
   };
 
",0,train
0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util.
Pooling is using this function for better performance.

PiperOrigin-RevId: 242153291",reshape.cc,"@@ -27,31 +27,12 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/gpu/common/types.h""
 #include ""tensorflow/lite/delegates/gpu/common/util.h""
 #include ""tensorflow/lite/delegates/gpu/metal/compute_task_descriptor.h""
+#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h""
 
 namespace tflite {
 namespace gpu {
 namespace metal {
 namespace {
-
-uint GetBestSize(int grid_size) {
-  if (grid_size % 8 == 0 || grid_size % 8 >= 4 || grid_size >= 16) {
-    return 8;
-  } else if (grid_size % 4 == 0 || grid_size % 4 >= 2 || grid_size >= 8) {
-    return 4;
-  } else if (grid_size % 2 == 0 || grid_size >= 4) {
-    return 2;
-  } else {
-    return 1;
-  }
-}
-
-uint3 GetWorkGroupSize(const BHWC& dst_shape) {
-  uint x_size = GetBestSize(dst_shape.w);
-  uint y_size = GetBestSize(dst_shape.h);
-  uint z_size = std::max(1u, 32u / (x_size * y_size));
-  return {x_size, y_size, z_size};
-}
-
 std::string GetReshapeCode() {
   std::string code = R""(
 #include <metal_stdlib>
@@ -177,11 +158,12 @@ std::vector<ComputeTaskDescriptorPtr> Reshape(int id, ValueId input_id,
   };
 
   desc->resize_function = [attr](const std::map<ValueId, BHWC>& buffers) {
-    const uint3 groups_size = GetWorkGroupSize(attr.new_shape);
-    int groups_x = IntegralDivideRoundUp(attr.new_shape.w, groups_size.x);
-    int groups_y = IntegralDivideRoundUp(attr.new_shape.h, groups_size.y);
-    const int dst_layers = IntegralDivideRoundUp(attr.new_shape.c, 4);
-    int groups_z = IntegralDivideRoundUp(dst_layers, groups_size.z);
+    const uint3 grid = uint3(attr.new_shape.w, attr.new_shape.h,
+                             IntegralDivideRoundUp(attr.new_shape.c, 4));
+    const uint3 groups_size = GetWorkGroupSizeForGrid(grid);
+    int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x);
+    int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y);
+    int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z);
     return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z});
   };
 
@@ -235,11 +217,12 @@ std::vector<ComputeTaskDescriptorPtr> Reshapex4(int id, ValueId input_id,
   };
 
   desc->resize_function = [attr](const std::map<ValueId, BHWC>& buffers) {
-    const uint3 groups_size = GetWorkGroupSize(attr.new_shape);
-    int groups_x = IntegralDivideRoundUp(attr.new_shape.w, groups_size.x);
-    int groups_y = IntegralDivideRoundUp(attr.new_shape.h, groups_size.y);
-    const int dst_layers = IntegralDivideRoundUp(attr.new_shape.c, 4);
-    int groups_z = IntegralDivideRoundUp(dst_layers, groups_size.z);
+    const uint3 grid = uint3(attr.new_shape.w, attr.new_shape.h,
+                             IntegralDivideRoundUp(attr.new_shape.c, 4));
+    const uint3 groups_size = GetWorkGroupSizeForGrid(grid);
+    int groups_x = IntegralDivideRoundUp(grid.x, groups_size.x);
+    int groups_y = IntegralDivideRoundUp(grid.y, groups_size.y);
+    int groups_z = IntegralDivideRoundUp(grid.z, groups_size.z);
     return std::make_pair(groups_size, uint3{groups_x, groups_y, groups_z});
   };
 
",0,train
0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util.
Pooling is using this function for better performance.

PiperOrigin-RevId: 242153291",util.cc,"@@ -0,0 +1,47 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/delegates/gpu/metal/kernels/util.h""
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+namespace {
+
+unsigned int GetOptimalSize(unsigned int grid_size) {
+  if (grid_size % 8 == 0 || grid_size % 8 >= 4 || grid_size >= 16) {
+    return 8;
+  }
+  if (grid_size % 4 == 0 || grid_size % 4 >= 2 || grid_size >= 8) {
+    return 4;
+  }
+  if (grid_size % 2 == 0 || grid_size >= 4) {
+    return 2;
+  }
+  return 1;
+}
+
+}  // namespace
+
+uint3 GetWorkGroupSizeForGrid(const uint3& grid_size) {
+  unsigned int x_size = GetOptimalSize(grid_size.x);
+  unsigned int y_size = GetOptimalSize(grid_size.y);
+  unsigned int z_size = std::max(1u, 32u / (x_size * y_size));
+  return {x_size, y_size, z_size};
+}
+
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
",0,train
0fdb21c045c7a7c085e2da3766747c8287a7962c,tensorflow/tensorflow,"Function for better work-group picking moved to util.
Pooling is using this function for better performance.

PiperOrigin-RevId: 242153291",util.h,"@@ -0,0 +1,34 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_
+#define TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_
+
+#include ""tensorflow/lite/delegates/gpu/common/types.h""
+
+namespace tflite {
+namespace gpu {
+namespace metal {
+
+// Returns a work-group size for the given grid that tries to cover the grid
+// optimally. If you use the work-group size produced by this method you MUST
+// check all three thread dimensions against the grid bounds in your kernel.
+uint3 GetWorkGroupSizeForGrid(const uint3& grid_size);
+
+}  // namespace metal
+}  // namespace gpu
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_DELEGATES_GPU_METAL_KERNELS_UTIL_H_
",0,train
6c0a2a5beda866e75a1f8546463651360eb725e1,tensorflow/tensorflow,"Remove `tensorflow/core/tfrt/runtime` dependency from XLIR.

PiperOrigin-RevId: 428698704
Change-Id: Ief6451010d3d9f20199c4cd90860df85df417ba2",bef_thunk.cc,"@@ -35,7 +35,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h""
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/cpu_info.h""
-#include ""tensorflow/core/tfrt/runtime/work_queue_interface.h""
 #include ""tensorflow/stream_executor/device_memory.h""
 #include ""tensorflow/stream_executor/gpu/gpu_executor.h""
 #include ""tensorflow/stream_executor/gpu/gpu_stream.h""
@@ -52,11 +51,13 @@ limitations under the License.
 #include ""tfrt/host_context/async_dispatch.h""  // from @tf_runtime
 #include ""tfrt/host_context/async_value_ref.h""  // from @tf_runtime
 #include ""tfrt/host_context/chain.h""  // from @tf_runtime
+#include ""tfrt/host_context/concurrent_work_queue.h""  // from @tf_runtime
 #include ""tfrt/host_context/diagnostic.h""  // from @tf_runtime
 #include ""tfrt/host_context/execution_context.h""  // from @tf_runtime
 #include ""tfrt/host_context/function.h""  // from @tf_runtime
 #include ""tfrt/host_context/host_allocator.h""  // from @tf_runtime
 #include ""tfrt/host_context/host_context.h""  // from @tf_runtime
+#include ""tfrt/host_context/kernel_registry.h""  // from @tf_runtime
 #include ""tfrt/host_context/resource_context.h""  // from @tf_runtime
 #include ""tfrt/support/error_util.h""  // from @tf_runtime
 
@@ -67,10 +68,9 @@ bool IsBefThunkEnabled() { return true; }
 
 namespace {
 
-struct CoreRuntimeAndWorkQueue {
+struct MlirAndTfrtHostCtx {
   mlir::MLIRContext* mlir_ctx;
-  tfrt::CoreRuntime* core_runtime;
-  tensorflow::tfrt_stub::WorkQueueInterface* work_queue;
+  tfrt::HostContext* host_ctx;
 };
 
 class BefThunk : public Thunk {
@@ -204,35 +204,18 @@ static StatusOr<Thunk::Kind> GetThunkKind(mlir::Operation* op) {
       ""Operation is not supported by BefThunk."");
 }
 
-static StatusOr<CoreRuntimeAndWorkQueue> GetCoreRuntimeAndWorkQueue() {
-  static auto runtime_and_queue_or =
-      [&]() -> StatusOr<CoreRuntimeAndWorkQueue> {
-    // TODO(hanbinyoon): Make these configurable.
-    int num_threads = tensorflow::port::MaxParallelism();
-    int num_blocking_threads = 16;
-
-    // Create work queue.
-    auto work_queue = tensorflow::tfrt_stub::WrapDefaultWorkQueue(
-        tfrt::CreateMultiThreadedWorkQueue(num_threads, num_blocking_threads));
-    if (work_queue == nullptr) {
-      return tensorflow::errors::Internal(""Failed to create TFRT work queue."");
-    }
-    auto* work_queue_ptr = work_queue.get();
-    auto* mlir_ctx = new mlir::MLIRContext;
-
-    // Create core runtime.
-    auto expected_core_runtime = tfrt::CoreRuntime::Create(
+static MlirAndTfrtHostCtx GetMlirAndTfrtHostCtx() {
+  static auto* mlir_ctx = new mlir::MLIRContext;
+  static auto* host_ctx = [&] {
+    auto* result = new tfrt::HostContext(
         tfrt::gpu::GetDiagHandler(mlir_ctx), tfrt::CreateMallocAllocator(),
-        std::move(work_queue), kDefaultHostDeviceName);
-    if (!expected_core_runtime) {
-      auto error = expected_core_runtime.takeError();
-      return tensorflow::errors::Internal(llvm::toString(std::move(error)));
-    }
-
-    return CoreRuntimeAndWorkQueue{mlir_ctx, expected_core_runtime->release(),
-                                   work_queue_ptr};
+        // TODO(hanbinyoon): Make these configurable.
+        tfrt::CreateMultiThreadedWorkQueue(/*num_threads=*/1,
+                                           /*num_blocking_threads=*/16));
+    tfrt::RegisterStaticKernels(result->GetMutableRegistry());
+    return result;
   }();
-  return runtime_and_queue_or;
+  return {mlir_ctx, host_ctx};
 }
 
 // Creates a TFRT module that loads the GPU module and launches the target
@@ -313,10 +296,9 @@ StatusOr<std::unique_ptr<Thunk>> CreateBefThunk(
   auto module = CreateModule(op);
   TF_RETURN_IF_ERROR(RunLmhloGpuToTfrtConversionPipeline(*module));
 
-  TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue());
-  TF_ASSIGN_OR_RETURN(
-      auto bef_result,
-      ConvertToBef(*module, runtime_and_queue.core_runtime->GetHostContext()));
+  auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx();
+  TF_ASSIGN_OR_RETURN(auto bef_result,
+                      ConvertToBef(*module, mlir_and_host_ctx.host_ctx));
 
   return std::unique_ptr<Thunk>(
       new BefThunk(kind, thunk_info, std::move(buffers),
@@ -341,10 +323,9 @@ StatusOr<std::unique_ptr<Thunk>> CreateBefCollectivePermuteThunk(
 
   TF_RETURN_IF_ERROR(RunLmhloGpuToTfrtConversionPipeline(*module));
 
-  TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue());
-  TF_ASSIGN_OR_RETURN(
-      auto bef_result,
-      ConvertToBef(*module, runtime_and_queue.core_runtime->GetHostContext()));
+  auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx();
+  TF_ASSIGN_OR_RETURN(auto bef_result,
+                      ConvertToBef(*module, mlir_and_host_ctx.host_ctx));
 
   return std::unique_ptr<Thunk>(
       new BefThunk(kind, thunk_info, std::move(buffers),
@@ -362,11 +343,9 @@ StatusOr<std::unique_ptr<Thunk>> CreateBefKernelThunk(
   mlir::OwningOpRef<mlir::ModuleOp> tfrt_module = CreateTfrtKernelLaunchModule(
       &mlir_context, kernel_name, args.size(), launch_dimensions);
 
-  TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue());
-  TF_ASSIGN_OR_RETURN(
-      auto bef_result,
-      ConvertToBef(*tfrt_module,
-                   runtime_and_queue.core_runtime->GetHostContext()));
+  auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx();
+  TF_ASSIGN_OR_RETURN(auto bef_result,
+                      ConvertToBef(*tfrt_module, mlir_and_host_ctx.host_ctx));
 
   std::vector<BufferAllocation::Slice> arg_buffers;
   for (auto arg : args) {
@@ -469,9 +448,9 @@ Status BefThunk::ExecuteOnStream(const ExecuteParams& params) {
       tfrt::gpu::MakeBorrowedStream(gpu_context.first, stream->gpu_stream());
 
   // Create execution context.
-  TF_ASSIGN_OR_RETURN(auto runtime_and_queue, GetCoreRuntimeAndWorkQueue());
+  auto mlir_and_host_ctx = GetMlirAndTfrtHostCtx();
   tfrt::RequestContextBuilder request_context_builder(
-      runtime_and_queue.core_runtime->GetHostContext(), gpu_context.second);
+      mlir_and_host_ctx.host_ctx, gpu_context.second);
   if (kind() == Thunk::kKernel) {
     absl::MutexLock lock(&mutex_);
     TF_RETURN_IF_ERROR(
@@ -506,7 +485,7 @@ Status BefThunk::ExecuteOnStream(const ExecuteParams& params) {
   std::string diag_str;
   llvm::raw_string_ostream diag_os(diag_str);
   llvm::SourceMgr src_mgr;
-  mlir::SourceMgrDiagnosticHandler handler(src_mgr, runtime_and_queue.mlir_ctx,
+  mlir::SourceMgrDiagnosticHandler handler(src_mgr, mlir_and_host_ctx.mlir_ctx,
                                            diag_os);
 
   // Execute the function.
",0,test
6c0a2a5beda866e75a1f8546463651360eb725e1,tensorflow/tensorflow,"Remove `tensorflow/core/tfrt/runtime` dependency from XLIR.

PiperOrigin-RevId: 428698704
Change-Id: Ief6451010d3d9f20199c4cd90860df85df417ba2",gpu_executable.cc,"@@ -56,7 +56,6 @@ limitations under the License.
 #include ""mlir/IR/Diagnostics.h""  // from @llvm-project
 #include ""tensorflow/compiler/mlir/utils/name_utils.h""
 #include ""tensorflow/compiler/xla/service/gpu/xlir_ops.h""
-#include ""tensorflow/core/tfrt/runtime/work_queue_interface.h""
 #include ""tensorflow/stream_executor/gpu/gpu_executor.h""
 #include ""tensorflow/stream_executor/gpu/gpu_stream.h""
 #include ""tfrt/gpu/gpu_executor.h""  // from @tf_runtime
",0,test
73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name.

PiperOrigin-RevId: 403706067
Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_compiler.cc,"@@ -755,7 +755,8 @@ StatusOr<std::unique_ptr<BufferAssignment>> GpuCompiler::AssignBuffers(
 }
 
 #if BEF_EXECUTABLE
-static StatusOr<OwnedBefBuffer> LowerToBef(mlir::ModuleOp mlir_module) {
+static StatusOr<OwnedBefBuffer> LowerToBef(mlir::ModuleOp mlir_module,
+                                           std::string entry_function_name) {
   if (!mlir_module) {
     return tensorflow::errors::FailedPrecondition(
         ""No mlir module to lower to BEF."");
@@ -785,7 +786,7 @@ static StatusOr<OwnedBefBuffer> LowerToBef(mlir::ModuleOp mlir_module) {
   auto ptr = static_cast<uint8_t*>(
       tfrt::AlignedAlloc(tfrt::GetRequiredBefAlignment(), bef.size()));
   std::copy(bef.begin(), bef.end(), ptr);
-  return OwnedBefBuffer(ptr, {bef.size()});
+  return OwnedBefBuffer(ptr, {entry_function_name, bef.size()});
 }
 #endif  // BEF_EXECUTABLE
 
@@ -891,7 +892,8 @@ static Status CompileModuleToLlvmIrImpl(
   }
 
 #if BEF_EXECUTABLE
-  TF_ASSIGN_OR_RETURN(results->thunks_or_bef, LowerToBef(*mlir_module));
+  TF_ASSIGN_OR_RETURN(results->thunks_or_bef,
+                      LowerToBef(*mlir_module, entry_function.getName().str()));
 #else   // BEF_EXECUTABLE
   results->thunks_or_bef =
       absl::make_unique<ThunkSchedule>(ir_emitter->ConsumeThunkSequence());
@@ -1040,7 +1042,7 @@ GpuCompiler::CompileToTargetBinary(const HloModuleConfig& module_config,
   }
 
   llvm::SplitModule(
-      *llvm_module.get(),
+      *llvm_module,
       std::max<unsigned>(
           1, std::min<unsigned>(thread_pool->NumThreads(), num_functions)),
       [&](std::unique_ptr<llvm::Module> module) {
",0,train
73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name.

PiperOrigin-RevId: 403706067
Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_executable.cc,"@@ -771,9 +771,9 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStreamImpl(
       return InternalError(""Failed to load BEF file."");
     }
 
-    TF_RETURN_IF_ERROR(ExecuteBef(bef_file, module_name_, run_options,
-                                  buffer_allocations, allocations_.size(),
-                                  block_host_until_done));
+    TF_RETURN_IF_ERROR(ExecuteBef(
+        bef_file, bef_buffer.get_deleter().entry_function_name, run_options,
+        buffer_allocations, allocations_.size(), block_host_until_done));
   } else {
     return FailedPrecondition(""Expected BefBuffer is not supplied."");
   }
",0,train
73f7f1ae9212634cc2c6e0c9c9c6c273c78b4309,tensorflow/tensorflow,"Support entry function name that doesn't match the module name.

PiperOrigin-RevId: 403706067
Change-Id: I76ed606fc5dfb46af48f7a94a8bef762ab20d4cf",gpu_executable.h,"@@ -52,6 +52,7 @@ namespace gpu {
 class GpuExecutable : public Executable {
   struct BefBufferDeleter {
     void operator()(uint8_t* ptr) const;
+    std::string entry_function_name;
     size_t size;
   };
 
",0,train
708090d48995456bfa66615398d8c56dadebe018,tensorflow/tensorflow,"Make TF_DeleteKernelBuilder not crash on nullptr.

After this change, TF_DeleteKernelBuilder will comply with the
conventions established in c_api.h, namely that *Delete* functions are
safe with nullptr parameters.

PiperOrigin-RevId: 230009727",kernels.cc,"@@ -48,9 +48,10 @@ TF_KernelBuilder* TF_NewKernelBuilder(
 }
 
 void TF_DeleteKernelBuilder(TF_KernelBuilder* builder) {
-  DCHECK_NE(builder, nullptr);
-  delete builder->cc_builder;
-  delete builder;
+  if (builder != nullptr) {
+    delete builder->cc_builder;
+    delete builder;
+  }
 }
 
 namespace tensorflow {
",0,train
708090d48995456bfa66615398d8c56dadebe018,tensorflow/tensorflow,"Make TF_DeleteKernelBuilder not crash on nullptr.

After this change, TF_DeleteKernelBuilder will comply with the
conventions established in c_api.h, namely that *Delete* functions are
safe with nullptr parameters.

PiperOrigin-RevId: 230009727",kernels_test.cc,"@@ -224,4 +224,8 @@ TEST(TestKernel, TestInputAndOutputCount) {
   }
 }
 
+TEST(TestKernel, DeleteKernelBuilderIsOkOnNull) {
+  TF_DeleteKernelBuilder(nullptr);
+}
+
 }  // namespace tensorflow
",0,train
1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0.

PiperOrigin-RevId: 222270163",array_ops.py,"@@ -3201,3 +3201,48 @@ def searchsorted(sorted_sequence,
 
 
 quantize.__doc__ = gen_array_ops.quantize_v2.__doc__
+
+
+@tf_export(""image.extract_image_patches"", v1=[])
+def extract_image_patches_v2(
+    images,
+    sizes,
+    strides,
+    rates,
+    padding,
+    name=None):
+  # pylint: disable=line-too-long
+  r""""""Extract `patches` from `images` and put them in the \""depth\"" output dimension.
+
+  Args:
+    images: A 4-D Tensor with shape `[batch, in_rows, in_cols, depth]
+    sizes: The size of the sliding window for each dimension of `images`.
+    strides: A 1-D Tensor of length 4. How far the centers of two consecutive
+      patches are in the images. Must be: `[1, stride_rows, stride_cols, 1]`.
+    rates: A 1-D Tensor of length 4. Must be: `[1, rate_rows, rate_cols, 1]`.
+      This is the input stride, specifying how far two consecutive patch samples
+      are in the input. Equivalent to extracting patches with `patch_sizes_eff =
+      patch_sizes + (patch_sizes - 1) * (rates - 1)`, followed by subsampling
+      them spatially by a factor of `rates`. This is equivalent to `rate` in
+      dilated (a.k.a. Atrous) convolutions.
+    padding: The type of padding algorithm to use.
+      We specify the size-related attributes as: ```python ksizes = [1,
+        ksize_rows, ksize_cols, 1] strides = [1, strides_rows, strides_cols, 1]
+        rates = [1, rates_rows, rates_cols, 1]
+    name: A name for the operation (optional).
+
+  Returns:
+    A 4-D Tensor. Has the same type as `images`, and with shape `[batch,
+    out_rows, out_cols, ksize_rows * ksize_cols * depth]` containing image
+    patches with size `ksize_rows x ksize_cols x depth` vectorized in the
+    \""depth\"" dimension. Note `out_rows` and `out_cols` are the dimensions of
+    the output patches.
+  """"""
+  # pylint: enable=line-too-long
+  return gen_array_ops.extract_image_patches(
+      images, sizes, strides, rates, padding, name)
+
+extract_image_patches_deprecation = deprecation.deprecated_args(
+    None, ""ksizes is deprecated, use sizes instead"", ""ksizes"")
+tf_export(v1=[""image.extract_image_patches"", ""extract_image_patches""])(
+    extract_image_patches_deprecation(gen_array_ops.extract_image_patches))
",0,train
1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0.

PiperOrigin-RevId: 222270163",image_ops_impl.py,"@@ -2861,3 +2861,72 @@ resize_nearest_neighbor_deprecation = deprecation.deprecated(
         'instead.'))
 tf_export(v1=['image.resize_nearest_neighbor'])(
     resize_nearest_neighbor_deprecation(gen_image_ops.resize_nearest_neighbor))
+
+
+@tf_export('image.crop_and_resize', v1=[])
+def crop_and_resize_v2(
+    image,
+    boxes,
+    box_indices,
+    crop_size,
+    method='bilinear',
+    extrapolation_value=0,
+    name=None):
+  """"""Extracts crops from the input image tensor and resizes them.
+
+  Extracts crops from the input image tensor and resizes them using bilinear
+  sampling or nearest neighbor sampling (possibly with aspect ratio change) to a
+  common output size specified by `crop_size`. This is more general than the
+  `crop_to_bounding_box` op which extracts a fixed size slice from the input
+  image and does not allow resizing or aspect ratio change.
+
+  Returns a tensor with `crops` from the input `image` at positions defined at
+  the bounding box locations in `boxes`. The cropped boxes are all resized (with
+  bilinear or nearest neighbor interpolation) to a fixed
+  `size = [crop_height, crop_width]`. The result is a 4-D tensor
+  `[num_boxes, crop_height, crop_width, depth]`. The resizing is corner aligned.
+  In particular, if `boxes = [[0, 0, 1, 1]]`, the method will give identical
+  results to using `tf.image.resize_bilinear()` or
+  `tf.image.resize_nearest_neighbor()`(depends on the `method` argument) with
+  `align_corners=True`.
+
+  Args:
+    image: A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
+      Both `image_height` and `image_width` need to be positive.
+    boxes: A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
+      specifies the coordinates of a box in the `box_ind[i]` image and is
+      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
+      coordinate value of `y` is mapped to the image coordinate at `y *
+      (image_height - 1)`, so as the `[0, 1]` interval of normalized image
+      height is mapped to `[0, image_height - 1]` in image height coordinates.
+      We do allow `y1` > `y2`, in which case the sampled crop is an up-down
+      flipped version of the original image. The width dimension is treated
+      similarly. Normalized coordinates outside the `[0, 1]` range are allowed,
+      in which case we use `extrapolation_value` to extrapolate the input image
+      values.
+    box_indices: A 1-D tensor of shape `[num_boxes]` with int32 values in `[0,
+      batch)`. The value of `box_ind[i]` specifies the image that the `i`-th box
+      refers to.
+    crop_size: A 1-D tensor of 2 elements, `size = [crop_height, crop_width]`.
+      All cropped image patches are resized to this size. The aspect ratio of
+      the image content is not preserved. Both `crop_height` and `crop_width`
+      need to be positive.
+    method: An optional string specifying the sampling method for resizing. It
+      can be either `""bilinear""` or `""nearest""` and default to `""bilinear""`.
+      Currently two sampling methods are supported: Bilinear and Nearest
+      Neighbor.
+    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
+      extrapolation, when applicable.
+    name: A name for the operation (optional).
+
+  Returns:
+    A 4-D tensor of shape `[num_boxes, crop_height, crop_width, depth]`.
+  """"""
+  return gen_image_ops.crop_and_resize(
+      image, boxes, box_indices, crop_size, method, extrapolation_value, name)
+
+
+crop_and_resize_deprecation = deprecation.deprecated_args(
+    None, 'box_ind is deprecated, use box_indices instead', 'box_ind')
+tf_export(v1=['image.crop_and_resize'])(
+    crop_and_resize_deprecation(gen_image_ops.crop_and_resize))
",0,train
1c850ad297ff2b3236a440893b1a3b1ebc8a8ca7,tensorflow/tensorflow,"Change signature of tf.image.crop_and_resize and tf.image.extract_image_patches for TF 2.0.

PiperOrigin-RevId: 222270163",tf_upgrade_v2.py,"@@ -31,6 +31,15 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
     # Maps from a function name to a dictionary that describes how to
     # map from an old argument keyword to the new argument keyword.
     self.function_keyword_renames = {
+        ""tf.image.crop_and_resize"": {
+            ""box_ind"": ""box_indices"",
+        },
+        ""tf.image.extract_image_patches"": {
+            ""ksizes"": ""sizes"",
+        },
+        ""tf.extract_image_patches"": {
+            ""ksizes"": ""sizes"",
+        },
         ""tf.expand_dims"": {
             ""dim"": ""axis"",
         },
",0,train
dfaa328f06e6af0e0a84a6035749bf8be62ee5e2,tensorflow/tensorflow,"Scalar / Trivial folding for mhlo.select

This covers the case where the predicate is a splat or the on_true/on_false
values are the same.

PiperOrigin-RevId: 329622785
Change-Id: I5761e3260b7177f602fd3a4f999d193186071481",hlo_ops.cc,"@@ -1410,6 +1410,29 @@ static LogicalResult Verify(SelectOp op) {
   return success();
 }
 
+OpFoldResult SelectOp::fold(ArrayRef<Attribute> operands) {
+  if (on_true() == on_false()) {
+    return on_true();
+  }
+
+  auto predicate = operands[0].dyn_cast_or_null<DenseIntElementsAttr>();
+  if (!predicate) {
+    return {};
+  }
+
+  auto predicateTy = predicate.getType().cast<ShapedType>();
+  if (!predicateTy.getElementType().isInteger(1)) {
+    return {};
+  }
+
+  if (predicate.isSplat()) {
+    return predicate.getSplatValue<APInt>().getBoolValue() ? on_true()
+                                                           : on_false();
+  }
+
+  return {};
+}
+
 // Makes it such that a SelectOp that is a non-root operation in a DRR infers
 // the return type based on operand type.
 LogicalResult SelectOp::inferReturnTypes(
",0,train
7dfe1e7c1b03ed55d60bccfcc4bd20ec3a981480,tensorflow/tensorflow,"Add a note to `tf.Print` explaining where it prints to.
Change: 150493886",logging_ops.py,"@@ -42,6 +42,10 @@ def Print(input_, data, message=None, first_n=None, summarize=None,
   This is an identity op with the side effect of printing `data` when
   evaluating.
 
+  Note: This op prints to the standard error. It is not currently compatible
+    with jupyter notebook (printing to the notebook *server's* output, not into
+    the notebook).
+
   Args:
     input_: A tensor passed through this op.
     data: A list of tensors to print out when op is evaluated.
",0,train
0dbe3bf9898ea80b95fc23e872458e0f9df306a4,tensorflow/tensorflow,"Fix error msg typo.

PiperOrigin-RevId: 248223631",lite.py,"@@ -172,7 +172,7 @@ class TFLiteConverterBase(object):
             ""Provide an input generator for representative_dataset"")
     elif self._int8_target_required():
       raise ValueError(""representative_dataset is required when specifying ""
-                       ""TFLITE_BUILTINs_INT8 target."")
+                       ""TFLITE_BUILTINS_INT8 target."")
 
   def _int8_target_required(self):
     return set([OpsSet.TFLITE_BUILTINS_INT8]) == set(self._target_ops)
",0,train
604489d05f36647dd8815452ce22b295881f834b,tensorflow/tensorflow,"Removes the `SetIsStateful` mark on op `StatelessRandomGetKeyCounter` (the mark causes problems in tf.data iterator checkpointing).

Theoretically the op's output is device-dependent so it's kind of ""stateful"", but the V2 stateless RNG ops are also device-dependent (when alg=AUTO_SELECT) and not marked as stateful. We are using the same criterion here.

Note that removing the ""stateful"" mark won't cause the op to be constant-folded away (like `StatelessRandomGetAlg`).

PiperOrigin-RevId: 387358689
Change-Id: I1aa46d260b8684894aca7c9f69dbfd0b95cbbcd5",stateless_random_ops_v2.cc,"@@ -120,7 +120,6 @@ REGISTER_OP(""StatelessRandomGetKeyCounter"")
     .Output(""key: uint64"")
     .Output(""counter: uint64"")
     .Attr(""Tseed: {int32, int64} = DT_INT64"")
-    .SetIsStateful()  // because outputs depend on device
     .SetShapeFn([](InferenceContext* c) {
       // Check seed shape
       ShapeHandle seed;
",0,train
7d7dce16b8e7aef53467d8eb08d4249ef6cd71fb,tensorflow/tensorflow,"Fix typo (#16509)

* fix typos",hlo_parser.cc,"@@ -2173,7 +2173,7 @@ bool HloParser::ParseConvolutionDimensionNumbers(
 //
 //  {[2:3:4], [5:6:7], [8:9]}
 //
-// The the parsed result will be:
+// The parsed result will be:
 //
 //  {/*starts=*/{2, 5, 8}, /*limits=*/{3, 6, 9}, /*strides=*/{4, 7, 1}}
 //
",0,train
e498730b1745f78d980a988b2551520d51aea340,tensorflow/tensorflow,"Fix configuration script and add nvinfer_plugin library, add initialization for the plugins in Converter constructor, and fix code formatting.",convert_graph.cc,"@@ -60,7 +60,6 @@ limitations under the License.
 #if GOOGLE_TENSORRT
 #include ""cuda/include/cuda_runtime_api.h""
 #include ""tensorrt/include/NvInfer.h""
-#include ""tensorrt/include/NvInferPlugin.h""
 namespace tensorflow {
 namespace tensorrt {
 namespace convert {
@@ -842,26 +841,6 @@ Status ConvertAfterShapes(const ConversionParams& params) {
   LOG(INFO) << ""Number of TensorRT candidate segments: ""
             << initial_segments.size();
 
-  // Check if plugins can be aaccessed.
-  int num_trt_plugins = 0;
-  nvinfer1::IPluginCreator* const* trt_plugin_creator_list =
-    getPluginRegistry()->getPluginCreatorList(&num_trt_plugins);
-  if (!trt_plugin_creator_list) {
-    LOG(WARNING) << ""Can not find any TensorRT plugins in registry."";
-  }
-  else {
-    VLOG(1) << ""Found the following "" << num_trt_plugins << "" TensorRT plugins in registry:"";
-    for (int i = 0; i < num_trt_plugins; ++i) {
-      if (!trt_plugin_creator_list[i]) {
-        LOG(WARNING) << ""TensorRT plugin at index "" << i <<
-          "" is not accessible (null pointer returned by getPluginCreatorList for this plugin)"";
-      }
-      else {
-        VLOG(1) << ""  "" << trt_plugin_creator_list[i]->getPluginName();
-      }
-    }
-  }
-
   // Get the EngineInfo for each segment.
   std::unordered_map<string, Node*> node_map;
   TF_RETURN_IF_ERROR(BuildNodeMap(graph, &node_map));
",0,train
e498730b1745f78d980a988b2551520d51aea340,tensorflow/tensorflow,"Fix configuration script and add nvinfer_plugin library, add initialization for the plugins in Converter constructor, and fix code formatting.",convert_nodes.cc,"@@ -45,6 +45,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/protobuf.h""
 #include ""tensorflow/core/platform/tensor_coding.h""
 #include ""tensorflow/core/platform/types.h""
@@ -52,6 +53,7 @@ limitations under the License.
 #if GOOGLE_CUDA
 #if GOOGLE_TENSORRT
 #include ""tensorrt/include/NvInfer.h""
+#include ""tensorrt/include/NvInferPlugin.h""
 
 // Check if the types are equal. Cast to int first so that failure log message
 // would work!
@@ -59,7 +61,7 @@ limitations under the License.
 
 #define TFTRT_INTERNAL_ERROR_AT_NODE(node)                           \
   do {                                                               \
-    return errors::Internal(""TFTRT::"", __FUNCTION__,                 \
+    return errors::Internal(""TFTRT::"", __FUNCTION__, "":"", __LINE__,  \
                             "" failed to add TRT layer, at: "", node); \
   } while (0)
 
@@ -970,11 +972,45 @@ Status TrtNodeValidator::ConvertConstToWeights(
   return status;
 }
 
+static void InitializeTrtPlugins() {
+  static mutex plugin_mutex(LINKER_INITIALIZED);
+  static bool plugin_initialized = false;
+  mutex_lock lock(plugin_mutex);
+  if (!plugin_initialized) {
+    Logger trt_logger;
+    plugin_initialized = initLibNvInferPlugins(&trt_logger, """");
+    if (!plugin_initialized) {
+      LOG(ERROR) << ""Failed to initialize TensorRT plugins, and conversion may ""
+                    ""fail later."";
+    }
+
+    int num_trt_plugins = 0;
+    nvinfer1::IPluginCreator* const* trt_plugin_creator_list =
+        getPluginRegistry()->getPluginCreatorList(&num_trt_plugins);
+    if (!trt_plugin_creator_list) {
+      LOG(WARNING) << ""Can not find any TensorRT plugins in registry."";
+    } else {
+      VLOG(1) << ""Found the following "" << num_trt_plugins
+              << "" TensorRT plugins in registry:"";
+      for (int i = 0; i < num_trt_plugins; ++i) {
+        if (!trt_plugin_creator_list[i]) {
+          LOG(WARNING) << ""TensorRT plugin at index "" << i
+                       << "" is not accessible (null pointer returned by ""
+                          ""getPluginCreatorList for this plugin)"";
+        } else {
+          VLOG(1) << ""  "" << trt_plugin_creator_list[i]->getPluginName();
+        }
+      }
+    }
+  }
+}
+
 Converter::Converter(nvinfer1::INetworkDefinition* trt_network,
                      TrtPrecisionMode precision_mode, bool use_calibration)
     : trt_network_(trt_network),
       precision_mode_(precision_mode),
       use_calibration_(use_calibration) {
+  InitializeTrtPlugins();
   this->RegisterOpConverters();
 }
 
@@ -3880,28 +3916,33 @@ Status ConvertTopK(OpConverterParams* params) {
 }
 
 #if NV_TENSORRT_MAJOR > 5 || (NV_TENSORRT_MAJOR == 5 && NV_TENSORRT_MINOR >= 1)
-tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) {
+Status ConvertCombinedNMS(OpConverterParams* params) {
   const auto& inputs = params->inputs;
   const auto& node_def = params->node_def;
 
   if (inputs.size() != 6) {
-    return tensorflow::errors::InvalidArgument(
-        ""Six inputs expected for CombinedNonMaxSuppression, at "", node_def.name());
+    return errors::InvalidArgument(
+        ""Six inputs expected for CombinedNonMaxSuppression, at "",
+        node_def.name());
   }
   if (!(inputs.at(0).is_tensor() || inputs.at(1).is_tensor())) {
-    return tensorflow::errors::Unimplemented(
-        ""CombinedNonMaxSuppression expects tensor for boxes and scores, at "", node_def.name());
+    return errors::Unimplemented(
+        ""CombinedNonMaxSuppression expects tensor for boxes and scores, at "",
+        node_def.name());
   }
   if (!(inputs.at(2).is_weights()) || !(inputs.at(3).is_weights()) ||
-     (!inputs.at(4).is_weights()) || !(inputs.at(5).is_weights())) {
-    return tensorflow::errors::InvalidArgument(
+      (!inputs.at(4).is_weights()) || !(inputs.at(5).is_weights())) {
+    return errors::InvalidArgument(
         ""CombinedNonMaxSuppression expects weights for ""
-        ""max_output_size_per_class, max_total_size, iou_threshold, score_threshold, at "",
+        ""max_output_size_per_class, max_total_size, iou_threshold, ""
+        ""score_threshold, at "",
         node_def.name());
   }
 
-  nvinfer1::ITensor* boxes_tensor = const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
-  nvinfer1::ITensor* scores_tensor = const_cast<nvinfer1::ITensor*>(inputs.at(1).tensor());
+  nvinfer1::ITensor* boxes_tensor =
+      const_cast<nvinfer1::ITensor*>(inputs.at(0).tensor());
+  nvinfer1::ITensor* scores_tensor =
+      const_cast<nvinfer1::ITensor*>(inputs.at(1).tensor());
   TRT_ShapedWeights output_size_per_class = inputs.at(2).weights();
   TRT_ShapedWeights total_size = inputs.at(3).weights();
   TRT_ShapedWeights iou_threshold = inputs.at(4).weights();
@@ -3911,54 +3952,56 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) {
   const auto boxes_dims = boxes_tensor->getDimensions();
   const auto scores_dims = scores_tensor->getDimensions();
   if (boxes_dims.nbDims != 3) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin input boxes must be 3-D excluding batch "",
-         node_def.name());
+        node_def.name());
   }
   const int num_classes = scores_dims.d[1];
   bool box_check = boxes_dims.d[1] == 1 || boxes_dims.d[1] == num_classes;
   if (!box_check) {
-      return tensorflow::errors::InvalidArgument(
-        ""TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 or num_classes "",
+    return errors::InvalidArgument(
+        ""TensorRT BatchedNMS Plugin third dimension of boxes must be either 1 ""
+        ""or num_classes "",
         node_def.name());
   }
   if (output_size_per_class.shape_.nbDims != 1) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin max_output_size_per_class must be 0-D "",
         node_def.name());
   }
-  int max_size_per_class = *(static_cast<int*>(const_cast<void*>(
-                                          output_size_per_class.GetValues())));
-  if (max_size_per_class <=0) {
-    return tensorflow::errors::InvalidArgument(
+  int max_size_per_class = *(
+      static_cast<int*>(const_cast<void*>(output_size_per_class.GetValues())));
+  if (max_size_per_class <= 0) {
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin max_output_size_per_class should be > 0"",
         node_def.name());
   }
   if (total_size.shape_.nbDims != 1) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin max_total_size must be 0-D "",
-         node_def.name());
+        node_def.name());
   }
-  int max_total_size = *(static_cast<int*>(const_cast<void*>(
-                                                      total_size.GetValues())));
-  if (max_total_size <=0) {
-    return tensorflow::errors::InvalidArgument(
+  int max_total_size =
+      *(static_cast<int*>(const_cast<void*>(total_size.GetValues())));
+  if (max_total_size <= 0) {
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin max_total_size should be > 0"",
         node_def.name());
   }
   if (iou_threshold.shape_.nbDims != 1) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin iou_threshold must be 0-D "",
         node_def.name());
   }
-  float iou_thresh = *(static_cast<float*>(const_cast<void*>(iou_threshold.GetValues())));
+  float iou_thresh =
+      *(static_cast<float*>(const_cast<void*>(iou_threshold.GetValues())));
   if (iou_thresh < 0.0 || iou_thresh > 1.0) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin iou_threshold must be in [0, 1]"",
         node_def.name());
   }
   if (score_threshold.shape_.nbDims != 1) {
-    return tensorflow::errors::InvalidArgument(
+    return errors::InvalidArgument(
         ""TensorRT BatchedNMS Plugin score_threshold must be 0-D "",
         node_def.name());
   }
@@ -3967,42 +4010,44 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) {
 
   // Set plugin fields and the field collection
   TFAttrs attrs(node_def);
-  bool share_location = (boxes_dims.d[1] == 1); 
+  bool share_location = (boxes_dims.d[1] == 1);
   const bool pad_per_class = attrs.get<bool>(""pad_per_class"");
   int topK;
   if (pad_per_class) {
     topK = std::min(max_size_per_class * num_classes, max_total_size);
-  }
-  else {
+  } else {
     topK = max_total_size;
   }
   const int keepTopK = topK;
-  float score_thresh = *(static_cast<float*>(const_cast<void*>(score_threshold.GetValues())));
+  float score_thresh =
+      *(static_cast<float*>(const_cast<void*>(score_threshold.GetValues())));
   const int background_id = -1;
   nvinfer1::PluginField fields[7] = {
-    nvinfer1::PluginField{""shareLocation"", &share_location,
-                          nvinfer1::PluginFieldType::kINT32, 1},
-    nvinfer1::PluginField{""backgroundLabelId"", &background_id,
-                          nvinfer1::PluginFieldType::kINT32, 1},
-    nvinfer1::PluginField{""numClasses"", &num_classes,
-                          nvinfer1::PluginFieldType::kINT32, 1},
-    nvinfer1::PluginField{""topK"", &topK,
-                          nvinfer1::PluginFieldType::kINT32, 1},
-    nvinfer1::PluginField{""keepTopK"", &keepTopK,
-                          nvinfer1::PluginFieldType::kINT32, 1},
-    nvinfer1::PluginField{""scoreThreshold"", &score_thresh,
-                          nvinfer1::PluginFieldType::kFLOAT32, 1},
-    nvinfer1::PluginField{""iouThreshold"", &iou_thresh,
-                          nvinfer1::PluginFieldType::kFLOAT32, 1},
+      nvinfer1::PluginField{""shareLocation"", &share_location,
+                            nvinfer1::PluginFieldType::kINT32, 1},
+      nvinfer1::PluginField{""backgroundLabelId"", &background_id,
+                            nvinfer1::PluginFieldType::kINT32, 1},
+      nvinfer1::PluginField{""numClasses"", &num_classes,
+                            nvinfer1::PluginFieldType::kINT32, 1},
+      nvinfer1::PluginField{""topK"", &topK, nvinfer1::PluginFieldType::kINT32,
+                            1},
+      nvinfer1::PluginField{""keepTopK"", &keepTopK,
+                            nvinfer1::PluginFieldType::kINT32, 1},
+      nvinfer1::PluginField{""scoreThreshold"", &score_thresh,
+                            nvinfer1::PluginFieldType::kFLOAT32, 1},
+      nvinfer1::PluginField{""iouThreshold"", &iou_thresh,
+                            nvinfer1::PluginFieldType::kFLOAT32, 1},
   };
   nvinfer1::PluginFieldCollection fc{7, fields};
 
   // Get plugin creator
-  auto creator = getPluginRegistry()->getPluginCreator(""BatchedNMS_TRT"", ""1"", """");
+  auto creator =
+      getPluginRegistry()->getPluginCreator(""BatchedNMS_TRT"", ""1"", """");
   TFTRT_RETURN_ERROR_IF_NULLPTR(creator, node_def.name());
 
   // Create plugin
-  nvinfer1::IPluginV2* plugin = creator->createPlugin(node_def.name().c_str(), &fc);
+  nvinfer1::IPluginV2* plugin =
+      creator->createPlugin(node_def.name().c_str(), &fc);
   TFTRT_RETURN_ERROR_IF_NULLPTR(plugin, node_def.name());
 
   // Set plugin inputs
@@ -4012,7 +4057,7 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) {
 
   // Add plugin to network
   nvinfer1::IPluginV2Layer* layer = params->converter->network()->addPluginV2(
-    &plugin_inputs[0], int(plugin_inputs.size()), *plugin);
+      &plugin_inputs[0], int(plugin_inputs.size()), *plugin);
   TFTRT_RETURN_ERROR_IF_NULLPTR(layer, node_def.name());
 
   // Set plugin outputs
@@ -4025,7 +4070,7 @@ tensorflow::Status ConvertCombinedNMS(OpConverterParams* params) {
   params->outputs->push_back(TRT_TensorOrWeights(output_nmsed_classes));
   params->outputs->push_back(TRT_TensorOrWeights(output_num_detections));
 
-  return tensorflow::Status::OK();
+  return Status::OK();
 }
 #endif // CombinedNonMaxSuppression
 
",0,train
98f38b608073e761d75227373b2b2c7d26c483e5,tensorflow/tensorflow,"Add support for parsing the ""gather"" HLO

PiperOrigin-RevId: 187050345",hlo_parser.cc,"@@ -1049,9 +1049,40 @@ bool HloParser::ParseInstruction(HloComputation::Builder* builder,
           HloInstruction::CreateDot(shape, operands[0], operands[1], dnum));
       break;
     }
-    case HloOpcode::kGather:
-      // TODO(b/72710576): HLO parsing is not implemented for Gather.
-      return TokenError(""HLO parsing is not implemented for Gather"");
+    case HloOpcode::kGather: {
+      optional<std::vector<int64>> output_window_dims;
+      attrs[""output_window_dims""] = {
+          /*required=*/true, AttrTy::kBracedInt64List, &output_window_dims};
+      optional<std::vector<int64>> elided_window_dims;
+      attrs[""elided_window_dims""] = {
+          /*required=*/true, AttrTy::kBracedInt64List, &elided_window_dims};
+      optional<std::vector<int64>> gather_dims_to_operand_dims;
+      attrs[""gather_dims_to_operand_dims""] = {/*required=*/true,
+                                              AttrTy::kBracedInt64List,
+                                              &gather_dims_to_operand_dims};
+      optional<int64> index_vector_dim;
+      attrs[""index_vector_dim""] = {/*required=*/true, AttrTy::kInt64,
+                                   &index_vector_dim};
+      optional<std::vector<int64>> window_bounds;
+      attrs[""window_bounds""] = {/*required=*/true, AttrTy::kBracedInt64List,
+                                &window_bounds};
+
+      if (!ParseOperands(&operands, /*expected_size=*/2) ||
+          !ParseAttributes(attrs)) {
+        return false;
+      }
+
+      GatherDimensionNumbers dim_numbers = HloInstruction::MakeGatherDimNumbers(
+          /*output_window_dims=*/*output_window_dims,
+          /*elided_window_dims=*/*elided_window_dims,
+          /*gather_dims_to_operand_dims=*/*gather_dims_to_operand_dims,
+          /*index_vector_dim=*/*index_vector_dim);
+
+      instruction = builder->AddInstruction(HloInstruction::CreateGather(
+          shape, /*operand=*/operands[0], /*gather_indices=*/operands[1],
+          dim_numbers, *window_bounds));
+      break;
+    }
     case HloOpcode::kTrace:
       return TokenError(StrCat(""parsing not yet implemented for op: "",
                                HloOpcodeString(opcode)));
",0,train
98f38b608073e761d75227373b2b2c7d26c483e5,tensorflow/tensorflow,"Add support for parsing the ""gather"" HLO

PiperOrigin-RevId: 187050345",hlo_parser_test.cc,"@@ -716,6 +716,18 @@ ENTRY %sparse_f32_r1 () -> f32[9] {
   ROOT %foo = f32[9]sparse{10} constant(f32[9]{1: 2, 3: 4, 5: 6})
 }
 
+)""
+},
+{
+""gather"",
+R""(HloModule StringifyGather
+
+ENTRY %Gather (input_tensor: f32[50,49,48,47,46], gather_indices: s64[10,9,8,7,5]) -> f32[10,9,8,7,30,29,28,27,26] {
+  %input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0)
+  %gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1)
+  ROOT %gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(f32[50,49,48,47,46]{4,3,2,1,0} %input_tensor, s64[10,9,8,7,5]{4,3,2,1,0} %gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26}
+}
+
 )""
 },
   });
@@ -860,6 +872,18 @@ ENTRY dot {
   ROOT dot = f32[2,3]{1,0} dot(a, b), lhs_batch_dims={0}, lhs_contracting_dims={1}, rhs_contracting_dims={0}
 }
 
+)""
+},
+{
+""gather"",
+R""(HloModule gather
+
+ENTRY Gather {
+  input_tensor = f32[50,49,48,47,46]{4,3,2,1,0} parameter(0)
+  gather_indices = s64[10,9,8,7,5]{4,3,2,1,0} parameter(1)
+  ROOT gather = f32[10,9,8,7,30,29,28,27,26]{8,7,6,5,4,3,2,1,0} gather(input_tensor, gather_indices), output_window_dims={4,5,6,7,8}, elided_window_dims={}, gather_dims_to_operand_dims={0,1,2,3,4}, index_vector_dim=4, window_bounds={30,29,28,27,26}
+}
+
 )""
 },
   });
",0,train
6958623f7330f7268d05c6753959db3093638d37,tensorflow/tensorflow,"Add simple microbenchmarks for SparseDenseCwiseMul.
Change: 121850503",sparse_dense_binary_op_shared_test.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/core/common_runtime/kernel_benchmark_testlib.h""
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/framework/fake_input.h""
 #include ""tensorflow/core/framework/node_def_builder.h""
@@ -20,8 +21,11 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_testutil.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/graph/graph.h""
+#include ""tensorflow/core/graph/node_builder.h""
 #include ""tensorflow/core/kernels/ops_testutil.h""
 #include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/platform/test_benchmark.h""
 
 namespace tensorflow {
 
@@ -194,6 +198,91 @@ TEST_F(SparseDenseCMulTest, BroadcastDense) {
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
 }
 
+// Benchmarking code follows.
+
+static Graph* SparseMatCMulDenseMat(Graph* g, Node* sp_indices, Node* sp_vals,
+                                    Node* sp_shape, Node* dense) {
+  Node* ret;
+  TF_CHECK_OK(
+      NodeBuilder(g->NewName(""SparseDenseCwiseMul""), ""SparseDenseCwiseMul"")
+          .Input(sp_indices)
+          .Input(sp_vals)
+          .Input(sp_shape)
+          .Input(dense)
+          .Finalize(g, &ret));
+  return g;
+}
+
+static Node* MakeTensor(Graph* g, int B, int M, int N) {
+  Tensor data(DT_FLOAT, TensorShape({B, M, N}));
+  data.flat<float>().setRandom();
+  return test::graph::Constant(g, data);
+}
+
+struct ST {
+  Node* indices;
+  Node* vals;
+  Node* shape;
+};
+
+static ST MakeSparseTensor(Graph* g, int B, int M, int N, int nnz_inner) {
+  const int total_nnz = B * M * nnz_inner;
+  const int kNumDims = 3;
+
+  Tensor indices(DT_INT64, TensorShape({total_nnz, kNumDims}));
+  Tensor vals(DT_FLOAT, TensorShape({total_nnz}));
+  Tensor shape(DT_INT64, TensorShape({kNumDims}));
+  vals.flat<float>().setRandom();
+  test::FillValues(&shape, gtl::ArraySlice<int64>({B, M, N}));
+  auto indices_mat = indices.matrix<int64>();
+
+  int nnz_cnt = 0;
+  std::unordered_set<int> picked;
+  std::random_device rd;
+  std::mt19937 gen(rd());
+  std::uniform_int_distribution<> dist(0, N - 1);
+
+  for (int i = 0; i < B; ++i) {
+    for (int j = 0; j < M; ++j) {
+      for (int k = 0; k < nnz_inner; ++k) {
+        indices_mat(nnz_cnt, 0) = i;
+        indices_mat(nnz_cnt, 1) = j;
+
+        int inner = dist(gen);
+        while (picked.count(inner) == 1) {
+          inner = dist(gen);
+        }
+        picked.insert(inner);
+        indices_mat(nnz_cnt, 2) = inner;
+
+        ++nnz_cnt;
+      }
+    }
+  }
+
+  return ST{test::graph::Constant(g, indices), test::graph::Constant(g, vals),
+            test::graph::Constant(g, shape)};
+}
+
+// [8, 4, N{nnz}] cmul [8, 4, N]
+static void BM_SparseMatCMulDenseMat(int iters, int N, int nnz_inner) {
+  Graph* g = new Graph(OpRegistry::Global());
+  Node* dense = MakeTensor(g, 8, 4, N);
+  ST sp = MakeSparseTensor(g, 8, 4, N, nnz_inner);
+
+  testing::ItemsProcessed(static_cast<int64>(iters * 8 * 4 * N * 2));
+  test::Benchmark(
+      ""cpu"", SparseMatCMulDenseMat(g, sp.indices, sp.vals, sp.shape, dense))
+      .Run(iters);
+}
+BENCHMARK(BM_SparseMatCMulDenseMat)
+    ->ArgPair(1 << 20, 1)
+    ->ArgPair(1 << 20, 8)
+    ->ArgPair(1 << 20, 32)
+    ->ArgPair(1 << 18, 1)
+    ->ArgPair(1 << 18, 8)
+    ->ArgPair(1 << 18, 32);
+
 }  // namespace
 
 }  // namespace tensorflow
",0,train
1df80e82acd1209f6cc95e68c9eedc59953d4a95,tensorflow/tensorflow,"Minor: default-construct RunOptions and RunOutputs in vanilla Run().
Change: 115714930",direct_session.cc,"@@ -252,8 +252,9 @@ Status DirectSession::Run(const NamedTensorList& inputs,
                           const std::vector<string>& output_names,
                           const std::vector<string>& target_nodes,
                           std::vector<Tensor>* outputs) {
-  return RunWithOpts(kEmptyRunOptions, inputs, output_names, target_nodes,
-                     outputs, &kEmptyRunOutputs);
+  RunOutputs run_outputs;
+  return RunWithOpts(RunOptions(), inputs, output_names, target_nodes, outputs,
+                     &run_outputs);
 }
 
 Status DirectSession::RunWithOpts(const RunOptions& run_options,
",0,train
1df80e82acd1209f6cc95e68c9eedc59953d4a95,tensorflow/tensorflow,"Minor: default-construct RunOptions and RunOutputs in vanilla Run().
Change: 115714930",direct_session.h,"@@ -155,9 +155,6 @@ class DirectSession : public Session {
     Graph* graph = nullptr;
   };
 
-  const RunOptions kEmptyRunOptions = RunOptions();
-  RunOutputs kEmptyRunOutputs = RunOutputs();
-
   // Retrieves an already existing set of executors to run 'inputs' and
   // 'outputs', or creates and caches them for future use.
   ::tensorflow::Status GetOrCreateExecutors(
",0,train
f118ff1538ac7aa8a628bba03fe66dc6811cc7fc,tensorflow/tensorflow,"Memoize HostCPU device using atomic pointer to save mutex lock

PiperOrigin-RevId: 384759508
Change-Id: I3a53c8a2b1b6c0c9582dcd97a8edd09efc4b12dc",device_mgr.h,"@@ -162,7 +162,7 @@ class DynamicDeviceMgr : public DeviceMgr {
   std::unordered_map<string, int> device_type_counts_
       TF_GUARDED_BY(devices_mu_);
 
-  mutable Device* cpu_device_ TF_GUARDED_BY(devices_mu_);
+  mutable std::atomic<Device*> cpu_device_;  // memoize `HostCPU` result
 
   class DeviceCircularBuffer {
    public:
",0,train
f118ff1538ac7aa8a628bba03fe66dc6811cc7fc,tensorflow/tensorflow,"Memoize HostCPU device using atomic pointer to save mutex lock

PiperOrigin-RevId: 384759508
Change-Id: I3a53c8a2b1b6c0c9582dcd97a8edd09efc4b12dc",dynamic_device_mgr.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <atomic>
 #include <iterator>
 #include <memory>
 #include <vector>
@@ -222,17 +223,22 @@ Status DynamicDeviceMgr::RemoveDevicesByName(
 }
 
 Device* DynamicDeviceMgr::HostCPU() const {
+  Device* device = cpu_device_.load(std::memory_order_relaxed);
+
+  // Host CPU device can't be removed, so if we found valid device once, we
+  // do not need to check that it is still in the device list.
+  if (device != nullptr) return device;
+
   mutex_lock l(devices_mu_);
-  if (cpu_device_ == nullptr) {
-    for (int i = 0; i < dynamic_devices_.size(); ++i) {
-      auto* d = dynamic_devices_[i].get();
-      if (d->device_type() == DEVICE_CPU && d->parsed_name().id == 0) {
-        cpu_device_ = d;
-        break;
-      }
+  for (int i = 0; i < dynamic_devices_.size(); ++i) {
+    Device* d = dynamic_devices_[i].get();
+    if (d->device_type() == DEVICE_CPU && d->parsed_name().id == 0) {
+      cpu_device_ = d;
+      break;
     }
   }
-  return cpu_device_;
+
+  return cpu_device_.load(std::memory_order_relaxed);
 }
 
 }  // namespace tensorflow
",0,train
cb5c61a3e11a37fb39a246aaf8ed6d02dd9ae9ab,tensorflow/tensorflow,Refine LeakyRelu codes and update APIs.,pywrap_tfe_src.cc,"@@ -1730,6 +1730,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           ""SoftplusGrad"",
           ""Softsign"",
           ""ReluGrad"",
+          ""LeakyRelu"",
           ""LeakyReluGrad"",
           ""Conv2D"",
           ""DepthwiseConv2dNative"",
@@ -1800,7 +1801,6 @@ bool OpDoesntRequireInput(const string& op_name) {
           ""BiasAdd"",
           ""Relu"",
           ""Relu6"",
-          ""LeakyRelu"",
           ""Elu"",
           ""Selu"",
           ""SparseSoftmaxCrossEntropyWithLogits"",
",0,train
2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than
snapshot. Variables may create another snapshot or their ref may be exposed
via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which
happens fairly often inside libraries or collection serialization). On the
other hand, tf.gradients() use convert_to_tensor() which returns a snapshot,
and gradients were computed with respect to this particular snapshot, which
makes the gradients incorrect.
Change: 147800865",imperative_graph.py,"@@ -102,7 +102,7 @@ class ImperativeGraph(ops.Graph):
     # calling the original gradient function.
     def _imperative_op_grad(op, *grad):
       with self.replace_outputs(op):
-        return self._gradient_function_map[op](op, *grad)
+        return self._gradient_function_map[op.name](op, *grad)
 
     ops.RegisterGradient(self._imperative_op_type)(_imperative_op_grad)
 
@@ -166,7 +166,7 @@ class ImperativeGraph(ops.Graph):
     """"""Replaces the outputs of `op` with values recorded in `_outputs_map`.""""""
     # pylint: disable=protected-access
     old_outputs = op._outputs
-    op._outputs = self._outputs_map[op]
+    op._outputs = self._outputs_map[op.name]
     yield
     op._outputs = old_outputs
     # pylint: enable=protected-access
@@ -318,9 +318,9 @@ class ImperativeGraph(ops.Graph):
 
       for i, _ in enumerate(shapes):
         values[i].set_shape(shapes[i])
-      self._outputs_map[orig_op] = values
+      self._outputs_map[orig_op.name] = values
       try:
-        self._gradient_function_map[orig_op] = ops.get_gradient_function(
+        self._gradient_function_map[orig_op.name] = ops.get_gradient_function(
             orig_op)
       except (KeyError, LookupError):
         pass
",0,test
2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than
snapshot. Variables may create another snapshot or their ref may be exposed
via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which
happens fairly often inside libraries or collection serialization). On the
other hand, tf.gradients() use convert_to_tensor() which returns a snapshot,
and gradients were computed with respect to this particular snapshot, which
makes the gradients incorrect.
Change: 147800865",imperative_test.py,"@@ -107,6 +107,17 @@ class ImperativeTest(test.TestCase):
               b = a + random_ops.random_uniform([], minval=0.1)
               self.assertGreaterEqual(b.value, a.value)
 
+  def testGradientThroughNewStep(self):
+    with imperative_mode.ImperativeMode(self._target) as mode:
+      x = constant_op.constant(np.random.rand(3))
+      y = math_ops.tanh(x)
+
+      with mode.new_step():
+        z = constant_op.constant(np.random.rand(3))
+        w = math_ops.multiply(y, z)
+        dx = gradients_impl.gradients(w, x)
+        self.assertAllClose(dx[0].value, z.value * (1.0 - y.value ** 2))
+
   def testEscape(self):
     """"""Makes sure that values don't escape a `new_step` scope.""""""
     with imperative_mode.ImperativeMode(self._target) as mode:
",0,test
2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than
snapshot. Variables may create another snapshot or their ref may be exposed
via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which
happens fairly often inside libraries or collection serialization). On the
other hand, tf.gradients() use convert_to_tensor() which returns a snapshot,
and gradients were computed with respect to this particular snapshot, which
makes the gradients incorrect.
Change: 147800865",gradients_impl.py,"@@ -433,7 +433,8 @@ def gradients(ys,
     xs = [x.handle if isinstance(x, resource_variable_ops.ResourceVariable)
           else x
           for x in xs]
-    xs = ops.convert_n_to_tensor_or_indexed_slices(xs, name=""x"")
+    xs = ops.internal_convert_n_to_tensor_or_indexed_slices(xs, name=""x"",
+                                                            as_ref=True)
     grad_ys = _DefaultGradYs(grad_ys, ys, colocate_gradients_with_ops)
 
     # The approach we take here is as follows: Create a list of all ops in the
",0,test
2bfb700080d6b3aa5b4ec00c5928e87db0d56677,tensorflow/tensorflow,"Change gradients to be computed with respect to variable ref rather than
snapshot. Variables may create another snapshot or their ref may be exposed
via public API (e.g., var.op.outputs[0] or graph.as_graph_element(var) which
happens fairly often inside libraries or collection serialization). On the
other hand, tf.gradients() use convert_to_tensor() which returns a snapshot,
and gradients were computed with respect to this particular snapshot, which
makes the gradients incorrect.
Change: 147800865",gradients_test.py,"@@ -44,6 +44,7 @@ from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import state_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.ops.nn_ops import bias_add
 from tensorflow.python.platform import googletest
 
@@ -311,6 +312,27 @@ class GradientsTest(test_util.TensorFlowTestCase):
       grad, = gradients.gradients(target, v)
       self.assertIsNone(grad)
 
+  def testVariableReadValueGradient(self):
+    with ops.Graph().as_default():
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(var.read_value(), var)
+      self.assertIsNotNone(gradient)
+
+  def testVariableAsGraphElementGradient(self):
+    with ops.Graph().as_default() as graph:
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(graph.as_graph_element(var), var)
+      self.assertIsNotNone(gradient)
+
+  def testVariableRefGradient(self):
+    with ops.Graph().as_default():
+      init = constant_op.constant(100.0)
+      var = variables.Variable(init)
+      gradient = gradients.gradients(var._ref(), var)
+      self.assertIsNotNone(gradient)
+
 
 class FunctionGradientsTest(test_util.TensorFlowTestCase):
 
",0,test
9b6b179fe33a0daab4c6b4c7314f77e49825f999,tensorflow/tensorflow,"Make ControlFlowContext.AddInnerOp recursively propagate the inner op to the enclosing context by default.

PiperOrigin-RevId: 170099939",control_flow_ops.py,"@@ -1496,7 +1496,8 @@ class ControlFlowContext(object):
 
   def AddInnerOp(self, op):
     """"""Notifies a scope about an operator added to an inner scope.""""""
-    pass
+    if self._outer_context:
+      self._outer_context.AddInnerOp(op)
 
   def GetControlPivot(self):
     """"""Returns the pivot node for this context, or None.""""""
",0,train
6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default

PiperOrigin-RevId: 161234072",graph_rewriter.cc,"@@ -29,16 +29,7 @@ GraphRewriter::GraphRewriter(const GrapplerItem& item) {
   }
 
   for (auto& node : item.graph.node()) {
-    for (const auto& input : node.input()) {
-      int position = 0;
-      string input_node_name = ParseNodeName(input, &position);
-      if (position < 0) {
-        // This is a control edge
-        auto itr = nodes_.find(input_node_name);
-        CHECK(itr != nodes_.end());
-        control_dependency_drivers_.insert(itr->second);
-      }
-    }
+    RecordControlDependencyDrivers(node);
   }
 }
 
@@ -46,21 +37,9 @@ void GraphRewriter::ForwardInputs(
     const NodeDef& original_node,
     const std::unordered_set<const NodeDef*>& nodes_to_delete,
     NodeDef* new_node) {
-  for (const auto& input : original_node.input()) {
-    string input_node_name = NodeName(input);
-    auto itr = nodes_.find(input_node_name);
-    if (itr == nodes_.end()) {
-      // Invalid input, preserve it as is.
-      *new_node->add_input() = input;
-    }
-    const NodeDef* input_node = itr->second;
-    if ((input_node->device().empty() || original_node.device().empty() ||
-         input_node->device() == original_node.device()) &&
-        nodes_to_delete.find(input_node) != nodes_to_delete.end()) {
-      ForwardInputs(*input_node, nodes_to_delete, new_node);
-    } else {
-      *new_node->add_input() = input;
-    }
+  ForwardInputsInternal(original_node, nodes_to_delete, new_node);
+  if (!new_node->name().empty()) {
+    optimized_nodes_[new_node->name()] = new_node;
   }
 }
 
@@ -79,5 +58,50 @@ bool GraphRewriter::IsDrivenByControlDependency(const NodeDef& node) const {
   return false;
 }
 
+void GraphRewriter::RecordControlDependencyDrivers(const NodeDef& node) {
+  for (const auto& input : node.input()) {
+    int position = 0;
+    string input_node_name = ParseNodeName(input, &position);
+    if (position < 0) {
+      // This is a control edge
+      auto itr = nodes_.find(input_node_name);
+      CHECK(itr != nodes_.end());
+      control_dependency_drivers_.insert(itr->second);
+    }
+  }
+}
+
+void GraphRewriter::ForwardInputsInternal(
+    const NodeDef& node,
+    const std::unordered_set<const NodeDef*>& nodes_to_delete,
+    NodeDef* new_node) {
+  // To speed things up, use the optimized version of the node if
+  // available.
+  auto itr = optimized_nodes_.find(node.name());
+  if (itr != optimized_nodes_.end()) {
+    for (const string& input : itr->second->input()) {
+      *new_node->add_input() = input;
+    }
+    return;
+  }
+  for (const auto& input : node.input()) {
+    string input_node_name = NodeName(input);
+    auto itr = nodes_.find(input_node_name);
+    if (itr == nodes_.end()) {
+      // Invalid input, preserve it as is.
+      *new_node->add_input() = input;
+      continue;
+    }
+    const NodeDef* input_node = itr->second;
+    if ((input_node->device().empty() || node.device().empty() ||
+         input_node->device() == node.device()) &&
+        nodes_to_delete.find(input_node) != nodes_to_delete.end()) {
+      ForwardInputsInternal(*input_node, nodes_to_delete, new_node);
+    } else {
+      *new_node->add_input() = input;
+    }
+  }
+}
+
 }  // end namespace grappler
 }  // end namespace tensorflow
",0,train
6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default

PiperOrigin-RevId: 161234072",graph_rewriter.h,"@@ -48,7 +48,14 @@ class GraphRewriter {
   bool IsDrivenByControlDependency(const NodeDef& node) const;
 
  private:
+  void RecordControlDependencyDrivers(const NodeDef& node);
+  void ForwardInputsInternal(
+      const NodeDef& original_node,
+      const std::unordered_set<const NodeDef*>& nodes_to_delete,
+      NodeDef* new_node);
+
   std::unordered_map<string, const NodeDef*> nodes_;
+  std::unordered_map<string, const NodeDef*> optimized_nodes_;
   std::unordered_set<const NodeDef*> control_dependency_drivers_;
 };
 
",0,train
6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default

PiperOrigin-RevId: 161234072",meta_optimizer.cc,"@@ -120,9 +120,9 @@ void MetaOptimizer::Feedback(Cluster* cluster, const GrapplerItem& item,
 }
 
 bool MetaOptimizerEnabled(const RewriterConfig& cfg) {
-  return cfg.optimize_tensor_layout() || cfg.constant_folding() ||
-         cfg.auto_parallel().enable() || cfg.memory_optimization() > 0 ||
-         !cfg.optimizers().empty();
+  return !cfg.disable_model_pruning() || cfg.optimize_tensor_layout() ||
+         cfg.constant_folding() || cfg.auto_parallel().enable() ||
+         cfg.memory_optimization() > 0 || !cfg.optimizers().empty();
 }
 
 Status RunMetaOptimizer(const GrapplerItem& item, const RewriterConfig& cfg,
",0,train
6629e3a9863d25a81764f3d3115c1bfc2a7d8e67,tensorflow/tensorflow,"Enabled model pruning by default

PiperOrigin-RevId: 161234072",model_pruner.cc,"@@ -63,6 +63,11 @@ Status ModelPruner::Optimize(Cluster* cluster, const GrapplerItem& item,
     }
   }
 
+  if (nodes_to_delete.empty()) {
+    *pruned_graph = item.graph;
+    return Status::OK();
+  }
+
   for (auto& node : item.graph.node()) {
     NodeDef* new_node = pruned_graph->add_node();
     *new_node = node;
",0,train
05ca1e9bf3e7a04603eac921c4d95c5dbeca7dd6,tensorflow/tensorflow,"[Grappler] Re-enable DependencyOptimizer in FunctionTest.testControlFlowStrictness.

PiperOrigin-RevId: 232351788",function_test.py,"@@ -497,8 +497,6 @@ class FunctionTest(test.TestCase):
                                          lambda y: AssertFail(y), [x])
       # pylint: enable=unnecessary-lambda
 
-    rewriter_config = rewriter_config_pb2.RewriterConfig(
-        dependency_optimization=rewriter_config_pb2.RewriterConfig.OFF)
     # Enables inlining.
     config = config_pb2.ConfigProto(
         graph_options=config_pb2.GraphOptions(
@@ -506,8 +504,7 @@ class FunctionTest(test.TestCase):
                 opt_level=config_pb2.OptimizerOptions.L0,
                 do_common_subexpression_elimination=True,
                 do_function_inlining=True,
-                do_constant_folding=True),
-            rewrite_options=rewriter_config))
+                do_constant_folding=True)))
 
     with session.Session(config=config) as sess:
       # Since the 'False' branch is not taken, the assertion should not fire.
",0,train
16033c0b3484409a965acc0dd3054695145311a8,tensorflow/tensorflow,Python tf.config tf32 interface,config.py,"@@ -18,10 +18,36 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python import _pywrap_tf32_execution
 from tensorflow.python.eager import context
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
 
+def tensor_float32_execution_allowed():
+  """"""Get if TensorFloat-32 operations are enabled on supported hardware.
+
+  Returns:
+    True if TensorFloat-32 execution is enabled and False otherwise.
+  """"""
+  return _pywrap_tf32_execution.is_allowed()
+
+def allow_tensor_float_32_execution(allow):
+  """"""Allow use of TensorFloat-32 with float32 ops on supported hardware.
+
+  TensorFloat-32 is a math mode introduced with the NVIDIA Ampere architecture.
+  TensorFloat-32 kernels take float32 inputs and produce float32 outputs.
+  Internally, the inputs are cast to a custom representation with 10-bit
+  mantissa (similar to float16) and 8-bit exponent (similar to float32) and are
+  executed using TensorCores with float32 accumulation. For more information,
+  see https://blogs.nvidia.com/blog/2020/05/14/tensorfloat-32-precision-format/.
+
+  TensorFloat-32 execution is disabled by default, but this may change in a
+  future version.
+  
+  Args:
+    allow: whether to allow TensorFloat-32 execution
+  """"""
+  _pywrap_tf32_execution.allow(allow)
 
 @tf_export('config.threading.get_intra_op_parallelism_threads')
 def get_intra_op_parallelism_threads():
",0,train
16033c0b3484409a965acc0dd3054695145311a8,tensorflow/tensorflow,Python tf.config tf32 interface,tf32.cc,"@@ -0,0 +1,22 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""pybind11/pybind11.h""
+#include ""tensorflow/core/platform/tf32_utils.h""
+
+PYBIND11_MODULE(_pywrap_tf32_execution, m) {
+  m.def(""allow"", &tensorflow::allow_tf32_execution);
+  m.def(""is_allowed"", &tensorflow::tf32_execution_allowed);
+}
",0,train
690a403686c341e03164b7e39fda6e9cec062296,tensorflow/tensorflow,"GPUToCUDA: attach CUBIN to the nested module rather than to the function

Originally, we were attaching attributes containing CUBIN blobs to the kernel
function called by `gpu.launch_func`. This kernel is now contained in a nested
module that is used as a compilation unit. Attach compiled CUBIN blobs to the
module rather than to the function since we were compiling the module. This
also avoids duplication of the attribute on multiple kernels within the same
module.

PiperOrigin-RevId: 273497303",GPUToCUDAPass.h,"@@ -38,7 +38,8 @@ class LLVMDialect;
 template <typename T> class OpPassBase;
 
 using OwnedCubin = std::unique_ptr<std::vector<char>>;
-using CubinGenerator = std::function<OwnedCubin(const std::string &, FuncOp &)>;
+using CubinGenerator =
+    std::function<OwnedCubin(const std::string &, Location, StringRef)>;
 
 /// Creates a pass to convert kernel functions into CUBIN blobs.
 ///
",0,train
8df4f0ce0acc838dd252ae504d44676c35cb6a6b,tensorflow/tensorflow,"Fixed `Wrapper`'s `get_config` and `from_config`.

* `get_config`: properly serialize the wrapped layer.
  This notably fixes issues when wrapping custom layers that have
  been registered using `tf.keras.utils.register_keras_serializable`.
* `from_config`: properly copy input config to avoid side effects.",wrappers.py,"@@ -68,10 +68,7 @@ class Wrapper(Layer):
 
   def get_config(self):
     config = {
-        'layer': {
-            'class_name': self.layer.__class__.__name__,
-            'config': self.layer.get_config()
-        }
+        'layer': generic_utils.serialize_keras_object(self.layer)
     }
     base_config = super(Wrapper, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
@@ -80,7 +77,7 @@ class Wrapper(Layer):
   def from_config(cls, config, custom_objects=None):
     from tensorflow.python.keras.layers import deserialize as deserialize_layer  # pylint: disable=g-import-not-at-top
     # Avoid mutating the input dict
-    config = config.copy()
+    config = copy.deepcopy(config)
     layer = deserialize_layer(
         config.pop('layer'), custom_objects=custom_objects)
     return cls(layer, **config)
",0,test
2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. Use QuantizedMultiplier instead.,activations.cc,"@@ -76,11 +76,10 @@ struct LogSoftmaxOpData : public OpData {
 };
 
 struct LeakyReluOpData : public OpData {
-  uint8_t q_alpha = 1;
-  uint8_t q_identity = 1;
-  uint8_t zero_point = 0;
-  int32_t output_multiplier = 0;
-  int output_shift = 0;
+  int32_t output_multiplier_alpha = 0;
+  int output_shift_alpha = 0;
+  int32_t output_multiplier_identity = 0;
+  int output_shift_identity = 0;
 };
 
 struct PreluOpData : public OpData {
@@ -367,26 +366,11 @@ TfLiteStatus LeakyReluPrepare(TfLiteContext* context, TfLiteNode* node) {
 
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
     const auto* params = reinterpret_cast<TfLiteLeakyReluParams*>(node->builtin_data);
-    // Quantize the alpha with predetermined ZOOM_FACTOR and ZERO_POINT.
-    // Since in most cases 0 < alpha < 1, by setting ZOOM_FACTOR to be 200,
-    // quantized alpha can fit in INT8 range, and preserve most precision.
-    TF_LITE_ENSURE(context, params->alpha >= 0);
-    TF_LITE_ENSURE(context, params->alpha < 1);
-    static const uint8_t ZOOM_FACTOR = 200;
-    static const uint8_t ZERO_POINT = 0;
-    auto q_alpha = std::round(ZERO_POINT + params->alpha * ZOOM_FACTOR);
-    // Make sure quantized alpha is within INT8 range.
-    TF_LITE_ENSURE(context, q_alpha >= std::numeric_limits<uint8_t>::min());
-    TF_LITE_ENSURE(context, q_alpha <= std::numeric_limits<uint8_t>::max());
-    // q_alpha will be stored as uint8_t. It won't affect the input
-    data->q_alpha = q_alpha;
-
-    // q_identity is used to make sure those>0 get correct value after dequantization.
-    data->q_identity = ZOOM_FACTOR;
-    data->zero_point = ZERO_POINT;
-
-    double real_multiplier = input->params.scale / (output->params.scale * ZOOM_FACTOR);
-    QuantizeMultiplierSmallerThanOneExp(real_multiplier, &data->output_multiplier, &data->output_shift);
+
+    double alpha_multiplier = input->params.scale * params->alpha / output->params.scale;
+    QuantizeMultiplier(alpha_multiplier, &data->output_multiplier_alpha, &data->output_shift_alpha);
+    double identity_multiplier = input->params.scale / output->params.scale;
+    QuantizeMultiplier(identity_multiplier, &data->output_multiplier_identity, &data->output_shift_identity);
     }
   return context->ResizeTensor(context, output, TfLiteIntArrayCopy(input->dims));
 }
@@ -1115,13 +1099,12 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
     } break;
     case kTfLiteUInt8:
     {
-      LeakyReluParamsQuant op_params{data->q_alpha,
-                                     data->q_identity,
-                                     data->zero_point,
-                                     input->params.zero_point,
+      LeakyReluParamsQuant op_params{input->params.zero_point,
                                      output->params.zero_point,
-                                     data->output_multiplier,
-                                     data->output_shift};
+                                     data->output_multiplier_alpha,
+                                     data->output_shift_alpha,
+                                     data->output_multiplier_identity,
+                                     data->output_shift_identity};
       reference_ops::QuantizeLeakyRelu(
           op_params,
           GetTensorShape(input),
@@ -1132,21 +1115,20 @@ TfLiteStatus LeakyReluEval(TfLiteContext* context, TfLiteNode* node) {
     } break;
     case kTfLiteInt8:
       {
-      LeakyReluParamsQuant op_params{data->q_alpha,
-                                     data->q_identity,
-                                     data->zero_point,
-                                     input->params.zero_point,
-                                     output->params.zero_point,
-                                     data->output_multiplier,
-                                     data->output_shift};
-      reference_ops::QuantizeLeakyRelu(
-          op_params,
-          GetTensorShape(input),
-          GetTensorData<int8_t >(input),
-          GetTensorShape(output),
-          GetTensorData<int8_t >(output));
-      return kTfLiteOk;
-    } break;
+        LeakyReluParamsQuant op_params{input->params.zero_point,
+                                       output->params.zero_point,
+                                       data->output_multiplier_alpha,
+                                       data->output_shift_alpha,
+                                       data->output_multiplier_identity,
+                                       data->output_shift_identity};
+        reference_ops::QuantizeLeakyRelu(
+            op_params,
+            GetTensorShape(input),
+            GetTensorData<int8_t >(input),
+            GetTensorShape(output),
+            GetTensorData<int8_t >(output));
+        return kTfLiteOk;
+      } break;
     default:
       context->ReportError(
           context, ""Only float32, int8 and uint8 is supported currently, got %s."",
",0,train
2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. Use QuantizedMultiplier instead.,reference_ops.h,"@@ -270,20 +270,26 @@ inline void QuantizeLeakyRelu(const LeakyReluParamsQuant& params,
                               const T* input_data,
                               const RuntimeShape& output_shape,
                               T* output_data) {
-  ruy::profiler::ScopeLabel label(""LeakyRelu (not fused)"");
+  ruy::profiler::ScopeLabel label(""Quantized LeakyRelu (not fused)"");
   const int flat_size = MatchingFlatSize(input_shape, output_shape);
   static const int32 quantized_min = std::numeric_limits<T>::min();
   static const int32 quantized_max = std::numeric_limits<T>::max();
-  static const int32 alpha_value = params.q_alpha - params.alpha_offset;
-  static const int32 identity_value = params.q_identity - params.alpha_offset;
   for (int i = 0; i < flat_size; ++i) {
     const int32 input_value = input_data[i] - params.input_offset;
-    auto q_mutliplier = (input_value >= 0) ? identity_value : alpha_value;
-    const int32 unclamped_output =
-        params.output_offset + MultiplyByQuantizedMultiplierSmallerThanOneExp(
-            input_value * q_mutliplier,
-            params.output_multiplier,
-            params.output_shift);
+    int32 unclamped_output;
+    if (input_value >= 0) {
+      unclamped_output = params.output_offset +
+          MultiplyByQuantizedMultiplier(
+              input_value,
+              params.output_multiplier_identity,
+              params.output_shift_identity);
+    } else {
+      unclamped_output = params.output_offset +
+          MultiplyByQuantizedMultiplier(
+              input_value,
+              params.output_multiplier_alpha,
+              params.output_shift_alpha);
+    }
     const T clamped_output =
         std::min(quantized_max, std::max(quantized_min, unclamped_output));
     output_data[i] = static_cast<T>(clamped_output);
",0,train
2375810387068fb66ecba85c1fce0b0e4f5568b2,tensorflow/tensorflow,Eliminate the use of q_alpha. Use QuantizedMultiplier instead.,types.h,"@@ -1085,13 +1085,12 @@ struct LeakyReluParams {
 
 
 struct LeakyReluParamsQuant {
-  uint8_t q_alpha;
-  uint8_t q_identity;
-  int32 alpha_offset;
   int32 input_offset;
   int32 output_offset;
-  int32 output_multiplier;
-  int output_shift;
+  int32 output_multiplier_alpha;
+  int output_shift_alpha;
+  int32 output_multiplier_identity;
+  int output_shift_identity;
 };
 
 template <typename P>
",0,train
a6ee64cd216b3ac440262e1f4ec7872fe7026df6,tensorflow/tensorflow,"Conditionally allow changing a non-fusion computation root_instruction shape.

PiperOrigin-RevId: 213191899",hlo_computation.cc,"@@ -279,11 +279,11 @@ Status HloComputation::RemoveInstruction(HloInstruction* instruction) {
   return Status::OK();
 }
 
-void HloComputation::set_root_instruction(
-    HloInstruction* new_root_instruction) {
+void HloComputation::set_root_instruction(HloInstruction* new_root_instruction,
+                                          bool accept_different_shape) {
   // The shape of the root (ignoring layout) is an invariant of the computation
   // for non-fusion cases.
-  if (!IsFusionComputation()) {
+  if (!IsFusionComputation() && !accept_different_shape) {
     CHECK(ShapeUtil::Compatible(new_root_instruction->shape(),
                                 root_instruction_->shape()))
         << new_root_instruction->shape() << "" is incompatible with ""
",0,train
a6ee64cd216b3ac440262e1f4ec7872fe7026df6,tensorflow/tensorflow,"Conditionally allow changing a non-fusion computation root_instruction shape.

PiperOrigin-RevId: 213191899",hlo_computation.h,"@@ -134,9 +134,11 @@ class HloComputation {
   Status RemoveInstructionAndUnusedOperands(HloInstruction* instruction);
 
   // Set the root of the computation to the given instruction. The instruction
-  // must have already been added to the computation and have the same shape as
-  // the result of the computation for non fusion computations.
-  void set_root_instruction(HloInstruction* new_root_instruction);
+  // must have already been added to the computation. In addition it must have
+  // the same shape as the result of the computation for non fusion
+  // computations, except if accept_different_shape is set to true.
+  void set_root_instruction(HloInstruction* new_root_instruction,
+                            bool accept_different_shape = false);
 
   // Return the root instruction of the computation. The root instruction is the
   // instruction which produces the output of the computation.
",0,train
31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2.

- When explicitly assigning a GPU device, it raises an error: no argmax
  kernel for int32. Note that using argmax to judge whether a mask is
  right padded is not stable, given that argmax does not guarantee
  returning the lowest index.
- The behavior of go_backwards in V2 is incorrect when cudnn is used.

PiperOrigin-RevId: 254676863",gru_v2_test.py,"@@ -516,6 +516,31 @@ class GRUV2Test(keras_parameterized.TestCase):
                   run_eagerly=testing_utils.should_run_eagerly())
     model.fit(x, y, epochs=1, shuffle=False)
 
+  @test_util.run_v2_only
+  def test_explicit_device_with_go_backward_and_mask(self):
+    batch_size = 8
+    timestep = 7
+    masksteps = 5
+    units = 4
+
+    inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
+    mask = np.ones((batch_size, timestep)).astype(np.bool)
+    mask[:, masksteps:] = 0
+
+    # Test for V1 behavior.
+    lstm_v1 = rnn_v1.GRU(units, return_sequences=True, go_backwards=True)
+    with test_util.device(use_gpu=True):
+      outputs_masked_v1 = lstm_v1(inputs, mask=constant_op.constant(mask))
+      outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
+    self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)
+
+    # Test for V2 behavior.
+    lstm = rnn.GRU(units, return_sequences=True, go_backwards=True)
+    with test_util.device(use_gpu=True):
+      outputs_masked = lstm(inputs, mask=constant_op.constant(mask))
+      outputs_trimmed = lstm(inputs[:, :masksteps])
+    self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
+
 
 class GRULayerGradientTapeTest(test.TestCase):
 
",0,train
31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2.

- When explicitly assigning a GPU device, it raises an error: no argmax
  kernel for int32. Note that using argmax to judge whether a mask is
  right padded is not stable, given that argmax does not guarantee
  returning the lowest index.
- The behavior of go_backwards in V2 is incorrect when cudnn is used.

PiperOrigin-RevId: 254676863",lstm_v2_test.py,"@@ -717,6 +717,31 @@ class LSTMV2Test(keras_parameterized.TestCase):
     model.evaluate(x, y)
     model.predict(x)
 
+  @test_util.run_v2_only
+  def test_explicit_device_with_go_backward_and_mask(self):
+    batch_size = 8
+    timestep = 7
+    masksteps = 5
+    units = 4
+
+    inputs = np.random.randn(batch_size, timestep, units).astype(np.float32)
+    mask = np.ones((batch_size, timestep)).astype(np.bool)
+    mask[:, masksteps:] = 0
+
+    # Test for V1 behavior.
+    lstm_v1 = rnn_v1.LSTM(units, return_sequences=True, go_backwards=True)
+    with test_util.device(use_gpu=True):
+      outputs_masked_v1 = lstm_v1(inputs, mask=constant_op.constant(mask))
+      outputs_trimmed_v1 = lstm_v1(inputs[:, :masksteps])
+    self.assertAllClose(outputs_masked_v1[:, -masksteps:], outputs_trimmed_v1)
+
+    # Test for V2 behavior.
+    lstm = rnn.LSTM(units, return_sequences=True, go_backwards=True)
+    with test_util.device(use_gpu=True):
+      outputs_masked = lstm(inputs, mask=constant_op.constant(mask))
+      outputs_trimmed = lstm(inputs[:, :masksteps])
+    self.assertAllClose(outputs_masked[:, -masksteps:], outputs_trimmed)
+
 
 @keras_parameterized.run_all_keras_modes(config=_config)
 class LSTMGraphRewriteTest(keras_parameterized.TestCase):
",0,train
31a492886cbc4f62494cbe08189ce72d8892c9c1,tensorflow/tensorflow,"Fix go_backwards/mask bug in recurrent_v2.

- When explicitly assigning a GPU device, it raises an error: no argmax
  kernel for int32. Note that using argmax to judge whether a mask is
  right padded is not stable, given that argmax does not guarantee
  returning the lowest index.
- The behavior of go_backwards in V2 is incorrect when cudnn is used.

PiperOrigin-RevId: 254676863",recurrent_v2.py,"@@ -529,11 +529,20 @@ def cudnn_gru(inputs, init_h, kernel, recurrent_kernel, bias, mask, time_major,
   if mask is not None:
     sequence_length = calculate_sequence_by_mask(mask, time_major)
     if go_backwards:
+      # Three reversals are required. E.g.,
+      # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
+      # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+      # output_from_cudnn = [6, 5, 4, 0, 0]
+      # expected_output = [0, 0, 6, 5 ,4]
       inputs = array_ops.reverse_sequence_v2(inputs, sequence_length,
                                              seq_axis=0, batch_axis=1)
     outputs, h, _, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
         inputs, input_h=init_h, input_c=0, params=params, is_training=True,
         rnn_mode='gru', sequence_lengths=sequence_length)
+    if go_backwards:
+      outputs = array_ops.reverse_sequence_v2(outputs, sequence_length,
+                                              seq_axis=0, batch_axis=1)
+      outputs = array_ops.reverse(outputs, axis=[0])
   else:
     if go_backwards:
       # Reverse axis 0 since the input is already convert to time major.
@@ -1111,11 +1120,20 @@ def cudnn_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask,
   if mask is not None:
     sequence_length = calculate_sequence_by_mask(mask, time_major)
     if go_backwards:
+      # Three reversals are required. E.g.,
+      # normal input = [1, 2, 3, 0, 0]  # where 0 need to be masked
+      # reversed_input_to_cudnn = [3, 2, 1, 0, 0]
+      # output_from_cudnn = [6, 5, 4, 0, 0]
+      # expected_output = [0, 0, 6, 5 ,4]
       inputs = array_ops.reverse_sequence_v2(inputs, sequence_length,
                                              seq_axis=0, batch_axis=1)
     outputs, h, c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(
         inputs, input_h=init_h, input_c=init_c, params=params, is_training=True,
         rnn_mode='lstm', sequence_lengths=sequence_length)
+    if go_backwards:
+      outputs = array_ops.reverse_sequence_v2(outputs, sequence_length,
+                                              seq_axis=0, batch_axis=1)
+      outputs = array_ops.reverse(outputs, axis=[0])
   else:
     # # Fill the array with shape [batch] with value of max timesteps.
     # sequence_length = array_ops.fill([array_ops.shape(inputs)[1]],
@@ -1206,19 +1224,13 @@ def is_sequence_right_padded(mask, time_major):
   Returns:
     boolean scalar tensor, whether the mask is strictly right padded.
   """"""
-  timestep_index = 0 if time_major else 1
-  max_seq_length = array_ops.shape(mask)[timestep_index]
-  reversed_mask = math_ops.cast(array_ops.reverse(mask, axis=[timestep_index]),
-                                dtypes.int32)
-  # Use the argmax to find the index of leading 1 in the reversed mask, which is
-  # the index of the last True value in the original mask.
-  index = math_ops.argmax(reversed_mask, axis=timestep_index,
-                          output_type=dtypes.int32)
-  count_of_true = math_ops.reduce_sum(reversed_mask, axis=timestep_index)
-  # If the data is strictly right padded, then the
-  # ""index = max_seq_length - count_of_true"" should hold.
-  return math_ops.reduce_all(
-      math_ops.equal(index, max_seq_length - count_of_true))
+  if time_major:
+    mask = array_ops.transpose(mask)
+  max_seq_length = array_ops.shape(mask)[1]
+  count_of_true = math_ops.reduce_sum(math_ops.cast(mask, dtypes.int32), axis=1)
+  right_padded_mask = array_ops.sequence_mask(
+      count_of_true, maxlen=max_seq_length)
+  return math_ops.reduce_all(math_ops.equal(mask, right_padded_mask))
 
 
 def calculate_sequence_by_mask(mask, time_major):
@@ -1228,10 +1240,10 @@ def calculate_sequence_by_mask(mask, time_major):
   any timestep that should be masked, the corresponding field will be False.
   Consider the following example:
     a = [[True, True, False, False],
-         [True, False, True, False]]
+         [True, True, True, False]]
   It is a (2, 4) tensor, and the corresponding sequence length result should be
-  1D tensor with value [2, 3]. Note that for the second example, we need to find
-  the index of the last True value, which is 2 and sequence length is 3.
+  1D tensor with value [2, 3]. Note that the masking tensor must be right
+  padded that could be checked by, e.g., `is_sequence_right_padded()`.
 
   Args:
     mask: Boolean tensor with shape [batch, timestep] or [timestep, batch] if
@@ -1242,14 +1254,8 @@ def calculate_sequence_by_mask(mask, time_major):
     sequence_length: 1D int32 tensor.
   """"""
   timestep_index = 0 if time_major else 1
-  max_seq_length = array_ops.shape(mask)[timestep_index]
-  reversed_mask = math_ops.cast(array_ops.reverse(mask, axis=[timestep_index]),
-                                dtypes.int32)
-  # Use the argmax to find the index of leading 1 in the reversed mask, which is
-  # the index of the last True value in the original mask.
-  reversed_index = math_ops.argmax(reversed_mask, axis=timestep_index,
-                                   output_type=dtypes.int32)
-  return max_seq_length - reversed_index
+  return math_ops.reduce_sum(math_ops.cast(mask, dtypes.int32),
+                             axis=timestep_index)
 
 
 def _generate_defun_backend(unique_api_name, preferred_device, func):
",0,train
38c7871ecf1a4ede109fa0ce870c5d8d5401df05,tensorflow/tensorflow,"Manually specify `np.object` in `RaggedTensor.numpy()` as numpy otherwise complains

If given ragged rows, numpy now raises a `VisibleDeprecationWarning` if `np.object` is not manually specified.

PiperOrigin-RevId: 393076916
Change-Id: Ifce4a6fef429f2997df877d477fab25d69f7c183",ragged_tensor.py,"@@ -2088,7 +2088,10 @@ class RaggedTensor(composite_tensor.CompositeTensor,
     # np.ndarray with dtype=object and rank=1.  If they have uniform lengths,
     # they will be combined into a single np.ndarray with dtype=row.dtype and
     # rank=row.rank+1.
-    return np.array(rows)
+    #
+    # Manually set dtype as numpy now complains when given ragged rows.
+    dtype = np.object if any(len(row) != len(rows[0]) for row in rows) else None
+    return np.array(rows, dtype=dtype)
 
   def to_list(self):
     """"""Returns a nested Python `list` with the values for this `RaggedTensor`.
",0,train
85d64fdd40eafc488649b806a85a7a168dc16510,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-else-after-return in map_mhlo_to_scalar_op.h (NFC)

PiperOrigin-RevId: 434207416",map_mhlo_to_scalar_op.h,"@@ -477,7 +477,8 @@ inline Value MapMhloOpToStdScalarOp<mhlo::ConvertOp>(
                                                targetType)) {
     return b->create<mlir::arith::SIToFPOp>(loc, result_types, args,
                                             mlir::None);
-  } else if (sourceType.isa<FloatType>() && targetType.isa<FloatType>()) {
+  }
+  if (sourceType.isa<FloatType>() && targetType.isa<FloatType>()) {
     FloatType src = sourceType.cast<FloatType>();
     FloatType res = targetType.cast<FloatType>();
     if (src.getWidth() > res.getWidth()) {
@@ -884,7 +885,8 @@ inline Value MapMhloOpToStdScalarOp<mhlo::SignOp>(Location loc,
         b->create<::mlir::arith::ShRSIOp>(loc, args[0], bitwidth_minus_one);
     Value or_op = b->create<::mlir::arith::OrIOp>(loc, ashr, one);
     return b->create<::mlir::arith::SelectOp>(loc, cmp, zero, or_op);
-  } else if (element_type.isa<ComplexType>()) {
+  }
+  if (element_type.isa<ComplexType>()) {
     return b->create<::mlir::complex::SignOp>(loc, element_type, args.front());
   }
   return nullptr;
",0,train
1d89c2079931f401f0831e3d27b66dd942ae3388,tensorflow/tensorflow,Fixes typos and unnecessary import in example,text_classification_character_cnn.py,"@@ -29,7 +29,6 @@ from sklearn import metrics
 import pandas
 
 import tensorflow as tf
-from tensorflow.models.rnn import rnn, rnn_cell
 import skflow
 
 ### Training data
@@ -59,7 +58,7 @@ FILTER_SHAPE2 = [20, N_FILTERS]
 POOLING_WINDOW = 4
 POOLING_STRIDE = 2
 
-def char_rnn_model(X, y):
+def char_cnn_model(X, y):
     """"""Character level convolutional neural network model to predict classes.""""""
     byte_list = tf.reshape(skflow.ops.one_hot_matrix(X, 256), 
         [-1, MAX_DOCUMENT_LENGTH, 256, 1])
@@ -82,7 +81,7 @@ def char_rnn_model(X, y):
     # Apply regular WX + B and classification.
     return skflow.models.logistic_regression(pool2, y)
 
-classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15,
+classifier = skflow.TensorFlowEstimator(model_fn=char_cnn_model, n_classes=15,
     steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
 
 # Continuesly train for 1000 steps & predict on test set.
",0,train
1d89c2079931f401f0831e3d27b66dd942ae3388,tensorflow/tensorflow,Fixes typos and unnecessary import in example,text_classification_cnn.py,"@@ -17,7 +17,6 @@ from sklearn import metrics
 import pandas
 
 import tensorflow as tf
-from tensorflow.models.rnn import rnn, rnn_cell
 import skflow
 
 ### Training data
",0,train
52d21a8bf7b55b26498b203b902cd417cac1b040,tensorflow/tensorflow,"Remove stale TODOs in TFLite

PiperOrigin-RevId: 369661250
Change-Id: Ib0ece62269524478dc68b7b38f4a7a8197db2abe",subgraph.cc,"@@ -774,8 +774,6 @@ TfLiteStatus Subgraph::AddNodeWithParameters(
   }
 
   node.builtin_data = builtin_data_deleter.release();
-  // TODO(ycling): Filling `custom_initial_data` and `custom_initial_data_size`
-  // properly for nodes generated by ReplaceNodeSubsetsWithDelegateKernels.
 
   if (registration->builtin_code == BuiltinOperator_CUSTOM) {
     // When it's a CUSTOM op, the `custom_options` field in the Flatbuffer
",0,train
3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations

Fusion, map, select and scatter and reduce window were not correctly accounted
for. This change makes it easier to analyze their performance in the HLO
profile.
Change: 145729113",hlo_cost_analysis.cc,"@@ -164,8 +164,10 @@ Status HloCostAnalysis::HandleMap(
 
   // Compute the cost of all elements for this Map operation.
   auto element_count = ShapeUtil::ElementsIn(map->shape());
-  flop_count_ += element_count * visitor.flop_count();
   transcendental_count_ += element_count * visitor.transcendental_count();
+  auto hlo_flop_count = element_count * visitor.flop_count();
+  hlo_to_flop_count_[map] = hlo_flop_count;
+  flop_count_ += hlo_flop_count;
   return Status::OK();
 }
 
@@ -180,7 +182,9 @@ Status HloCostAnalysis::HandleReduce(
   // Compute the cost of all elements for this Reduce operation.
   auto reduction_count = ShapeUtil::ElementsIn(arg->shape()) -
                          ShapeUtil::ElementsIn(reduce->shape());
-  flop_count_ += reduction_count * visitor.flop_count();
+  auto hlo_flop_count = reduction_count * visitor.flop_count();
+  hlo_to_flop_count_[reduce] = hlo_flop_count;
+  flop_count_ += hlo_flop_count;
   transcendental_count_ += reduction_count * visitor.transcendental_count();
   return Status::OK();
 }
@@ -201,7 +205,9 @@ Status HloCostAnalysis::HandleReduceWindow(HloInstruction* reduce_window,
   for (const auto& dimension : window.dimensions()) {
     window_size *= dimension.size();
   }
-  flop_count_ += output_size * (window_size - 1) * visitor.flop_count();
+  auto hlo_flop_count = output_size * (window_size - 1) * visitor.flop_count();
+  hlo_to_flop_count_[reduce_window] = hlo_flop_count;
+  flop_count_ += hlo_flop_count;
   transcendental_count_ +=
       output_size * (window_size - 1) * visitor.transcendental_count();
   return Status::OK();
@@ -225,9 +231,11 @@ Status HloCostAnalysis::HandleSelectAndScatter(HloInstruction* instruction) {
   for (const auto& dimension : instruction->window().dimensions()) {
     window_size *= dimension.size();
   }
-  flop_count_ +=
+  auto hlo_flop_count =
       source_element_count * ((window_size - 1) * select_visitor.flop_count() +
                               scatter_visitor.flop_count());
+  hlo_to_flop_count_[instruction] = hlo_flop_count;
+  flop_count_ += hlo_flop_count;
   transcendental_count_ +=
       source_element_count *
       ((window_size - 1) * select_visitor.transcendental_count() +
@@ -303,8 +311,37 @@ Status HloCostAnalysis::HandleRng(HloInstruction* random,
 }
 
 Status HloCostAnalysis::HandleFusion(HloInstruction* fusion) {
-  // Fusion instruction itself does not contribute to computation.
-  return fusion->fused_expression_root()->Accept(this);
+  switch (fusion->fusion_kind()) {
+    case HloInstruction::FusionKind::kLoop:
+    case HloInstruction::FusionKind::kInput: {
+      // Compute the cost of the fused expression.
+      HloInstruction* fused_expression_root = fusion->fused_expression_root();
+      HloCostAnalysis visitor;
+      TF_RETURN_IF_ERROR(fused_expression_root->Accept(&visitor));
+
+      // Compute the cost of all elements for this Fusion operation.
+      auto element_count = ShapeUtil::ElementsIn(fusion->shape());
+      transcendental_count_ += element_count * visitor.transcendental_count();
+      auto hlo_flop_count = element_count * visitor.flop_count();
+      hlo_to_flop_count_[fusion] = hlo_flop_count;
+      flop_count_ += hlo_flop_count;
+      return Status::OK();
+    }
+    case HloInstruction::FusionKind::kTransposeDot:
+    case HloInstruction::FusionKind::kConvBackwardFilter:
+    case HloInstruction::FusionKind::kConvBackwardInput: {
+      // Compute the cost of the fused expression.
+      HloInstruction* fused_expression_root = fusion->fused_expression_root();
+      HloCostAnalysis visitor;
+      TF_RETURN_IF_ERROR(fused_expression_root->Accept(&visitor));
+
+      // Attribute the cost of the fused expression to the fusion node.
+      transcendental_count_ += visitor.transcendental_count();
+      hlo_to_flop_count_[fusion] += visitor.flop_count();
+      flop_count_ += visitor.flop_count();
+      return Status::OK();
+    }
+  }
 }
 
 Status HloCostAnalysis::HandleCall(
",0,train
3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations

Fusion, map, select and scatter and reduce window were not correctly accounted
for. This change makes it easier to analyze their performance in the HLO
profile.
Change: 145729113",hlo_cost_analysis.h,"@@ -134,10 +134,10 @@ class HloCostAnalysis : public DfsHloVisitor {
   std::map<const HloInstruction*, double> hlo_to_flop_count_;
 
   // The number of floating point operations in the graph.
-  double flop_count_ = 0;
+  double flop_count_ = 0.0;
 
   // The number of transcendental operations in the graph.
-  double transcendental_count_ = 0;
+  double transcendental_count_ = 0.0;
 
   TF_DISALLOW_COPY_AND_ASSIGN(HloCostAnalysis);
 };
",0,train
3296af253a0cc120175b88c383b27f02f16fb59b,tensorflow/tensorflow,"[XLA] Improve cost analysis for certain operations

Fusion, map, select and scatter and reduce window were not correctly accounted
for. This change makes it easier to analyze their performance in the HLO
profile.
Change: 145729113",hlo_cost_analysis_test.cc,"@@ -333,5 +333,52 @@ TEST_F(HloCostAnalysisTest, TotalOverflowsInt64) {
   EXPECT_GT(matmul_analysis.flop_count(), std::numeric_limits<int64>::max());
 }
 
+class FusionCostAnalysis : public ::testing::Test {
+ protected:
+  FusionCostAnalysis() = default;
+
+  Shape r0f32_ = ShapeUtil::MakeShape(F32, {});
+};
+
+TEST_F(FusionCostAnalysis, LoopFusion) {
+  // Fuse all instructions in complicated expression:
+  //
+  //   add = Add(C1, C2)
+  //   clamp = Clamp(C2, add, add)
+  //   exp = Exp(add)
+  //   mul = Mul(exp, C3)
+  //   sub = Sub(mul, clamp)
+  //   tuple = Tuple({sub, sub, mul, C1})
+  auto c1 = HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(1.1f));
+  auto c2 = HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(2.1f));
+  auto c3 = HloInstruction::CreateConstant(LiteralUtil::CreateR0<float>(9.0f));
+
+  auto add =
+      HloInstruction::CreateBinary(r0f32_, HloOpcode::kAdd, c1.get(), c2.get());
+  auto clamp = HloInstruction::CreateTernary(r0f32_, HloOpcode::kClamp,
+                                             c2.get(), add.get(), add.get());
+  auto exp = HloInstruction::CreateUnary(r0f32_, HloOpcode::kExp, add.get());
+  auto mul = HloInstruction::CreateBinary(r0f32_, HloOpcode::kMultiply,
+                                          exp.get(), c3.get());
+  auto sub = HloInstruction::CreateBinary(r0f32_, HloOpcode::kSubtract,
+                                          mul.get(), clamp.get());
+  auto tuple =
+      HloInstruction::CreateTuple({sub.get(), sub.get(), mul.get(), c1.get()});
+
+  auto fusion = HloInstruction::CreateFusion(
+      r0f32_, HloInstruction::FusionKind::kLoop, tuple.get());
+  fusion->FuseInstruction(sub.get());
+  fusion->FuseInstruction(mul.get());
+  fusion->FuseInstruction(exp.get());
+  fusion->FuseInstruction(clamp.get());
+  fusion->FuseInstruction(add.get());
+
+  HloCostAnalysis fusion_analysis;
+  ASSERT_IS_OK(fusion->Accept(&fusion_analysis));
+
+  EXPECT_EQ(fusion_analysis.flop_count(), 4);
+  EXPECT_EQ(fusion_analysis.transcendental_count(), 1);
+}
+
 }  // namespace
 }  // namespace xla
",0,train
9c405cb0e9475d82ce2b0bef04ad75e206be1267,tensorflow/tensorflow,enable bf16 for Erf,cwise_op_erf.cc,"@@ -17,7 +17,8 @@ limitations under the License.
 
 namespace tensorflow {
 
-REGISTER3(UnaryOp, CPU, ""Erf"", functor::erf, float, Eigen::half, double);
+REGISTER4(UnaryOp, CPU, ""Erf"", functor::erf, float, Eigen::half, double,
+          bfloat16);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
",0,test
9c405cb0e9475d82ce2b0bef04ad75e206be1267,tensorflow/tensorflow,enable bf16 for Erf,cwise_ops_unary_test.py,"@@ -436,6 +436,7 @@ class UnaryOpTest(test.TestCase):
     self._compareBoth(w, compute_f32(np.arccosh), math_ops.acosh)
     self._compareBoth(k, compute_f32(np.arctanh), math_ops.atanh,
                       grad_tol=1e-2)
+    self._compareBoth(x, compute_f32(np.vectorize(math.erf)), math_ops.erf)
 
   @test.disable_with_predicate(
       pred=test.is_built_with_rocm, skip_message=""On ROCm this test fails"")
",0,test
068e68b762969fe37b08bf069bb97d0356af1949,tensorflow/tensorflow,"Use macros instead of functions for float16 buffer access.

Significantly faster on Adreno devices.

PiperOrigin-RevId: 272546757",object_accessor.cc,"@@ -577,24 +577,16 @@ std::string ObjectAccessor::GetObjectDeclarations() const {
 }
 
 std::string ObjectAccessor::GetFunctionsDeclarations() const {
-  std::string modifier = """";
-  // Mali compiler does not want to compile a function without readonly
-  // modifier. See b/111601761 for the context.
-  if (is_mali_) {
-    modifier = ""readonly "";
-  }
-  // If there is a single object SSBO with F16, then we need to output functions
+  // If there is a single object SSBO with F16, then we need to output macros
   // as well.
   for (const auto& o : name_to_object_) {
     if (o.second.data_type == DataType::FLOAT16 &&
         o.second.object_type == ObjectType::BUFFER) {
-      return absl::StrCat(""vec4 Vec4FromHalf(in "", modifier,
-                          ""uvec2 v) { return vec4(unpackHalf2x16(v.x), ""
-                          ""unpackHalf2x16(v.y)); }\n""
-                          ""uvec2 Vec4ToHalf(in "",
-                          modifier,
-                          ""vec4 v) { return uvec2(packHalf2x16(v.xy), ""
-                          ""packHalf2x16(v.zw)); }\n"");
+      return absl::StrCat(
+          ""#define Vec4FromHalf(v) vec4(unpackHalf2x16(v.x), ""
+          ""unpackHalf2x16(v.y))\n"",
+          ""#define Vec4ToHalf(v) uvec2(packHalf2x16(v.xy), ""
+          ""packHalf2x16(v.zw))"");
     }
   }
   return """";
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",context.h,"@@ -452,13 +452,15 @@ typedef struct _TfLiteDelegate {
 
   // Copy the data from delegate buffer handle to raw memory.
   // This can be null if the delegate doesn't use its own buffer.
-  TfLiteStatus (*CopyFromBufferHandle)(TfLiteDelegate* delegate,
+  TfLiteStatus (*CopyFromBufferHandle)(TfLiteContext* context,
+                                       TfLiteDelegate* delegate,
                                        TfLiteBufferHandle buffer_handle,
                                        void* data, size_t size);
 
   // Copy the data from raw memory to delegate buffer handle.
   // This can be null if the delegate doesn't use its own buffer.
-  TfLiteStatus (*CopyToBufferHandle)(TfLiteDelegate* delegate,
+  TfLiteStatus (*CopyToBufferHandle)(TfLiteContext* context,
+                                     TfLiteDelegate* delegate,
                                      TfLiteBufferHandle buffer_handle,
                                      void* data, size_t size);
 
@@ -466,7 +468,7 @@ typedef struct _TfLiteDelegate {
   // this doesn't release the underlying resource (e.g. textures). The
   // resources are either owned by application layer or the delegate.
   // This can be null if the delegate doesn't use its own buffer.
-  void (*FreeBufferHandle)(TfLiteDelegate* delegate,
+  void (*FreeBufferHandle)(TfLiteContext* context, TfLiteDelegate* delegate,
                            TfLiteBufferHandle* handle);
 } TfLiteDelegate;
 
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",delegate.cc,"@@ -55,17 +55,15 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteDelegate* delegate) {
   return kTfLiteOk;
 }
 
-TfLiteStatus CopyFromBufferHandle(TfLiteDelegate* delegate,
+TfLiteStatus CopyFromBufferHandle(TfLiteContext* context,
+                                  TfLiteDelegate* delegate,
                                   TfLiteBufferHandle buffer_handle, void* data,
                                   size_t size) {
-  // TODO(nupurgarg): Make BufferMap unique to each interpreter in order to
-  // support multiple interpreters using a single delegate.
   BufferMap* buffer_map =
-      reinterpret_cast<DelegateData*>(delegate->data_)->GetBufferMap();
+      reinterpret_cast<DelegateData*>(delegate->data_)->GetBufferMap(context);
 
-  // TODO(nupurgarg): Use TfLiteContext's ReportError instead of fprinf.
   if (!buffer_map->HasTensor(buffer_handle)) {
-    fprintf(stderr, ""Invalid tensor index %d.\n"", buffer_handle);
+    context->ReportError(context, ""Invalid tensor index %d."", buffer_handle);
     return kTfLiteError;
   }
 
@@ -73,7 +71,8 @@ TfLiteStatus CopyFromBufferHandle(TfLiteDelegate* delegate,
   tensorflow::StringPiece t_data = t.tensor_data();
 
   if (size != t_data.size()) {
-    fprintf(stderr, ""Not enough space to store TensorFlow's aligned buffer.\n"");
+    context->ReportError(
+        context, ""Not enough space to store TensorFlow's aligned buffer."");
     return kTfLiteError;
   }
 
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",delegate.h,"@@ -26,8 +26,8 @@ namespace tflite {
 // executed by TensorFlow's runtime via Eager.
 //
 // The interpreter must be constructed after the EagerDelegate and destructed
-// before the EagerDelegate. This delegate can only be used with one
-// interpreter.
+// before the EagerDelegate. This delegate may be used with multiple
+// interpreters, but it is *not* thread-safe.
 //
 // Usage:
 //   EagerDelegate delegate;
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",delegate_data.h,"@@ -32,14 +32,18 @@ class DelegateData {
   // The EagerContext that is required for execution of Eager Ops.
   tensorflow::EagerContext* GetEagerContext() { return eager_context_.get(); }
 
-  // Map from TF Lite tensor index to TensorFlow tensor.
-  BufferMap* GetBufferMap() { return &buffer_map_; }
+  // Map from TF Lite tensor index to TensorFlow tensor for a given context.
+  BufferMap* GetBufferMap(const TfLiteContext* context) {
+    return &buffer_map_[context];
+  }
 
  private:
   explicit DelegateData(tensorflow::EagerContext* eager_context);
 
   std::unique_ptr<tensorflow::EagerContext> eager_context_;
-  BufferMap buffer_map_;
+  // TODO(b/112439500): Clean up stale BufferMap instances after adding the
+  // necessary cleanup hook from a TfLiteContext to a TfLiteDelegate.
+  std::unordered_map<const TfLiteContext*, BufferMap> buffer_map_;
 };
 
 }  // namespace eager
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",delegate_data_test.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include ""tensorflow/contrib/lite/context.h""
 #include ""tensorflow/contrib/lite/testing/util.h""
 
 namespace tflite {
@@ -29,8 +30,12 @@ TEST(DelegateDataTest, Basic) {
   // binary.
   EXPECT_TRUE(DelegateData::Create(&data).ok());
 
+  TfLiteContext dummy_context1 = {};
+  TfLiteContext dummy_context2 = {};
   EXPECT_NE(data->GetEagerContext(), nullptr);
-  EXPECT_NE(data->GetBufferMap(), nullptr);
+  EXPECT_NE(data->GetBufferMap(&dummy_context1), nullptr);
+  EXPECT_NE(data->GetBufferMap(&dummy_context1),
+            data->GetBufferMap(&dummy_context2));
 }
 
 }  // namespace
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",delegate_test.cc,"@@ -25,8 +25,6 @@ namespace {
 using ::testing::ContainsRegex;
 using ::testing::ElementsAre;
 
-// TODO(nupurgarg): Add a test with multiple interpreters for one delegate.
-
 class DelegateTest : public testing::EagerModelTest {
  public:
   DelegateTest() {
@@ -139,6 +137,56 @@ TEST_F(DelegateTest, OnlyTFLite) {
   ASSERT_THAT(GetValues(2), ElementsAre(1.1f, 4.4f, 9.9f, 17.6f));
 }
 
+TEST_F(DelegateTest, MultipleInterpretersSameDelegate) {
+  // Build a graph, configure the delegate and set inputs.
+  {
+    AddTensors(9, {0, 3}, {8}, kTfLiteFloat32, {3});
+    AddTfOp(testing::kUnpack, {0}, {1, 2});
+    AddTfOp(testing::kUnpack, {3}, {4, 5});
+    AddTfOp(testing::kAdd, {1, 4}, {6});
+    AddTfOp(testing::kAdd, {2, 5}, {7});
+    AddTfOp(testing::kMul, {6, 7}, {8});
+    ConfigureDelegate();
+    SetShape(0, {2, 2, 1});
+    SetValues(0, {1.1f, 2.2f, 3.3f, 4.4f});
+    SetShape(3, {2, 2, 1});
+    SetValues(3, {1.1f, 2.2f, 3.3f, 4.4f});
+  }
+
+  // Create a new interpreter, inject into the test framework and build
+  // a different graph using the *same* delegate.
+  std::unique_ptr<Interpreter> interpreter(new Interpreter(&error_reporter_));
+  interpreter_.swap(interpreter);
+  {
+    AddTensors(10, {0}, {9}, kTfLiteFloat32, {3});
+    AddTfOp(testing::kUnpack, {0}, {1, 2});
+    AddTfOp(testing::kAdd, {1, 2}, {3});
+    AddTfOp(testing::kUnpack, {3}, {4, 5});
+    AddTfLiteMulOp({4, 5}, {6});
+    AddTfOp(testing::kUnpack, {6}, {7, 8});
+    AddTfOp(testing::kAdd, {7, 8}, {9});
+    ConfigureDelegate();
+    SetShape(0, {2, 2, 2, 1});
+    SetValues(0, {3.0f, 1.0f, 0.5f, -1.0f, 0.0f, 1.0f, 1.5f, 3.0f});
+  }
+
+  // Swap back in the first interpreter and validate inference.
+  interpreter_.swap(interpreter);
+  {
+    ASSERT_TRUE(Invoke());
+    EXPECT_THAT(GetShape(8), ElementsAre(2, 1));
+    EXPECT_THAT(GetValues(8), ElementsAre(14.52f, 38.72f));
+  }
+
+  // Swap in the second interpreter and validate inference.
+  interpreter_.swap(interpreter);
+  {
+    ASSERT_TRUE(Invoke());
+    EXPECT_THAT(GetShape(9), ElementsAre(1));
+    EXPECT_THAT(GetValues(9), ElementsAre(10.0f));
+  }
+}
+
 }  // namespace
 }  // namespace eager
 }  // namespace tflite
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",kernel.cc,"@@ -150,8 +150,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   op_data->eager_context =
       reinterpret_cast<DelegateData*>(params->delegate->data_)
           ->GetEagerContext();
-  op_data->buffer_map =
-      reinterpret_cast<DelegateData*>(params->delegate->data_)->GetBufferMap();
+  op_data->buffer_map = reinterpret_cast<DelegateData*>(params->delegate->data_)
+                            ->GetBufferMap(context);
 
   CHECK(params->output_tensors);
   for (auto tensor_index : TfLiteIntArrayView(params->output_tensors)) {
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",kernel_test.cc,"@@ -55,12 +55,14 @@ class KernelTest : public testing::EagerModelTest {
     delegate_.data_ = delegate_data_.get();
     delegate_.FreeBufferHandle = nullptr;
     delegate_.Prepare = prepare_function;
-    delegate_.CopyFromBufferHandle = [](TfLiteDelegate* delegate,
+    delegate_.CopyFromBufferHandle = [](TfLiteContext* context,
+                                        TfLiteDelegate* delegate,
                                         TfLiteBufferHandle buffer_handle,
                                         void* data, size_t size) {
       auto* delegate_data = reinterpret_cast<DelegateData*>(delegate->data_);
-      tensorflow::StringPiece values =
-          delegate_data->GetBufferMap()->GetTensor(buffer_handle).tensor_data();
+      tensorflow::StringPiece values = delegate_data->GetBufferMap(context)
+                                           ->GetTensor(buffer_handle)
+                                           .tensor_data();
       memcpy(data, values.data(), values.size());
       return kTfLiteOk;
     };
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",interpreter.cc,"@@ -157,7 +157,7 @@ Interpreter::~Interpreter() {
     TfLiteTensor* tensor = &context_.tensors[i];
     if (tensor->buffer_handle != kTfLiteNullBufferHandle &&
         tensor->delegate->FreeBufferHandle != nullptr) {
-      tensor->delegate->FreeBufferHandle(tensor->delegate,
+      tensor->delegate->FreeBufferHandle(&context_, tensor->delegate,
                                          &tensor->buffer_handle);
     }
     TfLiteTensorFree(tensor);
@@ -988,7 +988,7 @@ TfLiteStatus Interpreter::SetBufferHandle(int tensor_index,
   tensor->delegate = delegate;
   if (tensor->buffer_handle != kTfLiteNullBufferHandle) {
     TF_LITE_ENSURE(&context_, tensor->delegate->FreeBufferHandle != nullptr);
-    tensor->delegate->FreeBufferHandle(tensor->delegate,
+    tensor->delegate->FreeBufferHandle(&context_, tensor->delegate,
                                        &tensor->buffer_handle);
   }
   tensor->buffer_handle = buffer_handle;
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",interpreter.h,"@@ -350,7 +350,7 @@ class Interpreter {
       // This can be null if the delegate doesn't use its own buffer.
       TF_LITE_ENSURE(&context_,
                      tensor->delegate->CopyFromBufferHandle != nullptr);
-      tensor->delegate->CopyFromBufferHandle(tensor->delegate,
+      tensor->delegate->CopyFromBufferHandle(&context_, tensor->delegate,
                                              tensor->buffer_handle,
                                              tensor->data.raw, tensor->bytes);
       tensor->data_is_stale = false;
",0,test
425b62a344f18c875f6f024b36ae37749cb00feb,tensorflow/tensorflow,"Provide TfLiteContext arg to all TfLiteDelegate callbacks

Note: This change may break clients who have custom
TfLiteDelegate implementations; this API has been and remains
experimental and subject to such changes.
PiperOrigin-RevId: 208683190",interpreter_test.cc,"@@ -1080,21 +1080,22 @@ class TestDelegate : public ::testing::Test {
         return kTfLiteOk;
       };
       delegate_.CopyToBufferHandle =
-          [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle,
-             void* data, size_t size) -> TfLiteStatus {
+          [](TfLiteContext* context, TfLiteDelegate* delegate,
+             TfLiteBufferHandle buffer_handle, void* data,
+             size_t size) -> TfLiteStatus {
         // TODO(ycling): Implement tests to test buffer copying logic.
         return kTfLiteOk;
       };
       delegate_.CopyFromBufferHandle =
-          [](TfLiteDelegate* delegate, TfLiteBufferHandle buffer_handle,
-             void* data, size_t size) -> TfLiteStatus {
+          [](TfLiteContext* context, TfLiteDelegate* delegate,
+             TfLiteBufferHandle buffer_handle, void* data,
+             size_t size) -> TfLiteStatus {
         // TODO(ycling): Implement tests to test buffer copying logic.
         return kTfLiteOk;
       };
-      delegate_.FreeBufferHandle = [](TfLiteDelegate* delegate,
-                                      TfLiteBufferHandle* handle) {
-        *handle = kTfLiteNullBufferHandle;
-      };
+      delegate_.FreeBufferHandle =
+          [](TfLiteContext* context, TfLiteDelegate* delegate,
+             TfLiteBufferHandle* handle) { *handle = kTfLiteNullBufferHandle; };
       // Store type-punned data SimpleDelegate structure.
       delegate_.data_ = reinterpret_cast<void*>(this);
     }
",0,test
35a14f9ea24ea4d83fb6e279b7a2e03ac1c386eb,tensorflow/tensorflow,"Undo the removal of an import which was actually required for Ubuntu build.
Fix usage of int64/uint64.

PiperOrigin-RevId: 244229396",util.cc,"@@ -32,13 +32,15 @@ limitations under the License.
 #include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/stacktrace.h""
 
+namespace xla {
+
 namespace {
 tensorflow::mutex timer_stats_lock(tensorflow::LINKER_INITIALIZED);
 
 struct TimerStats {
   double cumulative_secs = 0;
   double max_secs = 0;
-  int64 times_called = 0;
+  uint64 times_called = 0;
 };
 
 // Global mapping from timer IDs to timer statistics.
@@ -46,8 +48,6 @@ auto& timers_stats GUARDED_BY(timer_stats_lock) =
     *new absl::flat_hash_map<uint64, TimerStats>();
 }  // namespace
 
-namespace xla {
-
 Status WithLogBacktrace(const Status& status) {
   CHECK(!status.ok());
   VLOG(1) << status.ToString();
",0,test
1ed03f85921e36f20b0a27174a5b2d7f103c271d,tensorflow/tensorflow,"Update gmm_ops.py (#7614)

* Update gmm_ops.py

It should be int64 instead of int32

* Remove cast altogether

* cast num_data into int64

As per ashish's comment",gmm_ops.py,"@@ -85,7 +85,7 @@ def _init_clusters_random(data, num_clusters, random_seed):
         maxval=math_ops.cast(num_data, dtypes.int64),
         seed=random_seed,
         dtype=dtypes.int64)
-  indices = math_ops.cast(indices, dtypes.int32) % num_data
+  indices = indices % math_ops.cast(num_data, dtypes.int64)
   clusters_init = embedding_lookup(data, indices, partition_strategy='div')
   return clusters_init
 
",0,train
8d42d1e0b8b00c04d34ea585d360fd54206f6cbb,tensorflow/tensorflow,fixes erroneous collection in contrib batch_norm,layers.py,"@@ -525,7 +525,7 @@ def batch_norm(
       if layer.beta:
         _add_variable_to_collections(layer.beta, variables_collections, 'beta')
       if layer.gamma:
-        _add_variable_to_collections(layer.beta, variables_collections, 'gamma')
+        _add_variable_to_collections(layer.gamma, variables_collections, 'gamma')
 
       if activation_fn is not None:
         outputs = activation_fn(outputs)
",0,train
8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits.

Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis.

PiperOrigin-RevId: 345323762
Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_device.h,"@@ -24,6 +24,8 @@ limitations under the License.
 #include ""mlir/IR/Dialect.h""  // from @llvm-project
 #include ""mlir/IR/OpDefinition.h""  // from @llvm-project
 #include ""mlir/IR/Value.h""  // from @llvm-project
+#include ""mlir/Interfaces/ControlFlowInterfaces.h""  // from @llvm-project
+#include ""mlir/Interfaces/SideEffectInterfaces.h""  // from @llvm-project
 
 namespace mlir {
 namespace tf_device {
",0,test
8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits.

Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis.

PiperOrigin-RevId: 345323762
Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops.h,"@@ -29,6 +29,7 @@ limitations under the License.
 #include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/IR/TypeUtilities.h""  // from @llvm-project
 #include ""mlir/Interfaces/CallInterfaces.h""  // from @llvm-project
+#include ""mlir/Interfaces/ControlFlowInterfaces.h""  // from @llvm-project
 #include ""mlir/Interfaces/DerivedAttributeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/InferTypeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/LoopLikeInterface.h""  // from @llvm-project
",0,test
8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits.

Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis.

PiperOrigin-RevId: 345323762
Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops_a_m.h,"@@ -26,6 +26,7 @@ limitations under the License.
 #include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/IR/TypeUtilities.h""  // from @llvm-project
 #include ""mlir/Interfaces/CallInterfaces.h""  // from @llvm-project
+#include ""mlir/Interfaces/ControlFlowInterfaces.h""  // from @llvm-project
 #include ""mlir/Interfaces/DerivedAttributeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/InferTypeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/LoopLikeInterface.h""  // from @llvm-project
",0,test
8b0eee37b0e6d5a2161ef39a86e51ab2d7c9ebeb,tensorflow/tensorflow,"Annotate tf_device.return and tf.Yield with NoSideEffect and ReturnLike traits.

Marking these ops as NoSideEffect will result in such terminators to not be considered as side effecting, in side effect analysis.

PiperOrigin-RevId: 345323762
Change-Id: I82d216bdfd9a214a0f38d2888022e3aa00f69cc8",tf_ops_n_z.h,"@@ -26,6 +26,7 @@ limitations under the License.
 #include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/IR/TypeUtilities.h""  // from @llvm-project
 #include ""mlir/Interfaces/CallInterfaces.h""  // from @llvm-project
+#include ""mlir/Interfaces/ControlFlowInterfaces.h""  // from @llvm-project
 #include ""mlir/Interfaces/DerivedAttributeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/InferTypeOpInterface.h""  // from @llvm-project
 #include ""mlir/Interfaces/LoopLikeInterface.h""  // from @llvm-project
",0,test
0d9b07979d180d0a04e334b2ea3f3b4ca7790eba,tensorflow/tensorflow,"Retry ""PR #31106: speedup reduce op grads when keep_dims=True""

but now with forward compatibility guards to prevent breakages.

PiperOrigin-RevId: 265151183",math_grad.py,"@@ -193,8 +193,16 @@ def _SumGrad(op, grad):
         return [array_ops.tile(grad, tile_scaling), None]
 
   input_shape = array_ops.shape(op.inputs[0])
-  # TODO(apassos) remove this once device placement for eager ops makes more
-  # sense.
+
+  if compat.forward_compatible(2019, 9, 23):
+    if not op.get_attr(""keep_dims""):
+      with ops.colocate_with(input_shape):
+        # TODO(apassos) remove this once device placement for eager ops makes
+        # more sense.
+        output_shape_kept_dims = math_ops.reduced_shape(input_shape,
+                                                        op.inputs[1])
+      grad = array_ops.reshape(grad, output_shape_kept_dims)
+    return [array_ops.broadcast_to(grad, input_shape), None]
   with ops.colocate_with(input_shape):
     output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
     tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
@@ -205,10 +213,13 @@ def _SumGrad(op, grad):
 def _MinOrMaxGrad(op, grad):
   """"""Gradient for Min or Max. Amazingly it's precisely the same code.""""""
   input_shape = array_ops.shape(op.inputs[0])
-  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
   y = op.outputs[0]
-  y = array_ops.reshape(y, output_shape_kept_dims)
-  grad = array_ops.reshape(grad, output_shape_kept_dims)
+  if not op.get_attr(""keep_dims""):
+    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
+    y = array_ops.reshape(y, output_shape_kept_dims)
+    grad = array_ops.reshape(grad, output_shape_kept_dims)
+  else:
+    output_shape_kept_dims = array_ops.shape(y)
 
   # Compute the number of selected (maximum or minimum) elements in each
   # reduction dimension. If there are multiple minimum or maximum elements
@@ -263,11 +274,18 @@ def _ProdGrad(op, grad):
   # Reshape reduction indices for the case where the parameter is a scalar
   reduction_indices = array_ops.reshape(op.inputs[1], [-1])
 
-  # Expand grad to full input shape
-  output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
-  tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
-  grad = array_ops.reshape(grad, output_shape_kept_dims)
-  grad = array_ops.tile(grad, tile_scaling)
+  if compat.forward_compatible(2019, 9, 23):
+    # Expand grad to full input shape
+    if not op.get_attr(""keep_dims""):
+      output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
+      grad = array_ops.reshape(grad, output_shape_kept_dims)
+
+    grad = array_ops.broadcast_to(grad, input_shape)
+  else:
+    output_shape_kept_dims = math_ops.reduced_shape(input_shape, op.inputs[1])
+    tile_scaling = _safe_shape_div(input_shape, output_shape_kept_dims)
+    grad = array_ops.reshape(grad, output_shape_kept_dims)
+    grad = array_ops.tile(grad, tile_scaling)
 
   # Pack all reduced dimensions into a single one, so we can perform the
   # cumprod ops. If the reduction dims list is empty, it defaults to float32,
",0,train
ae32e9096028d0d0d8fc4c007e4192ba36f80408,tensorflow/tensorflow,"Added support of Floor/FloorDiv/FloorMod in model builder.

PiperOrigin-RevId: 364625346
Change-Id: I29df47dab26f2958b6dfdeb71d18277785b0cac5",model_builder.cc,"@@ -725,6 +725,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser {
       case OperationType::COS:
       case OperationType::ELU:
       case OperationType::EXP:
+      case OperationType::FLOOR:
       case OperationType::LOG:
       case OperationType::NEG:
       case OperationType::RSQRT:
@@ -742,6 +743,8 @@ class ElementwiseOperationParser : public TFLiteOperationParser {
   bool IsTwoArgumentOperation() const {
     switch (operation_type_) {
       case OperationType::DIV:
+      case OperationType::FLOOR_DIV:
+      case OperationType::FLOOR_MOD:
       case OperationType::MAXIMUM:
       case OperationType::MINIMUM:
       case OperationType::POW:
@@ -756,6 +759,8 @@ class ElementwiseOperationParser : public TFLiteOperationParser {
   bool IsTwoArgumentOperationWithConst() const {
     switch (operation_type_) {
       case OperationType::DIV:
+      case OperationType::FLOOR_DIV:
+      case OperationType::FLOOR_MOD:
       case OperationType::MAXIMUM:
       case OperationType::MINIMUM:
       case OperationType::POW:
@@ -2367,6 +2372,14 @@ std::unique_ptr<TFLiteOperationParser> NewOperationParser(
       return std::make_unique<ElementwiseOperationParser>(OperationType::ELU);
     case kTfLiteBuiltinExp:
       return std::make_unique<ElementwiseOperationParser>(OperationType::EXP);
+    case kTfLiteBuiltinFloor:
+      return std::make_unique<ElementwiseOperationParser>(OperationType::FLOOR);
+    case kTfLiteBuiltinFloorDiv:
+      return std::make_unique<ElementwiseOperationParser>(
+          OperationType::FLOOR_DIV);
+    case kTfLiteBuiltinFloorMod:
+      return std::make_unique<ElementwiseOperationParser>(
+          OperationType::FLOOR_MOD);
     case kTfLiteBuiltinFullyConnected:
       return std::make_unique<FullyConnectedOperationParser>();
     case kTfLiteBuiltinHardSwish:
",0,train
f5b3248917c55e91f77df97dd86d1fe77eadb4e3,tensorflow/tensorflow,"Fix logistic_regression() summary name conflict. (#2446)

Error when calling logistic_regression() more
than once:
Duplicate tag logistic_regression.X found in summary inputs",models.py,"@@ -137,8 +137,8 @@ def logistic_regression(X,
     uniform_unit_scaling_initialzer will be used.
   """"""
   with vs.variable_scope('logistic_regression'):
-    logging_ops.histogram_summary('logistic_regression.X', X)
-    logging_ops.histogram_summary('logistic_regression.y', y)
+    logging_ops.histogram_summary('%s.X' % vs.get_variable_scope().name, X)
+    logging_ops.histogram_summary('%s.y' % vs.get_variable_scope().name, y)
     # Set up the requested initialization.
     if init_mean is None:
       weights = vs.get_variable('weights',
@@ -152,8 +152,8 @@ def logistic_regression(X,
       bias = vs.get_variable('bias', [y.get_shape()[-1]],
                              initializer=init_ops.random_normal_initializer(
                                  init_mean, init_stddev))
-    logging_ops.histogram_summary('logistic_regression.weights', weights)
-    logging_ops.histogram_summary('logistic_regression.bias', bias)
+    logging_ops.histogram_summary('%s.weights' % vs.get_variable_scope().name, weights)
+    logging_ops.histogram_summary('%s.bias' % vs.get_variable_scope().name, bias)
     # If no class weight provided, try to retrieve one from pre-defined
     # tensor name in the graph.
     if not class_weight:
",0,test
b83bc10e831c44488d56220aec27117f8dc0cc3d,tensorflow/tensorflow,"Avoid divisions when the divisor is a power of two.

PiperOrigin-RevId: 260738949",block_map.cc,"@@ -183,11 +183,11 @@ void MakeBlockMap(int rows, int cols, int depth, int kernel_rows,
   const int smallc =
       round_down_pot(cols >> num_blocks_of_cols_log2, kernel_cols);
   const int missr =
-      round_up_pot(rows - (smallr << num_blocks_of_rows_log2), kernel_rows) /
-      kernel_rows;
+      round_up_pot(rows - (smallr << num_blocks_of_rows_log2), kernel_rows) >>
+      floor_log2(kernel_rows);
   const int missc =
-      round_up_pot(cols - (smallc << num_blocks_of_cols_log2), kernel_cols) /
-      kernel_cols;
+      round_up_pot(cols - (smallc << num_blocks_of_cols_log2), kernel_cols) >>
+      floor_log2(kernel_cols);
 
   block_map->dims[Side::kLhs] = rows;
   block_map->dims[Side::kRhs] = cols;
",0,train
19c51a72a3199b1abbdd41a9b89a01c2aef31a78,tensorflow/tensorflow,"Eliminate pass through return values from tf_device.cluster op

Values that are not defined in the cluster don't need to be returned from the cluster return op. Otherwise, any value with an unsupported type will fail legalization in phase 2.

PiperOrigin-RevId: 366764596
Change-Id: I239b56dfb38cabdca6c487da992224822c2665cc",tf_device.cc,"@@ -677,6 +677,69 @@ bool ReplicateOp::WrapsSingleOp() { return BlockWrapsSingleOp(&GetBody()); }
 // Canonicalization patterns
 //===----------------------------------------------------------------------===//
 
+//===----------------------------------------------------------------------===//
+// tf_device.cluster
+//===----------------------------------------------------------------------===//
+
+namespace {
+
+// Eliminates cluster op results that are not defined within the cluster and are
+// defined outside. cluster op can be rewritten to remove those results.
+static LogicalResult EliminatePassThroughResults(ClusterOp op,
+                                                 PatternRewriter& rewriter) {
+  mlir::Block& body = op.GetBody();
+  Operation* return_op = body.getTerminator();
+  int num_results = return_op->getNumOperands();
+
+  // Values defined within the cluster.
+  llvm::SmallVector<Value, 4> cluster_vals;
+  cluster_vals.reserve(num_results);
+
+  // New results stores values to use while replacing the old cluster op.
+  llvm::SmallVector<Value, 4> new_results;
+  new_results.reserve(num_results);
+  for (Value result : return_op->getOperands()) {
+    if (result.getParentBlock() == &body) {
+      // This result will be populated with the new result after rewriting the
+      // cluster op.
+      new_results.push_back(nullptr);
+      cluster_vals.push_back(result);
+    } else {
+      new_results.push_back(result);
+    }
+  }
+
+  // Return failure if there are no pass through results and op is already
+  // canonical.
+  if (cluster_vals.size() == num_results) return failure();
+
+  // Rewrite return op in the cluster.
+  rewriter.setInsertionPoint(return_op);
+  auto new_return =
+      rewriter.replaceOpWithNewOp<tf_device::ReturnOp>(return_op, cluster_vals);
+
+  // Rewrite the cluster op.
+  rewriter.setInsertionPoint(op);
+  auto new_op = rewriter.create<tf_device::ClusterOp>(
+      op->getLoc(), new_return.getOperandTypes(), op->getOperands(),
+      op->getAttrs());
+  rewriter.inlineRegionBefore(op.getBodyRegion(), new_op.getBodyRegion(),
+                              new_op.getBodyRegion().end());
+
+  int idx = 0;
+  for (Value& result : new_results) {
+    if (result == nullptr) result = new_op.getResult(idx++);
+  }
+  rewriter.replaceOp(op, new_results);
+  return success();
+}
+}  // anonymous namespace
+
+void ClusterOp::getCanonicalizationPatterns(OwningRewritePatternList& results,
+                                            MLIRContext* context) {
+  results.insert(EliminatePassThroughResults);
+}
+
 //===----------------------------------------------------------------------===//
 // tf_device.launch
 //===----------------------------------------------------------------------===//
",0,test
82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas

PiperOrigin-RevId: 277162934
Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",keras_utils_test.py,"@@ -665,8 +665,8 @@ class TestDistributionStrategyWithStaticShapes(test.TestCase,
   def test_input_batch_size_not_divisible_by_num_replicas(self, distribution):
     with distribution.scope():
       with self.assertRaisesRegexp(
-          ValueError, 'The `batch_size` argument value 5 cannot be divisible '
-          'by number of replicas 2'):
+          ValueError, r'The `batch_size` argument \(5\) must be divisible by '
+                      r'the number of replicas \(2\)'):
         keras.layers.Input(shape=(3,), batch_size=5, name='input')
 
   @combinations.generate(
",0,train
82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas

PiperOrigin-RevId: 277162934
Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",input_layer.py,"@@ -72,8 +72,8 @@ class InputLayer(base_layer.Layer):
     if strategy and batch_size is not None and \
         distributed_training_utils.global_batch_size_supported(strategy):
       if batch_size % strategy.num_replicas_in_sync != 0:
-        raise ValueError('The `batch_size` argument value {} cannot be '
-                         'divisible by number of replicas {}'.format(
+        raise ValueError('The `batch_size` argument ({}) must be divisible by '
+                         'the number of replicas ({})'.format(
                              batch_size, strategy.num_replicas_in_sync))
       batch_size = batch_size // strategy.num_replicas_in_sync
 
",0,train
82f4a53775ba338dbaed0c329959d9ed53d428e4,tensorflow/tensorflow,"Clarify error message when batch size not divisible by num_replicas

PiperOrigin-RevId: 277162934
Change-Id: Ic65ae36bdb78f2e75ee14e69d24ac4005b6a7dde",training.py,"@@ -1894,8 +1894,8 @@ class Model(network.Network):
         # Check `batch_size` argument is consistent with InputLayer.
         if batch_size is not None:
           if batch_size % num_splits_for_ds != 0:
-            raise ValueError('The `batch_size` argument value {} cannot be '
-                             'divisible by number of replicas {}'.format(
+            raise ValueError('The `batch_size` argument ({}) must be divisible '
+                             'the by number of replicas ({})'.format(
                                  batch_size, num_splits_for_ds))
           per_replica_batch_size = batch_size // num_splits_for_ds
 
",0,train
478594457f185c4651120cf20453158a04cdbefe,tensorflow/tensorflow,Move code to a function to make the code clearer.,mark_for_compilation_pass.cc,"@@ -1076,42 +1076,9 @@ StatusOr<bool> IsIdentityDrivingConstsInLoop(Node* node) {
   return true;
 }
 
-Status MarkForCompilationPassImpl::FindCompilationCandidates() {
-  OptimizerOptions opts;
-  std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
-      new ProcessFunctionLibraryRuntime(nullptr, env_, /*config=*/nullptr,
-                                        TF_GRAPH_DEF_VERSION, flib_def_, opts));
-  FunctionLibraryRuntime* lib_runtime =
-      pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice);
-  std::vector<bool> compile_time_const_nodes(graph_->num_node_ids(), false);
-  TF_RETURN_IF_ERROR(BackwardsConstAnalysis(
-      *graph_, /*compile_time_const_arg_indices=*/nullptr,
-      &compile_time_const_nodes, lib_runtime));
-
-  // Iterate over nodes in sorted order so that compiler fuel is deterministic.
-  // We can't simply pass op_nodes().begin() and op_nodes().end() to the
-  // std::vector constructor because they're not proper iterators, with
-  // iterator_traits defined and so on.
-  std::vector<Node*> sorted_nodes;
-  for (Node* node : graph_->op_nodes()) {
-    sorted_nodes.push_back(node);
-  }
-  std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeComparatorID());
-
-  if (*debug_options_.fuel >= std::numeric_limits<int64>::max() / 2) {
-    // The assumption is that if fuel started out as INT64_MAX, it will forever
-    // stay greater than INT64_MAX / 2.
-    VLOG(2) << ""Starting fuel: infinity"";
-  } else {
-    VLOG(2) << ""Starting fuel: "" << *debug_options_.fuel;
-  }
-
-  VLOG(2) << ""sorted_nodes.size() = "" << sorted_nodes.size();
-
+std::unique_ptr<absl::flat_hash_set<string>> GetWhitelist() {
   MarkForCompilationPassFlags* flags = GetMarkForCompilationPassFlags();
-  absl::flat_hash_set<string> whitelist;
-  auto vall_ops = XlaOpRegistry::GetAllRegisteredOps();
-  absl::flat_hash_set<string> all_ops(vall_ops.begin(), vall_ops.end());
+  auto whitelist = absl::WrapUnique(new absl::flat_hash_set<string>());
 
   for (auto s : absl::StrSplit(flags->tf_xla_supported_nodes, "","")) {
     bool fusible = s == ""FUSIBLE"";
@@ -1119,7 +1086,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
     if (s == ""PW"" || fusible) {
       added = true;
       // Unary
-      whitelist.insert(
+      whitelist->insert(
           {""ComplexAbs"", ""Angle"", ""Conj"", ""Abs"", ""Acos"", ""Acosh"", ""Asin"",
            ""Atan"", ""Atanh"", ""Ceil"", ""Cos"", ""Cosh"", ""Sin"", ""Exp"", ""Expm1"",
            ""Floor"", ""IsFinite"", ""IsInf"", ""IsNan"", ""Inv"", ""Reciprocal"", ""Log"",
@@ -1147,27 +1114,27 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
     }
     if (s == ""RED"" || fusible) {
       added = true;
-      whitelist.insert({""All"", ""Any"", ""Min"", ""Max"", ""Mean"", ""Prod"", ""Sum""});
+      whitelist->insert({""All"", ""Any"", ""Min"", ""Max"", ""Mean"", ""Prod"", ""Sum""});
     }
     if (s == ""PWRED"" || fusible) {
       added = true;
-      whitelist.insert({""ArgMax"", ""ArgMin"", ""DiagPart"", ""Softmax"",
+      whitelist->insert({""ArgMax"", ""ArgMin"", ""DiagPart"", ""Softmax"",
                         ""SparseSoftmaxCrossEntropyWithLogits"", ""LogSoftmax""});
     }
     if (s == ""REDUCEWINDOW"" || fusible) {
       added = true;
-      whitelist.insert({""MaxPoolV2"", ""MaxPool3D"", ""AvgPool"", ""AvgPool3D"",
+      whitelist->insert({""MaxPoolV2"", ""MaxPool3D"", ""AvgPool"", ""AvgPool3D"",
                         ""MaxPoolGrad"", ""MaxPool3DGrad"", ""AvgPoolGrad"",
                         ""AvgPool3DGrad"", ""MaxPoolGradGrad"", ""MaxPoolGradGradV2"",
                         ""MaxPool3DGradGrad""});
     }
     if (s == ""REDUCEWINDOPW"" || fusible) {
       added = true;
-      whitelist.insert({""LRN"", ""LRNGrad""});
+      whitelist->insert({""LRN"", ""LRNGrad""});
     }
     if (s == ""BN"" || fusible) {
       added = true;
-      whitelist.insert({""FusedBatchNorm"", ""FusedBatchNormV2"",
+      whitelist->insert({""FusedBatchNorm"", ""FusedBatchNormV2"",
                         ""FusedBatchNormV3"", ""_FusedBatchNormEx"",
                         ""FusedBatchNormGrad"", ""FusedBatchNormGradV2"",
                         ""FusedBatchNormGradV3""});
@@ -1176,7 +1143,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
       // Fill => Broadcast
       // BroadcastTo => Broadcast + maybe Reshape
       added = true;
-      whitelist.insert({""BroadcastTo"",
+      whitelist->insert({""BroadcastTo"",
                         ""ExpandDims"",
                         ""Fill"",
                         ""Max"",
@@ -1222,21 +1189,61 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
     }
 
     if (!added && s.size() > 0) {
-      if (!all_ops.contains(string(s))) {
-        return errors::InvalidArgument(
-            ""The operation '"", s,
-            ""' passed to --tf_xla_supported_nodes is not supported by XLA."");
-      }
-      whitelist.insert(string(s));
+      whitelist->insert(string(s));
     }
   }
 
-  if (VLOG_IS_ON(2) && whitelist.size() > 0) {
-    std::vector<string> vwhitelist(whitelist.begin(), whitelist.end());
+  if (VLOG_IS_ON(2) && whitelist->size() > 0) {
+    std::vector<string> vwhitelist(whitelist->begin(), whitelist->end());
     std::sort(vwhitelist.begin(), vwhitelist.end());
     VLOG(2) << ""XLA clustering will only consider the following TF operations: ""
             << absl::StrJoin(vwhitelist, "" "");
   }
+  return whitelist;
+}
+
+Status MarkForCompilationPassImpl::FindCompilationCandidates() {
+  OptimizerOptions opts;
+  std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
+    new ProcessFunctionLibraryRuntime(nullptr, env_, /*config=*/nullptr,
+				      TF_GRAPH_DEF_VERSION, flib_def_, opts));
+  FunctionLibraryRuntime* lib_runtime =
+    pflr->GetFLR(ProcessFunctionLibraryRuntime::kDefaultFLRDevice);
+  std::vector<bool> compile_time_const_nodes(graph_->num_node_ids(), false);
+  TF_RETURN_IF_ERROR(BackwardsConstAnalysis(
+    *graph_, /*compile_time_const_arg_indices=*/nullptr,
+    &compile_time_const_nodes, lib_runtime));
+  // Iterate over nodes in sorted order so that compiler fuel is deterministic.
+  // We can't simply pass op_nodes().begin() and op_nodes().end() to the
+  // std::vector constructor because they're not proper iterators, with
+  // iterator_traits defined and so on.
+  std::vector<Node*> sorted_nodes;
+  for (Node* node : graph_->op_nodes()) {
+    sorted_nodes.push_back(node);
+  }
+  std::sort(sorted_nodes.begin(), sorted_nodes.end(), NodeComparatorID());
+
+  if (*debug_options_.fuel >= std::numeric_limits<int64>::max() / 2) {
+    // The assumption is that if fuel started out as INT64_MAX, it will forever
+    // stay greater than INT64_MAX / 2.
+    VLOG(2) << ""Starting fuel: infinity"";
+  } else {
+    VLOG(2) << ""Starting fuel: "" << *debug_options_.fuel;
+  }
+
+  VLOG(2) << ""sorted_nodes.size() = "" << sorted_nodes.size();
+
+  auto whitelist = GetWhitelist();
+
+  auto vall_ops = XlaOpRegistry::GetAllRegisteredOps();
+  absl::flat_hash_set<string> all_ops(vall_ops.begin(), vall_ops.end());
+  for (auto s = whitelist->begin(); s != whitelist->end(); ++s) {
+    if (!all_ops.contains(string(*s))) {
+      return errors::InvalidArgument(
+          ""The operation '"", *s,
+          ""' passed to --tf_xla_supported_nodes is not supported by XLA."");
+    }
+  }
 
   for (Node* node : sorted_nodes) {
     if (*debug_options_.fuel <= 0) {
@@ -1275,7 +1282,7 @@ Status MarkForCompilationPassImpl::FindCompilationCandidates() {
       continue;
     }
 
-    if (whitelist.size() > 0 && !whitelist.contains(node->def().op())) {
+    if (whitelist->size() > 0 && !whitelist->contains(node->def().op())) {
       VLOG(1) << ""Rejecting "" << node->name()
               << "" as is was not listed in --tf_xla_supported_nodes."";
       continue;
",0,train
609a60b44bbf934b31a1dce4f0aa84e731b83c35,tensorflow/tensorflow,Refactor AutoCastVariable tests to rely on strategy_combinations,autocast_variable_test.py,"@@ -17,20 +17,23 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
 import os
 
 from absl.testing import parameterized
 import numpy as np
 
 from tensorflow.python import tf2
+from tensorflow.python.distribute import combinations
+from tensorflow.python.distribute import distribution_strategy_context as ds_context
 from tensorflow.python.distribute import mirrored_strategy
+from tensorflow.python.distribute import strategy_combinations
 from tensorflow.python.eager import context
 from tensorflow.python.eager import def_function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import indexed_slices
 from tensorflow.python.framework import ops
-from tensorflow.python.keras import combinations
 from tensorflow.python.keras.mixed_precision.experimental import autocast_variable
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_v2
 from tensorflow.python.ops import array_ops
@@ -40,30 +43,17 @@ from tensorflow.python.platform import test
 from tensorflow.python.training import gradient_descent as gradient_descent_v1
 from tensorflow.python.training.tracking import util as trackable_utils
 
-TESTCASES = ({
-    'testcase_name': 'base',
-    'distribute': False
-}, {
-    'testcase_name': 'distribute',
-    'distribute': True
-})
-
-
-def get_distribute_scope(distribute):
-
-  class DummyContextManager(object):
-
-    def __enter__(self):
-      pass
-
-    def __exit__(self, *args):
-      pass
-
-  if distribute:
-    return mirrored_strategy.MirroredStrategy(['cpu:0']).scope()
-  else:
-    return DummyContextManager()
+class DummyStrategy(object):
+  @contextlib.contextmanager
+  def scope(self):
+    yield
 
+maybe_distribute = combinations.combine(
+    distribution=[
+        combinations.NamedDistribution(
+            ""Dummy"", lambda: DummyStrategy(), required_gpus=None),
+        strategy_combinations.mirrored_strategy_with_cpu_1_and_2
+    ])
 
 def get_var(val, dtype, name=None):
   return variables.VariableV1(val, use_resource=True, dtype=dtype, name=name)
@@ -71,10 +61,13 @@ def get_var(val, dtype, name=None):
 
 @combinations.generate(combinations.combine(mode=['graph', 'eager']))
 class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
+  def setUp(self):
+    strategy_combinations.set_virtual_cpus_to_at_least(3)
+    super(AutoCastVariableTest, self).setUp()
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_read(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_read(self, distribution):
+    with distribution.scope():
       x = get_var(1., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.evaluate(x.initializer)
@@ -116,9 +109,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
       self.assertEqual(x.sparse_read([0]).dtype, dtypes.float16)
       self.assertEqual(x.gather_nd([0]).dtype, dtypes.float16)
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_read_nested_scopes(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_read_nested_scopes(self, distribution):
+    with distribution.scope():
       x = get_var(1., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.evaluate(x.initializer)
@@ -136,9 +129,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         self.assertEqual(x.dtype, dtypes.float16)
         self.assertEqual(x.read_value().dtype, dtypes.float16)
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_dtype_is_not_string(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_dtype_is_not_string(self, distribution):
+    with distribution.scope():
       x = get_var(1., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.assertEqual(x.dtype, dtypes.float32)
@@ -153,13 +146,13 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         self.assertEqual(x.true_dtype, dtypes.float32)
         self.assertIsInstance(x.true_dtype, dtypes.DType)
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_method_delegations(self, distribute):
+  @combinations.generate(maybe_distribute)
+  def test_method_delegations(self, distribution):
     # Test AutoCastVariable correctly delegates Variable methods to the
     # underlying variable.
-    with self.test_session(), get_distribute_scope(distribute):
+    with self.test_session(), distribution.scope():
       for read_dtype in (dtypes.float32, dtypes.float16):
-        if distribute:
+        if ds_context.has_strategy():
           # MirroredVariable.assign will (incorrectly) return a Mirrored value
           # instead of a MirroredVariable. So we cannot properly wrap it in an
           # AutoCastVariable.
@@ -183,14 +176,14 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
           self.assertEqual(x.aggregation, x._variable.aggregation)
           self.assertEqual(self.evaluate(x.initialized_value()), 7)
           if not context.executing_eagerly():
-            if not distribute:
+            if not ds_context.has_strategy():
               # These functions are not supported for DistributedVariables
               x.load(9)
               self.assertEqual(x.eval(), 9)
             self.assertEqual(self.evaluate(x.initial_value), 7)
             self.assertEqual(x.op, x._variable.op)
             self.assertEqual(x.graph, x._variable.graph)
-          if not distribute:
+          if not ds_context.has_strategy():
             # These attributes are not supported for DistributedVariables
             self.assertIsNone(x.constraint)
             self.assertEqual(x.initializer, x._variable.initializer)
@@ -202,7 +195,7 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
           self.assertEqual(x.shape, ())
           self.assertEqual(x.get_shape(), ())
 
-        if not distribute:
+        if not ds_context.has_strategy():
           # Test scatter_* methods. These are not supported for
           # DistributedVariables
           x = get_var([7, 8], dtypes.float32)
@@ -233,9 +226,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
             self.assertAllEqual(
                 evaluate(x.scatter_nd_update([[0], [1]], [1., 2.])), [1, 2])
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_operator_overloads(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_operator_overloads(self, distribution):
+    with distribution.scope():
       for read_dtype in (dtypes.float32, dtypes.float16):
         x = get_var(7., dtypes.float32)
         x = autocast_variable.create_autocast_variable(x)
@@ -280,9 +273,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
             self.assertAllEqual(x == [7., 8., 10.], [True, True, False])
             self.assertAllEqual(x != [7., 8., 10.], [False, False, True])
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_assign(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_assign(self, distribution):
+    with distribution.scope():
       x = get_var(0., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.evaluate(x.initializer)
@@ -318,18 +311,19 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         self.assertAllClose(3., self.evaluate(x.assign_sub(3.)))
 
         # Assign multiple times
-        assign = x.assign(1.)
-        self.assertAllClose(1., self.evaluate(assign))
-        self.assertAllClose(0., self.evaluate(assign.assign(0.)))
-        assign_add = x.assign_add(3.)
-        self.assertAllClose(3., self.evaluate(assign_add))
-        self.assertAllClose(3. * 3,
-                            self.evaluate(x.assign_add(3.).assign_add(3.)))
-        self.assertAllClose(3. * 3, x)
-        assign_sub = x.assign_sub(3.)
-        self.assertAllClose(3. * 2, self.evaluate(assign_sub))
-        self.assertAllClose(0.,
-                            self.evaluate(x.assign_sub(3.).assign_sub(3.)))
+        if not ds_context.has_strategy():
+          assign = x.assign(1.)
+          self.assertAllClose(1., self.evaluate(assign))
+          self.assertAllClose(0., self.evaluate(assign.assign(0.)))
+          assign_add = x.assign_add(3.)
+          self.assertAllClose(3., self.evaluate(assign_add))
+          self.assertAllClose(3. * 3,
+                              self.evaluate(x.assign_add(3.).assign_add(3.)))
+          self.assertAllClose(3. * 3, x)
+          assign_sub = x.assign_sub(3.)
+          self.assertAllClose(3. * 2, self.evaluate(assign_sub))
+          self.assertAllClose(0.,
+                              self.evaluate(x.assign_sub(3.).assign_sub(3.)))
 
         # Assign with read_value=False
         self.assertIsNone(self.evaluate(x.assign(1., read_value=False)))
@@ -355,9 +349,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         # assign still expect float32 value even if in float16 scope
         run_and_check()
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_assign_stays_in_true_dtype(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_assign_stays_in_true_dtype(self, distribution):
+    with distribution.scope():
       x = get_var(1., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.evaluate(x.initializer)
@@ -382,10 +376,10 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         self.assertEqual(1., self.evaluate(x.value()))
       self.assertEqual(1. + small_val, self.evaluate(x.value()))
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_checkpoint(self, distribute):
+  @combinations.generate(maybe_distribute)
+  def test_checkpoint(self, distribution):
     with self.test_session():
-      with get_distribute_scope(distribute):
+      with distribution.scope():
         x = get_var(1., dtypes.float32)
         x = autocast_variable.create_autocast_variable(x)
       self.evaluate(x.initializer)
@@ -398,9 +392,9 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
       checkpoint.restore(save_path).assert_consumed().run_restore_ops()
       self.assertEqual(self.evaluate(x), 123.)
 
-  @parameterized.named_parameters(*TESTCASES)
-  def test_invalid_wrapped_variable(self, distribute):
-    with get_distribute_scope(distribute):
+  @combinations.generate(maybe_distribute)
+  def test_invalid_wrapped_variable(self, distribution):
+    with distribution.scope():
       # Wrap a non-variable
       with self.assertRaisesRegexp(ValueError, 'variable must be of type'):
         x = constant_op.constant([1.], dtype=dtypes.float32)
@@ -443,7 +437,7 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
         )
 
   def test_repr_distributed(self):
-    with get_distribute_scope(distribute=True):
+    with mirrored_strategy.MirroredStrategy([""/cpu:1"", ""/cpu:2""]).scope():
       x = get_var(1., dtypes.float32)
       x = autocast_variable.create_autocast_variable(x)
       self.assertRegexpMatches(
",0,train
cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline

This pipeline is run after tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, and enables running the equivalent of that function via tf-opt without specifying individual passes.

PiperOrigin-RevId: 359855632
Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",bridge.cc,"@@ -39,21 +39,8 @@ void EnableLogging(PassManager *pm) {
 }  // namespace
 
 namespace TFTPU {
-namespace {
-void AddGraphExportLoweringPasses(OpPassManager &pm) {
-  auto add_pass = [&](std::unique_ptr<Pass> pass) {
-    pm.addNestedPass<FuncOp>(std::move(pass));
-    pm.addPass(CreateBreakUpIslandsPass());
-  };
-
-  add_pass(CreateFunctionalToExecutorDialectConversionPass());
-  add_pass(TFDevice::CreateReplicateToIslandPass());
-  add_pass(TFDevice::CreateParallelExecuteToIslandsPass());
-  add_pass(TFDevice::CreateLaunchToDeviceAttributePass());
-  pm.addNestedPass<FuncOp>(CreateTPUDevicePropagationPass());
-  pm.addPass(createSymbolDCEPass());
-}
 
+namespace {
 tensorflow::Status RunTPUBridge(
     ModuleOp module, bool enable_logging,
     llvm::function_ref<void(OpPassManager &pm)> pipeline_builder) {
@@ -68,7 +55,7 @@ tensorflow::Status RunTPUBridge(
   pipeline_builder(bridge);
 
   // Add set of passes to lower back to graph (from tf_executor).
-  AddGraphExportLoweringPasses(bridge);
+  TF::AddGraphExportLoweringPasses(bridge);
 
   // Run the bridge on the module, in case of failure, the `diag_handler`
   // converts MLIR errors emitted to the MLIRContext into a tensorflow::Status.
@@ -166,6 +153,20 @@ tensorflow::Status TPUBridgeV1Compat(ModuleOp module, bool enable_logging) {
 
 namespace TF {
 
+void AddGraphExportLoweringPasses(OpPassManager &pm) {
+  auto add_pass = [&](std::unique_ptr<Pass> pass) {
+    pm.addNestedPass<FuncOp>(std::move(pass));
+    pm.addPass(CreateBreakUpIslandsPass());
+  };
+
+  add_pass(CreateFunctionalToExecutorDialectConversionPass());
+  add_pass(TFDevice::CreateReplicateToIslandPass());
+  add_pass(TFDevice::CreateParallelExecuteToIslandsPass());
+  add_pass(TFDevice::CreateLaunchToDeviceAttributePass());
+  pm.addNestedPass<FuncOp>(TFTPU::CreateTPUDevicePropagationPass());
+  pm.addPass(createSymbolDCEPass());
+}
+
 tensorflow::Status RunBridgeWithStandardPipeline(ModuleOp module,
                                                  bool enable_logging,
                                                  bool enable_inliner) {
",0,train
cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline

This pipeline is run after tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, and enables running the equivalent of that function via tf-opt without specifying individual passes.

PiperOrigin-RevId: 359855632
Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",bridge_pass.cc,"@@ -20,20 +20,32 @@ limitations under the License.
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h""
 #include ""tensorflow/compiler/mlir/tensorflow/utils/error_util.h""
 
+namespace mlir {
+namespace TFTPU {
+extern void AddGraphExportLoweringPasses(OpPassManager &pm);
+}  // namespace TFTPU
+}  // namespace mlir
+
 namespace {
 
-// Registers an existing pipeline builder function.
+// Registers a pipeline builder function for TF TPU bridge.
 mlir::PassPipelineRegistration<> tpu_pipeline(
     ""tf-tpu-bridge"",
     ""Run all the passes involved in transforming the graph before execution so ""
     ""that it is suitable for targeting TPUs."",
     mlir::TFTPU::CreateTPUBridgePipeline);
 
-// Registers an existing pipeline builder function.
+// Registers a pipeline builder function for TF TPU V1 bridge.
 mlir::PassPipelineRegistration<> tpu_pipeline_v1(
     ""tf-tpu-bridge-v1"",
     ""Run all the passes involved in transforming a TensorFlow V1 graph before ""
     ""execution so that it is suitable for targeting TPUs."",
     mlir::TFTPU::CreateTPUBridgePipelineV1);
 
+// Registers a pipeline builder function for TF Graph export.
+mlir::PassPipelineRegistration<> tpu_export(
+    ""tf-graph-export"",
+    ""Run passes to prepare for exporting module back to TF Graph."",
+    mlir::TF::AddGraphExportLoweringPasses);
+
 }  // anonymous namespace
",0,train
cd8dd009b31f80344bd69f8dc5b404b5189646a5,tensorflow/tensorflow,"Expose pipeline as command line option for TPU graph exporting pipeline

This pipeline is run after tf-tpu-bridge and tf-tpu-bridge-v1 during RunTPUBridge, and enables running the equivalent of that function via tf-opt without specifying individual passes.

PiperOrigin-RevId: 359855632
Change-Id: I6f1287f8dcbf0c368c15a626fa0bec537c814559",passes.h,"@@ -203,6 +203,10 @@ std::unique_ptr<OperationPass<mlir::ModuleOp>> CreateCrossHostTransferPass();
 // will replicate the tf.Const op once for each device.
 std::unique_ptr<OperationPass<ModuleOp>> CreateConstantOpDeviceAssignmentPass();
 
+// Populates the supplied passmanager with the passes required to export
+// to TensorFlow Graph.
+void AddGraphExportLoweringPasses(OpPassManager& pm);
+
 }  // namespace TF
 
 namespace tf_executor {
",0,train
44acd839c57494860666c799afd24360f1df3bed,tensorflow/tensorflow,"Fix reported cuDNN default version during configuration.

PiperOrigin-RevId: 215272308",configure.py,"@@ -884,7 +884,7 @@ def set_tf_cudnn_version(environ_cp):
   """"""Set CUDNN_INSTALL_PATH and TF_CUDNN_VERSION.""""""
   ask_cudnn_version = (
       'Please specify the cuDNN version you want to use. '
-      '[Leave empty to default to cuDNN %s.0]: ') % _DEFAULT_CUDNN_VERSION
+      '[Leave empty to default to cuDNN %s]: ') % _DEFAULT_CUDNN_VERSION
 
   for _ in range(_DEFAULT_PROMPT_ASK_ATTEMPTS):
     tf_cudnn_version = get_from_env_or_user_or_default(
",0,train
48eb150c8e044e233b60c9c65681aaba00f083b6,tensorflow/tensorflow,"Fix the comment about where the weak declaration of AcquireXNNPACKDelegate can be found, and combine the code sections guarded by the TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro.

PiperOrigin-RevId: 364983825
Change-Id: I58400bd0023b5e09a84f68872a6f9199d4edd0bb",tflite_with_xnnpack.cc,"@@ -18,7 +18,8 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/xnnpack/xnnpack_delegate.h""
 
 namespace tflite {
-// Corresponding weak declaration found in lite/interpreter_builder.cc.
+// Corresponding weak declaration found in lite/tflite_with_xnnpack_optional.cc
+// when TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro isn't defined.
 std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>
 AcquireXNNPACKDelegate(int num_threads) {
   auto opts = TfLiteXNNPackDelegateOptionsDefault();
",0,train
48eb150c8e044e233b60c9c65681aaba00f083b6,tensorflow/tensorflow,"Fix the comment about where the weak declaration of AcquireXNNPACKDelegate can be found, and combine the code sections guarded by the TFLITE_BUILD_WITH_XNNPACK_DELEGATE macro.

PiperOrigin-RevId: 364983825
Change-Id: I58400bd0023b5e09a84f68872a6f9199d4edd0bb",tflite_with_xnnpack_optional.cc,"@@ -28,16 +28,6 @@ namespace tflite {
 using TfLiteDelegatePtr =
     std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
 
-#ifndef TFLITE_BUILD_WITH_XNNPACK_DELEGATE
-// Using weak symbols to create a delegate allows automatic injection of the
-// delegate simply by adding it as a dependency. See the strong override in
-// lite/tflite_with_xnnpack.cc,
-TFLITE_ATTRIBUTE_WEAK TfLiteDelegatePtr
-AcquireXNNPACKDelegate(int num_threads) {
-  return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
-}
-#endif
-
 #ifdef TFLITE_BUILD_WITH_XNNPACK_DELEGATE
 TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) {
   auto opts = TfLiteXNNPackDelegateOptionsDefault();
@@ -47,6 +37,14 @@ TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) {
                            TfLiteXNNPackDelegateDelete);
 }
 #else
+// Using weak symbols to create a delegate allows automatic injection of the
+// delegate simply by adding it as a dependency. See the strong override in
+// lite/tflite_with_xnnpack.cc,
+TFLITE_ATTRIBUTE_WEAK TfLiteDelegatePtr
+AcquireXNNPACKDelegate(int num_threads) {
+  return TfLiteDelegatePtr(nullptr, [](TfLiteDelegate*) {});
+}
+
 TfLiteDelegatePtr MaybeCreateXNNPACKDelegate(int num_threads) {
   return AcquireXNNPACKDelegate(num_threads);
 }
",0,train
b03209bbc26f8947cbd1d49e1a232c09e7dcd17a,tensorflow/tensorflow,"Bugfix to error message reporting when portpicker is not available.

PiperOrigin-RevId: 156895715",test_util.py,"@@ -29,9 +29,11 @@ import threading
 import numpy as np
 import six
 
+_portpicker_import_error = None
 try:
   import portpicker  # pylint: disable=g-import-not-at-top
-except ImportError as _portpicker_import_error:
+except ImportError as _error:
+  _portpicker_import_error = _error
   portpicker = None
 
 # pylint: disable=g-import-not-at-top
@@ -820,8 +822,8 @@ def create_local_cluster(num_workers, num_ps, protocol=""grpc""):
   Raises:
     ImportError: if portpicker module was not found at load time
   """"""
-  if not portpicker:
-    raise _portpicker_import_error
+  if _portpicker_import_error:
+    raise _portpicker_import_error  # pylint: disable=raising-bad-type
   worker_ports = [portpicker.pick_unused_port() for _ in range(num_workers)]
   ps_ports = [portpicker.pick_unused_port() for _ in range(num_ps)]
   cluster_dict = {
",0,test
f8ac6c10fd9660b0575dcdb2eb3bc1d6ac90f399,tensorflow/tensorflow,"Switch from all_reduce_merge_scope to num_packs; generate single pack.

`MultiWorkerMirroredStrategy` packs gradients into chunks of size
`all_reduce_merge_scope` via the `ScopedAllocator` Grappler optimizer so that
we can convert many small all-reduces into fewer larger all-reduces.  This
change switches the knob from merge scope to `num_packs`, essentially identical
to `MirroredStrategy`.  We also set default value to 1, like
`MirroredStrategy`.

The optimal value of `num_packs` may be dependent on various factors,
including the choice of all reduce implementation.

PiperOrigin-RevId: 265724850",cross_device_ops.py,"@@ -1007,21 +1007,19 @@ class CollectiveAllReduce(CrossDeviceOps):
   def __init__(self,
                num_workers=1,
                num_gpus_per_worker=0,
-               all_reduce_merge_scope=32,
+               num_packs=1,
                collective_keys=None):
     """"""Initializes the object.
 
     Args:
       num_workers: number of workers in the between-graph replicated training.
       num_gpus_per_worker: number of GPUs per worker.
-      all_reduce_merge_scope: size of groups into which to partition consecutive
-        gradients grouped under a common 'allreduce' name scope. This is useful
-        for some optimization of collective ops.
+      num_packs: gradients will be packed into `num_packs` chunks.
       collective_keys: an optional CollectiveKey object.
     """"""
     self._num_workers = num_workers
     self._num_gpus_per_worker = num_gpus_per_worker
-    self._all_reduce_merge_scope = all_reduce_merge_scope
+    self._num_packs = num_packs
     self._collective_keys = (collective_keys or
                              cross_device_utils.CollectiveKeys())
     super(CollectiveAllReduce, self).__init__()
@@ -1075,21 +1073,31 @@ class CollectiveAllReduce(CrossDeviceOps):
           for t, v in value_destination_pairs
       ]
 
-  def _make_gradient_chunks(self, per_replica_values, all_reduce_merge_scope):
+  def _make_gradient_chunks(self, per_replica_values, num_packs):
     """"""Make `per_replica_values` into chunks.""""""
-    grouped_by_device = _group_value_by_device(per_replica_values)
-
-    grouped_by_var = list(zip(*grouped_by_device))
-    # grouped_by_var is grouped by variables and takes the following format:
+    chunked_by_device = _group_value_by_device(per_replica_values)
+    chunked_by_var = list(zip(*chunked_by_device))
+    # chunked_by_var is chunked by variables and takes the following format:
     # [((grad0_gpu0, v0_gpu0), (grad0_gpu1, v0_gpu1), (grad0_gpu2, v0_gpu2) ..),
     #  ((grad1_gpu0, v1_gpu0), (grad1_gpu1, v1_gpu1), (grad1_gpu0, v1_gpu2) ..),
     #  ((grad2_gpu0, v2_gpu0), (grad2_gpu1, v2_gpu1), (grad2_gpu0, v2_gpu2) ..),
     #  ...
     # ]
+
+    # First n-1 chunks get `chunk_size` grads, last chunk gets leftover grads.
+    # This strategy can cause the last chunk to have larger size compared to the
+    # first n-1 chunks.  Alternatively, we can increment chunk_size by 1 to get
+    # slightly larger first n-1 chunks and smaller last chunk.
+    # TODO(ayushd): compare different packing strategies.
+    chunk_size = len(chunked_by_var) // num_packs
+    leftover_size = len(chunked_by_var) - chunk_size * (num_packs - 1)
+    assert leftover_size > 0
     chunked_gv = [
-        grouped_by_var[x:x + all_reduce_merge_scope]
-        for x in range(0, len(grouped_by_var), all_reduce_merge_scope)
+        chunked_by_var[x:x + chunk_size]
+        for x in range(0, len(chunked_by_var) - leftover_size, chunk_size)
     ]
+    chunked_gv.append(chunked_by_var[-leftover_size:])
+
     return chunked_gv
 
   def _batch_all_reduce(self, reduce_op, per_replica_values):
@@ -1115,11 +1123,13 @@ class CollectiveAllReduce(CrossDeviceOps):
         logging.INFO, ""Collective batch_all_reduce: %d all-reduces, ""
         ""num_workers = %d"" % (len(per_replica_values), self._num_workers), 10)
 
-    chunked_gv = self._make_gradient_chunks(per_replica_values,
-                                            self._all_reduce_merge_scope)
+    chunked_gv = self._make_gradient_chunks(per_replica_values, self._num_packs)
 
     reduced_gv_list = []
     for chunk in chunked_gv:
+      # By placing all collective ops in a chunk under single name scope, we
+      # ensure they will be picked up by the `ScopedAllocator` grappler
+      # optimizer and packed into a single all-reduce.
       with ops.name_scope(""allreduce""):
         for grad_and_vars in chunk:
           # Gradients for the same variable but from different devices.
@@ -1147,8 +1157,7 @@ class CollectiveAllReduce(CrossDeviceOps):
         ""%d all-reduces, num_workers = %d"" %
         (len(per_replica_values), self._num_workers), 10)
 
-    chunked_gv = self._make_gradient_chunks(per_replica_values,
-                                            self._all_reduce_merge_scope)
+    chunked_gv = self._make_gradient_chunks(per_replica_values, self._num_packs)
 
     reduced_gv_list = []
     for chunk in chunked_gv:
",0,train
e6dbfb7a221563336ed3c28178c6e908aa8a6943,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-01-26

PiperOrigin-RevId: 353817494
Change-Id: Id78484cbae933b3f44384241a4939fa5c94fa7d6",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 1, 25)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 1, 26)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
c40c656e78203afa78797dad5529f1b0f1e69519,tensorflow/tensorflow,"Fix an issue with OpKernel access to the coordination service when executing eager functions.

PiperOrigin-RevId: 387614843
Change-Id: I7bddb8e75180878a5d2ac28c6f525f6c658d7007",executor.cc,"@@ -366,6 +366,7 @@ class ExecutorState {
   const ImmutableExecutorState& immutable_state_;
   ExecutorImpl::KernelStats* const kernel_stats_;
   CancellationManager* cancellation_manager_;
+  CoordinationServiceAgent* coordination_service_agent_;
   // If not null, use this device to schedule intra-op operation
   std::unique_ptr<DeviceBase> user_device_;
   Executor::Args::Runner runner_;
@@ -413,6 +414,7 @@ ExecutorState<PropagatorStateType>::ExecutorState(
       immutable_state_(immutable_state),
       kernel_stats_(kernel_stats),
       cancellation_manager_(args.cancellation_manager),
+      coordination_service_agent_(args.coordination_service_agent),
       runner_(args.runner),
       sync_on_finish_(args.sync_on_finish),
       run_all_kernels_inline_(args.run_all_kernels_inline),
@@ -706,6 +708,7 @@ void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node,
   params.session_metadata = session_metadata_;
   params.tensor_store = tensor_store_;
   params.cancellation_manager = cancellation_manager_;
+  params.coordination_service_agent = coordination_service_agent_;
   params.call_frame = call_frame_;
   params.function_library = immutable_state_.params().function_library;
   params.resource_manager = device->resource_manager();
",0,train
c40c656e78203afa78797dad5529f1b0f1e69519,tensorflow/tensorflow,"Fix an issue with OpKernel access to the coordination service when executing eager functions.

PiperOrigin-RevId: 387614843
Change-Id: I7bddb8e75180878a5d2ac28c6f525f6c658d7007",op_kernel.h,"@@ -669,7 +669,7 @@ class OpKernelContext {
     bool* outputs_required_array = nullptr;
 
     // For access to distributed coordination service.
-    CoordinationServiceAgent* coordination_service_agent;
+    CoordinationServiceAgent* coordination_service_agent = nullptr;
   };
 
   // params must outlive the OpKernelContext.
",0,train
448a16182065bd08a202d9057dd8ca541e67996c,tensorflow/tensorflow,"Prevent stack overflow when FunctionLib in GraphDef has a self-recursive function.

Recursion is likely not supported at all, but we should handle that separately.

PiperOrigin-RevId: 414860329
Change-Id: I02a2270e86282b37362ddd485eeef16fb986a9e0",loader.cc,"@@ -25,6 +25,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/attr_value.pb.h""
 #include ""tensorflow/core/framework/function.pb.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
+#include ""tensorflow/core/framework/op_def.pb.h""
 #include ""tensorflow/core/framework/tensor.pb.h""
 #include ""tensorflow/core/lib/io/path.h""
 #include ""tensorflow/core/lib/monitoring/counter.h""
@@ -99,6 +100,19 @@ static Status ValidateNode(const NodeDef& node) {
   return Status::OK();
 }
 
+static Status ValidateFunctionNotRecursive(const FunctionDef& function) {
+  const auto& function_name = function.signature().name();
+  for (const auto& node : function.node_def()) {
+    if (node.op() == function_name) {
+      return errors::FailedPrecondition(
+          ""Function "", function_name,
+          "" is self recursive and TensorFlow does not support this scenario."");
+    }
+  }
+
+  return Status::OK();
+}
+
 static Status ValidateSavedTensors(const GraphDef& graph_def) {
   for (const auto& node : graph_def.node()) {
     TF_RETURN_IF_ERROR(ValidateNode(node));
@@ -110,6 +124,10 @@ static Status ValidateSavedTensors(const GraphDef& graph_def) {
       for (const auto& node : function.node_def()) {
         TF_RETURN_IF_ERROR(ValidateNode(node));
       }
+
+      // Also check that there is no recursivity in the library
+      // TODO(mihaimaruseac): Do more than self-recursivity
+      TF_RETURN_IF_ERROR(ValidateFunctionNotRecursive(function));
     }
   }
 
",0,test
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",logging_ops.cc,"@@ -90,4 +90,23 @@ class PrintOp : public OpKernel {
 
 REGISTER_KERNEL_BUILDER(Name(""Print"").Device(DEVICE_CPU), PrintOp);
 
+class TimestampOp : public OpKernel {
+ public:
+  explicit TimestampOp(OpKernelConstruction* context) : OpKernel(context) {}
+
+  void Compute(OpKernelContext* context) override {
+    TensorShape output_shape;  // Default shape is 0 dim, 1 element
+    Tensor* output_tensor = nullptr;
+    OP_REQUIRES_OK(context,
+                   context->allocate_output(0, output_shape, &output_tensor));
+
+    auto output_scalar = output_tensor->scalar<double>();
+    double now_us = static_cast<double>(Env::Default()->NowMicros());
+    double now_s = now_us / 1000000;
+    output_scalar() = now_s;
+  }
+};
+
+REGISTER_KERNEL_BUILDER(Name(""Timestamp"").Device(DEVICE_CPU), TimestampOp);
+
 }  // end namespace tensorflow
",0,train
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",logging_ops_test.cc,"@@ -13,6 +13,9 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <chrono>
+#include <thread>
+
 #include ""tensorflow/core/framework/fake_input.h""
 #include ""tensorflow/core/framework/node_def_builder.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -96,5 +99,27 @@ TEST_F(PrintingGraphTest, FirstNSuccess) {
   test::ExpectTensorEqual<int32>(expected, *GetOutput(0));
 }
 
+class TimestampTest : public OpsTestBase {
+ protected:
+  Status Init() {
+    TF_CHECK_OK(NodeDefBuilder(""op"", ""Timestamp"").Finalize(node_def()));
+    return InitOp();
+  }
+};
+
+TEST_F(TimestampTest, WaitAtLeast) {
+  TF_ASSERT_OK(Init());
+  TF_ASSERT_OK(RunOpKernel());
+  double ts1 = *((*GetOutput(0)).flat<double>().data());
+
+  // wait 1 second
+  std::this_thread::sleep_for(std::chrono::seconds(1));
+
+  TF_ASSERT_OK(RunOpKernel());
+  double ts2 = *((*GetOutput(0)).flat<double>().data());
+
+  EXPECT_LE(1.0, ts2 - ts1);
+}
+
 }  // end namespace
 }  // end namespace tensorflow
",0,train
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",logging_ops.cc,"@@ -111,4 +111,9 @@ REGISTER_OP(""MergeSummary"")
     .Attr(""N : int >= 1"")
     .SetShapeFn(shape_inference::ScalarShape);
 
+REGISTER_OP(""Timestamp"")
+    .Output(""ts: float64"")
+    .SetIsStateful()
+    .SetShapeFn(shape_inference::ScalarShape);
+
 }  // end namespace tensorflow
",0,train
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",control_flow_ops.py,"@@ -44,6 +44,7 @@ See the @{$python/control_flow_ops} guide.
 @@add_check_numerics_ops
 @@Assert
 @@Print
+@@timestamp
 """"""
 # pylint: disable=g-bad-name
 from __future__ import absolute_import
",0,train
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",logging_ops.py,"@@ -356,3 +356,4 @@ ops.NotDifferentiable(""AudioSummary"")
 ops.NotDifferentiable(""AudioSummaryV2"")
 ops.NotDifferentiable(""MergeSummary"")
 ops.NotDifferentiable(""ScalarSummary"")
+ops.NotDifferentiable(""Timestamp"")
",0,train
c07a6e6568b776037f052bc0d385a509ec2647aa,tensorflow/tensorflow,"Add Timestamp Op which returns the current timestamp during graph execution

PiperOrigin-RevId: 186342760",standard_ops.py,"@@ -60,6 +60,7 @@ from tensorflow.python.ops.io_ops import *
 from tensorflow.python.ops.linalg_ops import *
 from tensorflow.python.ops.logging_ops import Print
 from tensorflow.python.ops.logging_ops import get_summary_op
+from tensorflow.python.ops.logging_ops import timestamp
 from tensorflow.python.ops.lookup_ops import initialize_all_tables
 from tensorflow.python.ops.lookup_ops import tables_initializer
 from tensorflow.python.ops.manip_ops import *
@@ -232,7 +233,7 @@ _allowed_symbols_clip_ops = [
     ""global_norm"",
 ]
 
-_allowed_symbols_image_ops = [
+_allowed_symbols_logging_ops = [
     # Documented in training.py.
     # We are not importing training.py to avoid complex dependencies.
     ""audio_summary"",
@@ -262,8 +263,8 @@ _allowed_symbols = (_allowed_symbols_array_ops +
                     _allowed_symbols_clip_ops +
                     _allowed_symbols_control_flow_ops +
                     _allowed_symbols_functional_ops +
-                    _allowed_symbols_image_ops +
                     _allowed_symbols_gradients +
+                    _allowed_symbols_logging_ops +
                     _allowed_symbols_math_ops +
                     _allowed_symbols_variable_scope_ops +
                     _allowed_symbols_misc +
",0,train
a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_graphviz tool.

The enhanced command is `<instr> [<width>] [/ <boundary_instr1> <boundary_instr2> ...]`.

The boundary nodes are optional. This is useful in cases where one wants to further prune
a graph when using a large <width>.",hlo_graph_dumper.cc,"@@ -1281,8 +1281,9 @@ namespace {
 
 // Gets a NodeFilter that includes roughly all instructions whose distance from
 // root is <= radius.
-NodeFilter MakeNodeRadiusAroundFilter(const HloInstruction* root,
-                                      int64 radius) {
+NodeFilter MakeNodeRadiusAroundFilter(
+    const HloInstruction* root, int64 radius,
+    const std::set<const HloInstruction*>* boundary) {
   // First, find the neighborhood of nodes with distance from root <= radius.
   // These nodes are our initial set of ""normal"" nodes.
   absl::flat_hash_map<const HloInstruction*, NodeFilterResult> nodes;
@@ -1298,6 +1299,9 @@ NodeFilter MakeNodeRadiusAroundFilter(const HloInstruction* root,
     if (depth == radius) {
       continue;
     }
+    if (boundary->count(instr) != 0) {
+      continue;
+    }
 
     // Traverse into instr's operands.
     //
@@ -1513,11 +1517,12 @@ string DumpGraph(const HloComputation& computation, const string& label,
 }
 
 string DumpNeighborhoodAround(const HloInstruction& node, int radius,
+                              const std::set<const HloInstruction*>* boundary,
                               bool show_backend_config) {
   auto debug_options = node.GetModule()->config().debug_options();
   string label =
       StrCat(""Neighborhood of "", radius, "" nodes around "", node.name());
-  NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius);
+  NodeFilter filter = MakeNodeRadiusAroundFilter(&node, radius, boundary);
   string graph =
       HloDotDumper(node.parent(), label, debug_options, show_backend_config,
                    /*profile=*/nullptr, filter)
",0,train
a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_graphviz tool.

The enhanced command is `<instr> [<width>] [/ <boundary_instr1> <boundary_instr2> ...]`.

The boundary nodes are optional. This is useful in cases where one wants to further prune
a graph when using a large <width>.",hlo_graph_dumper.h,"@@ -63,8 +63,13 @@ string DumpGraph(const HloComputation& computation, const string& label,
 // The number of nodes dumped is controlled by the radius parameter, which
 // (roughly) corresponds to the max distance a node may be from the primary node
 // before it's omitted from the graph.
-string DumpNeighborhoodAround(const HloInstruction& node, int radius,
-                              bool show_backend_config = false);
+// 
+// The optional boundary parameter specifies the set of boundary nodes which
+// will be omitted when they are within the radius.
+string DumpNeighborhoodAround(
+    const HloInstruction& node, int radius,
+    const std::set<const HloInstruction*>* boundary = nullptr,
+    bool show_backend_config = false);
 
 // Dumps nodes on any of the paths from `from` to `to`.  If there are more than
 // max_nodes on all paths, restricts to the max_nodes nodes on the shortest
",0,train
a0a4d37e44419edc582c069ecd2de15b6d0c19ac,tensorflow/tensorflow,"[XLA] Add support to specify boundary nodes in interactive_graphviz tool.

The enhanced command is `<instr> [<width>] [/ <boundary_instr1> <boundary_instr2> ...]`.

The boundary nodes are optional. This is useful in cases where one wants to further prune
a graph when using a large <width>.",interactive_graphviz.cc,"@@ -139,9 +139,10 @@ HloComputation* FindComputation(const HloModule& module,
 // Print a help message describing the various available commands.
 void DoHelpCommand() {
   std::cout << R""(Commands:
-  <instruction> [<width>]
-    Renders a neighborhood of <width> nodes around <instruction>.  If <width>
-    is not provided, the default value is )""
+  <instruction> [<width>] [/ <boundary_instruction>+]
+    Renders a neighborhood of <width> nodes around <instruction>, without going
+    beyond the optional boundary instructions.  If <width> is not provided, 
+    the default value is )""
             << kDefaultWidth << R""(.
   allpaths <instruction> <instruction> [<n>]
     Renders a subset of all paths from one instruction to the other.  Either
@@ -457,12 +458,6 @@ void DoAllPathsCommand(const Options& opts, const HloModule& module,
 // Plot a given instruction neighborhood or computation with graphviz.
 void DoPlotCommand(const Options& opts, const HloModule& module,
                    const std::vector<string>& tokens) {
-  if (tokens.size() > 2) {
-    std::cerr << R""(Illegal input.  Enter e.g. ""%fusion.1 42"" or ""%fusion.1"".)""
-              << std::endl;
-    return;
-  }
-
   string node_name = tokens[0];
 
   // Find the node with the given name.
@@ -475,16 +470,43 @@ void DoPlotCommand(const Options& opts, const HloModule& module,
   }
 
   uint64 graph_width = kDefaultWidth;
-  if (tokens.size() == 2) {
+  std::set<const HloInstruction*> boundary;
+  if (tokens.size() >= 2) {
     if (comp) {
       std::cerr << ""Can only use graph-size parameter with instructions, but ""
                 << node_name << "" is a computation."" << std::endl;
       return;
     }
+
+    int bound_index = tokens.size();
     if (!absl::SimpleAtoi(tokens[1], &graph_width)) {
-      std::cerr << ""Can't parse '"" << tokens[1] << ""' as an integer.""
-                << std::endl;
-      return;
+      if (tokens[1] != ""/"") {
+        std::cerr << ""Can't parse '"" << tokens[1] << ""' as an integer.""
+                  << std::endl;
+        return;
+      }
+      graph_width = kDefaultWidth;
+      bound_index = 2;
+    } else {
+      if (tokens.size() > 2) {
+        if (tokens[2] != ""/"") {
+          std::cerr << ""Expect a /, but get a '"" << tokens[1] << ""'.""
+                    << std::endl;
+          return;
+        }
+        bound_index = 3;
+      }
+    }
+    while (bound_index < tokens.size()) {
+      string bnode_name = tokens[bound_index];
+      const HloInstruction* binstr = FindInstruction(module, bnode_name);
+      if (!binstr) {
+        std::cerr << ""Couldn't find HloInstruction named "" << node_name << "".""
+                  << std::endl;
+        return;
+      }
+      boundary.insert(binstr);
+      bound_index++;
     }
   }
 
@@ -496,7 +518,9 @@ void DoPlotCommand(const Options& opts, const HloModule& module,
         /*show_backend_config=*/show_backend_config));
   } else {
     DisplayGraphHandle(opts, hlo_graph_dumper::DumpNeighborhoodAround(
-        *instr, graph_width, /*show_backend_config=*/show_backend_config));
+                                 *instr, graph_width,
+                                 /*boundary=*/&boundary,
+                                 /*show_backend_config=*/show_backend_config));
   }
 }
 
",0,train
1ba89338bdb4afb85ae56e64b47acc93a3a28703,tensorflow/tensorflow,"Fixing a subtle bug where, in some cases, the post-cancellation work wasn't being done correctly. This is the scenario in which FunctionBufferingResource::Cancel() got called while buffering was in progress but the buffer then filled up, in which case FillBuffer() was never called and the Cancel() method would get stuck waiting on a notification from the condition variable, leading to timeouts. This CL fixes this by making sure FillBuffer() gets called one last time in this case.

Tested by running contrib/data/python/kernel_tests:prefetching_ops_test 500 times and ran contrib/distribute/python:values_test 500 times with no timeouts.

PiperOrigin-RevId: 191007895",prefetching_kernels.cc,"@@ -224,6 +224,13 @@ class FunctionBufferingResource : public ResourceBase {
                   if (buffer_.size() < buffer_size_ && !end_of_sequence_) {
                     restart_buffering = true;
                   } else {
+                    // When the buffer is full, we don't want to call
+                    // FillBuffer() unless we're in cancellation phase in which
+                    // case FillBuffer() will do the final cleanup post
+                    // cancellation.
+                    if (cancelled_) {
+                      restart_buffering = true;
+                    }
                     is_buffering_ = false;
                   }
                 }
",0,train
e5249d6dddc469e68c09b3af32a9adfdffdb5ef1,tensorflow/tensorflow,"List all removed stable ops in the error message, not just the first.
Change: 121309048",op_compatibility_lib.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/lib/io/path.h""
+#include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/protobuf.h""
 
@@ -54,19 +55,26 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
 
   if (stable_ops_ != nullptr) {
     printf(""Verifying no stable ops have been removed...\n"");
+    std::vector<string> removed;
     // We rely on stable_ops_ and op_list_ being in sorted order.
     auto iter = stable_ops_->begin();
     for (int cur = 0; iter != stable_ops_->end() && cur < op_list_.op_size();
          ++cur) {
       const string& op_name = op_list_.op(cur).name();
-      if (op_name > *iter) {
-        return errors::InvalidArgument(""Error, stable op removed: "", *iter);
-      } else if (op_name == *iter) {
+      while (op_name > *iter) {
+        removed.push_back(*iter);
         ++iter;
       }
+      if (op_name == *iter) {
+        ++iter;
+      }
+    }
+    for (; iter != stable_ops_->end(); ++iter) {
+      removed.push_back(*iter);
     }
-    if (iter != stable_ops_->end()) {
-      return errors::InvalidArgument(""Error, stable op removed: "", *iter);
+    if (!removed.empty()) {
+      return errors::InvalidArgument(""Error, stable op(s) removed: "",
+                                     str_util::Join(removed, "", ""));
     }
   }
 
",0,train
80ee15562599e4042675705936ccc24db2e74e9d,tensorflow/tensorflow,"Fix AlignedVector uniform value constructor

- The old constructor didn't work as expected because Eigen::half has a
  template constructor that accepts anything (which means
  std::is_constructible always returns true) but its implementation can
  still fail for some types.",gpu_kernel_helper.h,"@@ -214,15 +214,18 @@ class alignas(alignof(T) * N) AlignedVector {
 
   AlignedVector() = default;
 
-  // Explicitly construct with uniform value.
-  // Note: This emulates an explicit constructor of T, so that
-  // AlignedVector(args...) works whenever T(args...) does.
-  template <
-      typename... Args,
-      typename std::enable_if<std::is_constructible<value_type, Args...>::value,
-                              int>::type = 0>
-  __host__ __device__ explicit AlignedVector(Args&&... args) {
-    value_type uniform(std::forward<Args>(args)...);
+  // Uniform initialization.
+  __host__ __device__ explicit AlignedVector(value_type uniform) {
+    UNROLL_ON_DEVICE for (int i = 0; i < kSize; ++i) { values_[i] = uniform; }
+  }
+  // Uniform initialization with explicit conversion.
+  // Note: This is required for T=Eigen::half because it only supports explicit
+  // conversions from other types and its template constructor is too relaxed
+  // to be able to use std::is_constructible.
+  template <typename U, typename std::enable_if<std::is_arithmetic<U>::value,
+                                                int>::type = 0>
+  __host__ __device__ explicit AlignedVector(U uniform_u) {
+    value_type uniform(uniform_u);
     UNROLL_ON_DEVICE for (int i = 0; i < kSize; ++i) { values_[i] = uniform; }
   }
 
",0,train
88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test

PiperOrigin-RevId: 352533456
Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",cwise_op_acos.cc,"@@ -19,7 +19,10 @@ namespace tensorflow {
 REGISTER2(UnaryOp, CPU, ""Acos"", functor::acos, float, double);
 
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
 REGISTER2(UnaryOp, GPU, ""Acos"", functor::acos, float, double);
 #endif
+#endif
 
 }  // namespace tensorflow
",0,train
88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test

PiperOrigin-RevId: 352533456
Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",cwise_op_gpu_acos.cu.cc,"@@ -19,7 +19,10 @@ limitations under the License.
 
 namespace tensorflow {
 namespace functor {
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_GPU_KERNELS_ENABLED)
 DEFINE_UNARY2(acos, float, double);
+#endif
 }  // namespace functor
 }  // namespace tensorflow
 
",0,train
88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test

PiperOrigin-RevId: 352533456
Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",gpu_op_acos.cc,"@@ -0,0 +1,24 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
+#include ""tensorflow/core/kernels/mlir_generated/gpu_ops_base.h""
+
+namespace tensorflow {
+
+GENERATE_AND_REGISTER_UNARY_KERNEL(Acos, f32, DT_FLOAT, float);
+GENERATE_AND_REGISTER_UNARY_KERNEL(Acos, f64, DT_DOUBLE, double);
+
+}  // namespace tensorflow
",0,train
88d8e664ce36476bde41aa032815ab66c0b08f59,tensorflow/tensorflow,"Add tf.acos unranked kernel and test

PiperOrigin-RevId: 352533456
Change-Id: I04a0ac4cee8a01cc3da4c29e28d792fd59bc662b",gpu_unary_ops_test.cc,"@@ -180,6 +180,17 @@ GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES(
     Abs, DT_INT64, DT_INT64, test::NearZeroAndExtremeInput<int64>(), std::abs,
     test::GpuOpsTestConfig().ExpectStrictlyEqual())
 
+/// Test `tf.Acos`.
+
+// Test only values in the function domain. The otherwise returned nan value
+// fails comparison for equality.
+GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES(
+    Acos, DT_FLOAT, DT_FLOAT, test::DefaultInputBetweenZeroAndOne<float>(),
+    std::acos, test::GpuOpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TEST_WITH_SPECIFIC_INPUT_VALUES(
+    Acos, DT_DOUBLE, DT_DOUBLE, test::DefaultInputBetweenZeroAndOne<double>(),
+    std::acos, test::GpuOpsTestConfig().ExpectStrictlyEqual())
+
 /// Test `tf.Asin`.
 
 // Test only values in the function domain. The otherwise returned nan value
",0,train
982bd0d982b5907c05ffa4699566d0b3056734be,tensorflow/tensorflow,"Add fake quant ops to mobile build targets.
Change: 138526951",fake_quant_ops_functor.h,"@@ -24,6 +24,15 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_types.h""
 #include ""tensorflow/core/platform/types.h""
 
+EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE float StdRound(float input) {
+// On Android, std::round() isn't present, just round().
+#if defined(__ANDROID__)
+  return round(input);
+#else
+  return std::round(input);
+#endif
+}
+
 namespace tensorflow {
 
 static constexpr int kSteps = 255;
@@ -45,7 +54,7 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(const float min,
     } else if (zero_point_from_min > kStepsFloat) {
       return static_cast<uint8>(kSteps);
     } else {
-      return static_cast<uint8>(std::round(zero_point_from_min));
+      return static_cast<uint8>(StdRound(zero_point_from_min));
     }
   }();
 
@@ -53,21 +62,25 @@ EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE void Nudge(const float min,
   *nudged_max = (kStepsFloat - nudged_zero_point) * (*scale);
 }
 
-template<typename T> using ConstScalar =
-  typename tensorflow::TTypes<T>::ConstScalar;
-template<typename T> using Scalar = typename tensorflow::TTypes<T>::Scalar;
-template<typename T> using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
-template<typename T> using Vec = typename tensorflow::TTypes<T>::Vec;
-template<typename T> using ConstFlat =
-  typename tensorflow::TTypes<T>::ConstFlat;
-template<typename T> using Flat = typename tensorflow::TTypes<T>::Flat;
+template <typename T>
+using ConstScalar = typename tensorflow::TTypes<T>::ConstScalar;
+template <typename T>
+using Scalar = typename tensorflow::TTypes<T>::Scalar;
+template <typename T>
+using ConstVec = typename tensorflow::TTypes<T>::ConstVec;
+template <typename T>
+using Vec = typename tensorflow::TTypes<T>::Vec;
+template <typename T>
+using ConstFlat = typename tensorflow::TTypes<T>::ConstFlat;
+template <typename T>
+using Flat = typename tensorflow::TTypes<T>::Flat;
 
 // Functor called by FakeQuantWithMinMaxArgsOp to do the work.  Compiles both
 // for CPU and GPU.
 template <typename Device>
 struct FakeQuantWithMinMaxArgsFunctor {
-  void operator()(const Device& d, ConstFlat<float> inputs,
-                  const float min, const float max, Flat<float> outputs) {
+  void operator()(const Device& d, ConstFlat<float> inputs, const float min,
+                  const float max, Flat<float> outputs) {
     eigen_assert(min <= 0.0f && ""min should be <= 0.0"");
     eigen_assert(max >= 0.0f && ""max should be >= 0.0"");
     eigen_assert(min < max && ""min should be < max"");
@@ -78,8 +91,9 @@ struct FakeQuantWithMinMaxArgsFunctor {
 
     auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
     auto clamped_shifted = clamped - nudged_min;
-    outputs.device(d) = (clamped_shifted * inv_nudged_scale + 0.5f).floor() *
-        nudged_scale + nudged_min;
+    outputs.device(d) =
+        (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale +
+        nudged_min;
   }
 };
 
@@ -97,8 +111,9 @@ struct FakeQuantWithMinMaxArgsGradientFunctor {
     float nudged_min, nudged_max, nudged_scale;
     Nudge(min, max, &nudged_min, &nudged_max, &nudged_scale);
 
-    auto between_nudged_min_max = (inputs >= nudged_min && inputs <= nudged_max)
-        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    auto between_nudged_min_max =
+        (inputs >= nudged_min && inputs <= nudged_max)
+            .select(inputs.constant(1.0f), inputs.constant(0.0f));
     backprops.device(d) = gradients * between_nudged_min_max;
   }
 };
@@ -129,7 +144,8 @@ struct FakeQuantWithMinMaxVarsFunctor {
     const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
     const auto clamped_shifted = clamped - nudged_min;
     outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
-        nudged_scale_repl + nudged_min;
+                            nudged_scale_repl +
+                        nudged_min;
   }
 };
 
@@ -137,9 +153,9 @@ struct FakeQuantWithMinMaxVarsFunctor {
 // both for CPU and GPU.
 template <typename Device>
 struct FakeQuantWithMinMaxVarsGradientFunctor {
-  void operator()(const Device& d,
-                  ConstFlat<float> gradients, ConstFlat<float> inputs,
-                  ConstScalar<float> min, ConstScalar<float> max,
+  void operator()(const Device& d, ConstFlat<float> gradients,
+                  ConstFlat<float> inputs, ConstScalar<float> min,
+                  ConstScalar<float> max,
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
@@ -158,16 +174,19 @@ struct FakeQuantWithMinMaxVarsGradientFunctor {
     float nudged_min, nudged_max, nudged_scale;
     Nudge(min(), max(), &nudged_min, &nudged_max, &nudged_scale);
 
-    const auto between_min_max = (inputs >= nudged_min && inputs <= nudged_max)
-        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    const auto between_min_max =
+        (inputs >= nudged_min && inputs <= nudged_max)
+            .select(inputs.constant(1.0f), inputs.constant(0.0f));
     backprops_wrt_input.device(d) = gradients * between_min_max;
 
-    const auto below_min = (inputs < nudged_min)
-        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    const auto below_min =
+        (inputs < nudged_min)
+            .select(inputs.constant(1.0f), inputs.constant(0.0f));
     backprop_wrt_min.device(d) = (gradients * below_min).sum();
 
-    const auto above_max = (inputs > nudged_max)
-        .select(inputs.constant(1.0f), inputs.constant(0.0f));
+    const auto above_max =
+        (inputs > nudged_max)
+            .select(inputs.constant(1.0f), inputs.constant(0.0f));
     backprop_wrt_max.device(d) = (gradients * above_max).sum();
   }
 };
@@ -180,8 +199,8 @@ using Index = typename tensorflow::TTypes<float>::ConstTensor::Index;
 // Already verified: inputs, outputs, min, max are of shape [d].
 template <typename Device>
 struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
-  void operator()(const Device& d, ConstVec<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+  void operator()(const Device& d, ConstVec<float> inputs, ConstVec<float> min,
+                  ConstVec<float> max,
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
@@ -202,8 +221,8 @@ struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
           std::max(std::min(inputs(i), nudged_max), nudged_min);
       const float clamped_shifted = clamped - nudged_min;
 
-      outputs(i) = std::round(clamped_shifted / nudged_scale) * nudged_scale +
-          nudged_min;
+      outputs(i) =
+          StdRound(clamped_shifted / nudged_scale) * nudged_scale + nudged_min;
     }
   }
 };
@@ -213,8 +232,8 @@ struct FakeQuant1WithMinMaxVarsPerChannelFunctor {
 template <typename Device>
 struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
   void operator()(const Device& d, const Index batch_size, const Index depth,
-                  ConstFlat<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+                  ConstFlat<float> inputs, ConstVec<float> min,
+                  ConstVec<float> max,
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
@@ -233,13 +252,13 @@ struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
     for (Index i = 0; i < min.size(); ++i) {
       float nudged_min, nudged_max, nudged_scale;
       Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
-      const auto clamped = inputs_restored.chip<1>(i)
-          .cwiseMin(nudged_max).cwiseMax(nudged_min);
+      const auto clamped =
+          inputs_restored.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
       const auto clamped_shifted = clamped - nudged_min;
 
       outputs.reshape(restored).chip<1>(i).device(d) =
           (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
-              nudged_min;
+          nudged_min;
     }
   }
 };
@@ -249,8 +268,7 @@ struct FakeQuant2WithMinMaxVarsPerChannelFunctor {
 template <typename Device>
 struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
   void operator()(const Device& d, const Index batch_size, const Index height,
-                  const Index width, const Index depth,
-                  ConstFlat<float> inputs,
+                  const Index width, const Index depth, ConstFlat<float> inputs,
                   ConstVec<float> min, ConstVec<float> max,
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
@@ -270,13 +288,13 @@ struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
     for (Index i = 0; i < min.size(); ++i) {
       float nudged_min, nudged_max, nudged_scale;
       Nudge(min(i), max(i), &nudged_min, &nudged_max, &nudged_scale);
-      const auto clamped = inputs_restored.chip<3>(i)
-          .cwiseMin(nudged_max).cwiseMax(nudged_min);
+      const auto clamped =
+          inputs_restored.chip<3>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
       const auto clamped_shifted = clamped - nudged_min;
 
       outputs.reshape(restored).chip<3>(i).device(d) =
           (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
-              nudged_min;
+          nudged_min;
     }
   }
 };
@@ -288,9 +306,9 @@ struct FakeQuant4WithMinMaxVarsPerChannelFunctor {
 // backprop_wrt_min, backprop_wrt_max are of shape [d].
 template <typename Device>
 struct FakeQuant1WithMinMaxVarsPerChannelGradientFunctor {
-  void operator()(const Device& d,
-                  ConstVec<float> gradients, ConstVec<float> inputs,
-                  ConstVec<float> min, ConstVec<float> max,
+  void operator()(const Device& d, ConstVec<float> gradients,
+                  ConstVec<float> inputs, ConstVec<float> min,
+                  ConstVec<float> max,
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
@@ -332,8 +350,8 @@ struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
-                  Flat<float> backprops_wrt_input,
-                  Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+                  Flat<float> backprops_wrt_input, Vec<float> backprop_wrt_min,
+                  Vec<float> backprop_wrt_max) {
 #ifndef FAKE_QUANT_NO_DEBUG
     check_min_max.device(d) = (min <= 0.0f).all();
     eigen_assert(check_min_max() && ""min should be <= 0.0 coeff-wise"");
@@ -358,14 +376,16 @@ struct FakeQuant2WithMinMaxVarsPerChannelGradientFunctor {
       backprops_wrt_input.reshape(restored).chip<1>(i).device(d) =
           gradients_chip * between_min_max;
 
-      const auto below_min = (inputs_chip < nudged_min)
-          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      const auto below_min =
+          (inputs_chip < nudged_min)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
       Eigen::DSizes<Index, 1> reduce(0);
       backprop_wrt_min.chip<0>(i).device(d) =
           (gradients_chip * below_min).sum(reduce);
 
-      const auto above_max = (inputs_chip > nudged_max)
-          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      const auto above_max =
+          (inputs_chip > nudged_max)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
       backprop_wrt_max.chip<0>(i).device(d) =
           (gradients_chip * above_max).sum(reduce);
     }
@@ -383,8 +403,8 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
 #ifndef FAKE_QUANT_NO_DEBUG
                   Scalar<bool> check_min_max,
 #endif
-                  Flat<float> backprops_wrt_input,
-                  Vec<float> backprop_wrt_min, Vec<float> backprop_wrt_max) {
+                  Flat<float> backprops_wrt_input, Vec<float> backprop_wrt_min,
+                  Vec<float> backprop_wrt_max) {
 #ifndef FAKE_QUANT_NO_DEBUG
     check_min_max.device(d) = (min <= 0.0f).all();
     eigen_assert(check_min_max() && ""min should be <= 0.0 coeff-wise"");
@@ -409,14 +429,16 @@ struct FakeQuant4WithMinMaxVarsPerChannelGradientFunctor {
       backprops_wrt_input.reshape(restored).chip<3>(i).device(d) =
           gradients_chip * between_min_max;
 
-      const auto below_min = (inputs_chip < nudged_min)
-          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      const auto below_min =
+          (inputs_chip < nudged_min)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
       Eigen::DSizes<Index, 3> reduce(0, 1, 2);
       backprop_wrt_min.chip<0>(i).device(d) =
           (gradients_chip * below_min).sum(reduce);
 
-      const auto above_max = (inputs_chip > nudged_max)
-          .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
+      const auto above_max =
+          (inputs_chip > nudged_max)
+              .select(inputs_chip.constant(1.0f), inputs_chip.constant(0.0f));
       backprop_wrt_max.chip<0>(i).device(d) =
           (gradients_chip * above_max).sum(reduce);
     }
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",generate_testspec.cc,"@@ -114,7 +114,13 @@ bool GenerateTestSpecFromTensorflowModel(
     // different set.
     std::vector<string> input_values =
         GenerateInputValues(input_layer, input_layer_type, input_layer_shape);
-    if (input_values.empty()) return false;
+    if (input_values.empty()) {
+      std::cerr << ""Unable to generate input values for the TensorFlow model. ""
+                   ""Make sure the correct values are defined for ""
+                   ""input_layer, input_layer_type, and input_layer_shape.""
+                << std::endl;
+      return false;
+    }
 
     // Run TensorFlow.
     for (int j = 0; j < input_values.size(); j++) {
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tf_driver.cc,"@@ -179,7 +179,9 @@ void TfDriver::Invoke() {
   auto status = session_->Run({input_tensors_.begin(), input_tensors_.end()},
                               output_names_, {}, &output_tensors_);
   if (!status.ok()) {
-    Invalidate(""Failed to run input data on graph"");
+    Invalidate(
+        ""Failed to run input data on graph. Make sure the correct value is ""
+        ""defined for the input and output arrays."");
   }
 }
 
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tflite_diff_flags.h,"@@ -33,6 +33,7 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
     string input_layer_shape;
     string output_layer;
     int32_t num_runs_per_pass = 100;
+    string delegate;
   } values;
 
   std::vector<tensorflow::Flag> flags = {
@@ -42,18 +43,21 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
                        ""Path of tensorflow lite model.""),
       tensorflow::Flag(""input_layer"", &values.input_layer,
                        ""Names of input tensors, separated by comma. Example: ""
-                       ""input_1,input_2""),
+                       ""input_1,input_2.""),
       tensorflow::Flag(""input_layer_type"", &values.input_layer_type,
                        ""Data types of input tensors, separated by comma. ""
-                       ""Example: float,int""),
+                       ""Example: float,int.""),
       tensorflow::Flag(
           ""input_layer_shape"", &values.input_layer_shape,
-          ""Shapes of input tensors, separated by colon. Example: 1,3,4,1:2""),
+          ""Shapes of input tensors, separated by colon. Example: 1,3,4,1:2.""),
       tensorflow::Flag(""output_layer"", &values.output_layer,
-                       ""Names of output tensors, separated by comma. Example ""
-                       ""output_1,output_2""),
+                       ""Names of output tensors, separated by comma. Example: ""
+                       ""output_1,output_2.""),
       tensorflow::Flag(""num_runs_per_pass"", &values.num_runs_per_pass,
-                       ""Number of full runs in each pass.""),
+                       ""[optional] Number of full runs in each pass.""),
+      tensorflow::Flag(""delegate"", &values.delegate,
+                       ""[optional] Delegate to use for executing ops. Must be ""
+                       ""`{\""\"", EAGER}`""),
   };
 
   bool no_inputs = *argc == 1;
@@ -61,6 +65,14 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
   if (!success || no_inputs || (*argc == 2 && !strcmp(argv[1], ""--helpfull""))) {
     fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str());
     return {};
+  } else if (values.tensorflow_model.empty() || values.tflite_model.empty() ||
+             values.input_layer.empty() || values.input_layer_type.empty() ||
+             values.input_layer_shape.empty() || values.output_layer.empty()) {
+    fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str());
+    return {};
+  } else if (!(values.delegate == """" || values.delegate == ""EAGER"")) {
+    fprintf(stderr, ""%s"", tensorflow::Flags::Usage(argv[0], flags).c_str());
+    return {};
   }
 
   return {values.tensorflow_model,
@@ -69,7 +81,8 @@ DiffOptions ParseTfliteDiffFlags(int* argc, char** argv) {
           Split<string>(values.input_layer_type, "",""),
           Split<string>(values.input_layer_shape, "":""),
           Split<string>(values.output_layer, "",""),
-          values.num_runs_per_pass};
+          values.num_runs_per_pass,
+          values.delegate};
 }
 
 }  // namespace testing
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tflite_diff_util.cc,"@@ -33,7 +33,7 @@ bool RunDiffTest(const DiffOptions& options, int num_invocations) {
           options.input_layer_shape, options.output_layer)) {
     return false;
   }
-  TfLiteDriver tflite_driver(/*use_nnapi=*/true);
+  TfLiteDriver tflite_driver(/*use_nnapi=*/true, options.delegate);
   tflite_driver.LoadModel(options.tflite_model);
   return tflite::testing::ParseAndRunTests(&tflite_stream, &tflite_driver);
 }
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tflite_diff_util.h,"@@ -44,6 +44,9 @@ struct DiffOptions {
   // each of the passes. The first pass has a single inference, while the
   // second pass does multiple inferences back to back.
   int num_runs_per_pass;
+  // Path to the delegate library to be loaded in order to execute ops. Must be
+  // `{"""", EAGER}`.
+  string delegate;
 };
 
 // Run a single TensorFlow Lite diff test with the given options.
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tflite_driver.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 #include <iostream>
 
 #include ""tensorflow/contrib/lite/builtin_op_data.h""
+#include ""tensorflow/contrib/lite/delegates/eager/delegate.h""
 #include ""tensorflow/contrib/lite/testing/split.h""
 
 namespace tflite {
@@ -135,7 +136,13 @@ class TfLiteDriver::Expectation {
   size_t num_elements_;
 };
 
-TfLiteDriver::TfLiteDriver(bool use_nnapi) : use_nnapi_(use_nnapi) {}
+TfLiteDriver::TfLiteDriver(bool use_nnapi, const string& delegate_name)
+    : use_nnapi_(use_nnapi) {
+  if (delegate_name == ""EAGER"") {
+    delegate_.reset(new EagerDelegate());
+  }
+}
+
 TfLiteDriver::~TfLiteDriver() {}
 
 void TfLiteDriver::AllocateTensors() {
@@ -165,6 +172,13 @@ void TfLiteDriver::LoadModel(const string& bin_file_path) {
   }
   interpreter_->UseNNAPI(use_nnapi_);
 
+  if (delegate_) {
+    if (delegate_->Apply(interpreter_.get()) != kTfLiteOk) {
+      Invalidate(""Unable to build the graph using the delegate"");
+      return;
+    }
+  }
+
   must_allocate_tensors_ = true;
 }
 
",0,train
63a49c712edd3b2ee990a9f98b766b24190d3ccb,tensorflow/tensorflow,"Adds support for Eager delegate to tflite_diff.

PiperOrigin-RevId: 208752057",tflite_driver.h,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <map>
 
+#include ""tensorflow/contrib/lite/delegates/eager/delegate.h""
 #include ""tensorflow/contrib/lite/interpreter.h""
 #include ""tensorflow/contrib/lite/kernels/register.h""
 #include ""tensorflow/contrib/lite/model.h""
@@ -28,7 +29,7 @@ namespace testing {
 // A test runner that feeds inputs into TF Lite and verifies its outputs.
 class TfLiteDriver : public TestRunner {
  public:
-  explicit TfLiteDriver(bool use_nnapi);
+  explicit TfLiteDriver(bool use_nnapi, const string& delegate = """");
   ~TfLiteDriver() override;
 
   void LoadModel(const string& bin_file_path) override;
@@ -52,6 +53,7 @@ class TfLiteDriver : public TestRunner {
 
   class Expectation;
 
+  std::unique_ptr<EagerDelegate> delegate_;
   bool use_nnapi_ = false;
   std::unique_ptr<FlatBufferModel> model_;
   std::unique_ptr<Interpreter> interpreter_;
",0,train
10fb2155fb720f9e0e70d9e48a934383b4b42c91,tensorflow/tensorflow,"Revert ""modify docstring""

This reverts commit 5f2e0240ee7977042e41d9c29c349a7b14301290.",loader_impl.py,"@@ -73,7 +73,7 @@ def parse_saved_model(export_dir):
   """"""Reads the savedmodel.pb or savedmodel.pbtxt file containing `SavedModel`.
 
   Args:
-    export_dir: String or Pathlike, path to the directory containing the SavedModel file.
+    export_dir: Directory containing the SavedModel file.
 
   Returns:
     A `SavedModel` protocol buffer.
",0,train
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",api.py,"@@ -28,7 +28,6 @@ from enum import Enum
 
 # pylint:disable=g-bad-import-order
 import numpy as np
-import six
 # pylint:enable=g-bad-import-order
 
 
@@ -182,18 +181,18 @@ def _call_unconverted(f, args, kwargs):
   Returns:
     The return value of f(*args, **kwargs).
   """"""
-  if inspect_utils.istfmethodtarget(f):
-    return f.__self__.call(args, kwargs)
-
-  return f(*args, **kwargs)
+  # TODO(mdan): This may be inconsistent in certain situations.
+  # If the function had already been annotated with @tf.function, it
+  # may be bound to the incorrect object. It's unclear if those situations
+  # are possible, but if they happen, we need to check if f is bound
+  # to a shim like WeakrefSelf and unpack it.
 
+  if tf_inspect.ismethod(f) and args:
+    f_self = inspect_utils.getmethodself(f)
+    if args[0] is f_self:
+      args = args[1:]
 
-def _is_known_loaded_type(f, module_name, entity_name):
-  if tf_inspect.ismethod(f):
-    f = six.get_unbound_function(f)
-  return (module_name in sys.modules and
-          hasattr(sys.modules[module_name], entity_name) and
-          isinstance(f, getattr(sys.modules[module_name], entity_name)))
+  return f(*args, **kwargs)
 
 
 def converted_call(f, owner, options, args, kwargs):
@@ -220,12 +219,13 @@ def converted_call(f, owner, options, args, kwargs):
     return py_builtins.overload_of(f)(*args, **kwargs)
 
   # TODO(b/122265385): Remove this bypass.
-  if (_is_known_loaded_type(f, 'wrapt', 'FunctionWrapper') or
-      _is_known_loaded_type(f, 'wrapt', 'BoundFunctionWrapper')):
+  if ('wrapt' in sys.modules and
+      hasattr(sys.modules['wrapt'], 'FunctionWrapper') and
+      isinstance(f, sys.modules['wrapt'].FunctionWrapper)):
     logging.warn(
         'Entity {} appears to be decorated by wrapt, which is not yet supported'
         ' by AutoGraph. The function will be called without transformation.'
-        ' You may however apply AutoGraph before the decorator.'.format(f))
+        ' You may however apply AutoGraph before the decorator.'.format(f), 1)
     logging.log(2, 'Permanently whitelisted: %s: wrapt decorated', f)
     return _call_unconverted(f, args, kwargs)
 
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",api_test.py,"@@ -402,6 +402,21 @@ class ApiTest(test.TestCase):
     self.evaluate(variables.global_variables_initializer())
     self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
 
+  def test_converted_call_whitelisted_method_extra_self(self):
+
+    opts = converter.ConversionOptions()
+
+    model = sequential.Sequential([
+        core.Dense(2)
+    ])
+
+    x = api.converted_call(model.call, None, opts,
+                           (model, constant_op.constant([[0.0]])),
+                           {'training': True})
+
+    self.evaluate(variables.global_variables_initializer())
+    self.assertAllEqual([[0.0, 0.0]], self.evaluate(x))
+
   def test_converted_call_whitelisted_method_via_owner(self):
 
     opts = converter.ConversionOptions()
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",inspect_utils.py,"@@ -191,12 +191,9 @@ def getdefiningclass(m, owner_class):
   return owner_class
 
 
-def istfmethodtarget(m):
-  """"""Tests whether an object is a `function.TfMethodTarget`.""""""
-  # See eager.function.TfMethodTarget for more details.
-  return (hasattr(m, '__self__') and
-          hasattr(m.__self__, 'weakrefself_target__') and
-          hasattr(m.__self__, 'weakrefself_func__'))
+def isweakrefself(m):
+  """"""Tests whether an object is a ""weakref self"" wrapper, see getmethodself.""""""
+  return hasattr(m, '__self__') and hasattr(m.__self__, 'ag_self_weakref__')
 
 
 def getmethodself(m):
@@ -209,8 +206,8 @@ def getmethodself(m):
   # A fallback allowing methods to be actually bound to a type different
   # than __self__. This is useful when a strong reference from the method
   # to the object is not desired, for example when caching is involved.
-  if istfmethodtarget(m):
-    return m.__self__.target
+  if isweakrefself(m):
+    return m.__self__.ag_self_weakref__()
 
   return m.__self__
 
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",inspect_utils_test.py,"@@ -28,7 +28,6 @@ import six
 
 from tensorflow.python import lib
 from tensorflow.python.autograph.pyct import inspect_utils
-from tensorflow.python.eager import function
 from tensorflow.python.framework import constant_op
 from tensorflow.python.platform import test
 
@@ -359,13 +358,15 @@ class InspectUtilsTest(test.TestCase):
   def test_getmethodclass_weakref_mechanism(self):
     test_obj = TestClass()
 
+    class WeakrefWrapper(object):
+
+      def __init__(self):
+        self.ag_self_weakref__ = weakref.ref(test_obj)
+
     def test_fn(self):
       return self
 
-    bound_method = types.MethodType(
-        test_fn,
-        function.TfMethodTarget(
-            weakref.ref(test_obj), test_obj.member_function))
+    bound_method = types.MethodType(test_fn, WeakrefWrapper())
     self.assertEqual(inspect_utils.getmethodclass(bound_method), TestClass)
 
   def test_getmethodclass_no_bool_conversion(self):
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",dataset_ops.py,"@@ -2366,8 +2366,6 @@ class StructuredFunctionWrapper(object):
     else:
       defun_kwargs.update({""func_name"": func_name})
 
-      # TODO(b/124254153): Enable autograph once the overhead is low enough.
-      # TODO(mdan): Make sure autograph recurses into _wrapper_helper when on.
       @eager_function.defun_with_attributes(
           input_signature=[
               tensor_spec.TensorSpec(input_shape, input_type)  # pylint: disable=g-complex-comprehension
@@ -2375,7 +2373,6 @@ class StructuredFunctionWrapper(object):
                   self._input_structure._flat_shapes,
                   self._input_structure._flat_types)
           ],
-          autograph=False,
           attributes=defun_kwargs)
       def wrapper_fn(*args):  # pylint: disable=missing-docstring
         ret = _wrapper_helper(*args)
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",multi_device_iterator_ops.py,"@@ -42,15 +42,13 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
         gen_dataset_ops.multi_device_iterator_to_string_handle(
             multi_device_iterator_resource))
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
-    @function.defun(autograph=False)  # Pure graph code.
+    @function.defun()
     def _init_func():
       return multi_device_iterator_string_handle
 
     init_func_concrete = _init_func._get_concrete_function_internal()  # pylint: disable=protected-access
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
-    @function.defun(autograph=False)  # Pure graph code.
+    @function.defun()
     def _remote_init_func():
       return functional_ops.remote_call(
           target=source_device,
@@ -61,10 +59,7 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
     self._init_func = _remote_init_func._get_concrete_function_internal()  # pylint: disable=protected-access
     self._init_captured_args = self._init_func.captured_inputs
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
-    @function.defun(
-        input_signature=[tensor_spec.TensorSpec([], dtypes.string)],
-        autograph=False)  # Pure graph code.
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)])
     def _next_func(string_handle):
       # pylint: disable=protected-access
       multi_device_iterator = (
@@ -81,11 +76,9 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
 
     next_func_concrete = _next_func._get_concrete_function_internal()  # pylint: disable=protected-access
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
     @function.defun_with_attributes(
         input_signature=[tensor_spec.TensorSpec([], dtypes.string)],
-        attributes={""experimental_ints_on_device"": True},
-        autograph=False)  # Pure graph code.
+        attributes={""experimental_ints_on_device"": True})
     def _remote_next_func(string_handle):
       return functional_ops.remote_call(
           target=source_device,
@@ -101,19 +94,13 @@ class _PerDeviceGenerator(dataset_ops.DatasetV2):
       if arg == incarnation_id:
         self._incarnation_id_index = i
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
-    @function.defun(
-        input_signature=[tensor_spec.TensorSpec([], dtypes.string)],
-        autograph=False)  # Pure graph code.
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)])
     def _finalize_func(unused_string_handle):
       return array_ops.constant(0, dtypes.int64)
 
     finalize_func_concrete = _finalize_func._get_concrete_function_internal()  # pylint: disable=protected-access
 
-    # TODO(b/124254153): Enable autograph once the overhead is low enough.
-    @function.defun(
-        input_signature=[tensor_spec.TensorSpec([], dtypes.string)],
-        autograph=False)  # Pure graph code.
+    @function.defun(input_signature=[tensor_spec.TensorSpec([], dtypes.string)])
     def _remote_finalize_func(string_handle):
       return functional_ops.remote_call(
           target=source_device,
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",def_function.py,"@@ -514,7 +514,7 @@ class Function(object):
     """"""Make and call a `ConcreteFunction` which initializes variables.""""""
 
     # Note: using defun here avoids an infinite recursion.
-    @function_lib.defun(autograph=False)  # Pure graph code.
+    @function_lib.defun
     def initialize_variables():
       for v, init in initializer_map.items():
         with ops.init_scope():
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",function.py,"@@ -61,7 +61,6 @@ from tensorflow.python.util import tf_inspect
 FORWARD_FUNCTION_ATTRIBUTE_NAME = ""forward_function_name""
 BACKWARD_FUNCTION_ATTRIBUTE_NAME = ""backward_function_name""
 
-
 class CacheKey(
     collections.namedtuple(""CacheKey"", [
         ""input_signature"", ""parent_graph"", ""device_functions"",
@@ -2020,23 +2019,13 @@ def defun_with_attributes(func=None,
 
 
 # When a method is bound to objects of this type, it allows AutoGraph to
-# recover a weak reference to the original method's self pointer, so that it can
-# execute it consistent with class_method_to_instance_method's
-# bound_method_wrapper.
+# recover a weak reference to the original method's self pointer. This uses the
+# mechanism from pyct.inspect_utils.getmethodclass.
 # TODO(b/119246461): This is not pretty. Use a descriptor instead?
-class TfMethodTarget(object):
-  """"""Binding target for methods replaced by function and defun.""""""
-
-  def __init__(self, target, original_python_function):
-    self.weakrefself_target__ = target
-    self.weakrefself_func__ = weakref.ref(original_python_function)
-
-  @property
-  def target(self):
-    return self.weakrefself_target__()
+class _WeakrefSelf(object):
 
-  def call(self, args, kwargs):
-    return self.weakrefself_func__()(*args, **kwargs)
+  def __init__(self, target):
+    self.ag_self_weakref__ = target
 
 
 def class_method_to_instance_method(original_function, instance):
@@ -2045,9 +2034,8 @@ def class_method_to_instance_method(original_function, instance):
 
   # Note: while we could bind to a weakref proxy instead, that causes the
   # bound method to be unhashable.
-  bound_method = types_lib.MethodType(
-      original_function.python_function,
-      TfMethodTarget(weak_instance, original_function.python_function))
+  bound_method = types_lib.MethodType(original_function.python_function,
+                                      _WeakrefSelf(weak_instance))
 
   # original_function is expected to be of one of the two `Function` types
   # (defined either in function.py or def_function.py).
@@ -2065,7 +2053,6 @@ def class_method_to_instance_method(original_function, instance):
 
     if wrapped_fn is strong_bound_method_wrapper.__original_wrapped__:
       # If __wrapped__ was not replaced, then call original_function.
-      # TODO(mdan): For better consistency, use the wrapper's call().
       wrapped_fn = original_function.python_function
       if tf_inspect.ismethod(wrapped_fn):
         wrapped_fn = six.get_unbound_function(wrapped_fn)
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",func_graph.py,"@@ -679,8 +679,7 @@ def func_graph_from_py_func(name,
         # Wrapping around a decorator allows checks like tf_inspect.getargspec
         # to be accurate.
         converted_func = tf_decorator.make_decorator(original_func, wrapper)
-        python_func = tf_decorator.rewrap(python_func, original_func,
-                                          converted_func)
+        tf_decorator.rewrap(python_func, original_func, converted_func)
 
       func_outputs = python_func(*func_args, **func_kwargs)
 
",0,test
60089e430e34c870e074c6d5746ed88ea9b39909,tensorflow/tensorflow,"Automated rollback of commit f37a66cf1f7568c40b858878b34d2efca5f4566a

PiperOrigin-RevId: 235649273",tf_decorator.py,"@@ -138,10 +138,6 @@ def rewrap(decorator_func, previous_target, new_target):
     decorator_func: Callable returned by `wrap`.
     previous_target: Callable that needs to be replaced.
     new_target: Callable to replace previous_target with.
-
-  Returns:
-    The updated decorator. If decorator_func is not a tf_decorator, new_target
-    is returned.
   """"""
   # Because the process mutates the decorator, we only need to alter the
   # innermost function that wraps previous_target.
@@ -154,15 +150,9 @@ def rewrap(decorator_func, previous_target, new_target):
     if target.decorated_target is previous_target:
       break
     cur = target.decorated_target
-    assert cur is not None
 
-  # If decorator_func is not a decorator, new_target replaces it directly.
   if innermost_decorator is None:
-    # Consistency check. The caller should always pass the result of
-    # tf_decorator.unwrap as previous_target. If decorator_func is not a
-    # decorator, that will have returned decorator_func itself.
-    assert decorator_func is previous_target
-    return new_target
+    return
 
   target.decorated_target = new_target
 
@@ -178,8 +168,6 @@ def rewrap(decorator_func, previous_target, new_target):
   else:
     innermost_decorator.__wrapped__ = new_target
 
-  return decorator_func
-
 
 def unwrap(maybe_tf_decorator):
   """"""Unwraps an object into a list of TFDecorators and a final target.
",0,test
547bd9c88b1a86f0543fff3460e2d4d1c8009cb4,tensorflow/tensorflow,"[TF:TRT] Limit the number of times that a warning message is printed out.

Add LOG_FIRST_FEW_WARNING_WITH_PREFIX for only printing out the first five
occurrences of a warning message.

Use the new macro to replace the use of LOG_WARNING_WITH_PREFIX in the
TRTEngineOp runtime. This can avoid repeating the same warning message at each
inference step.

PiperOrigin-RevId: 339068142
Change-Id: Ibb3cc172fcd23f76df6cec67085233b347263668",trt_engine_op.cc,"@@ -60,6 +60,9 @@ using absl::StrCat;
 using ::nvinfer1::IRuntime;
 using ::stream_executor::port::StatusOr;
 
+#define LOG_FIRST_FEW_WARNING_WITH_PREFIX \
+  LOG_FIRST_N(WARNING, 5) << ""TF-TRT Warning: ""
+
 // A helper class to call done() when destructed for asynchronous execution.
 // Helps simultaneous execution of native and TRT engines.
 
@@ -584,9 +587,10 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
   Status verify_input_shape_status = VerifyInputShapes(input_concrete_shapes);
   // TODO(bixia): Fix the segmentation.
   if (!verify_input_shape_status.ok()) {
-    LOG_FIRST_N(WARNING, 5) << ""Running native segment for"" << name()
-                            << "" due to failure in verifying input shapes: ""
-                            << verify_input_shape_status.error_message();
+    LOG_FIRST_FEW_WARNING_WITH_PREFIX
+        << ""Running native segment for"" << name()
+        << "" due to failure in verifying input shapes: ""
+        << verify_input_shape_status.error_message();
     ExecuteNativeSegment(ctx, helper);
     return;
   }
@@ -625,7 +629,7 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
     return true;
   };
   if (!engine_context->cuda_engine) {
-    LOG_WARNING_WITH_PREFIX
+    LOG_FIRST_FEW_WARNING_WITH_PREFIX
         << ""Engine retrieval for input shapes: ""
         << TensorShapeUtils::ShapeListString(input_concrete_shapes)
         << "" failed. Running native segment for "" << name();
@@ -636,8 +640,9 @@ void TRTEngineOp::ComputeAsync(OpKernelContext* ctx,
   }
   Status stat = ExecuteTrtEngine(ctx, engine_context, trt_context_idx);
   if (!stat.ok()) {
-    LOG_WARNING_WITH_PREFIX << ""Failed to execute engine: "" << stat
-                            << "" Retrying with native segment for "" << name();
+    LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Failed to execute engine: "" << stat
+                                      << "" Retrying with native segment for ""
+                                      << name();
     if (!may_execute_native_segment()) {
       return;
     }
@@ -755,9 +760,10 @@ StatusOr<TrtUniquePtrType<nvinfer1::ICudaEngine>> TRTEngineOp::BuildEngine(
       calibrator, &engine, use_calibration, use_implicit_batch_, nullptr,
       &cache_resource->profiles_);
   if (!status.ok()) {
-    LOG_WARNING_WITH_PREFIX << ""Engine creation for "" << name() << "" failed. ""
-                            << ""The native segment will be used instead. ""
-                            << ""Reason: "" << status;
+    LOG_FIRST_FEW_WARNING_WITH_PREFIX
+        << ""Engine creation for "" << name() << "" failed. ""
+        << ""The native segment will be used instead. ""
+        << ""Reason: "" << status;
     // Store an empty engine in the cache for these input shapes so we don't try
     // to build the same failing engine again.
     cache_resource->cache_.emplace(input_concrete_shapes,
@@ -822,9 +828,9 @@ StatusOr<std::pair<EngineContext*, int>> TRTEngineOp::GetEngine(
               FunctionDefToGraphDef(func_handle_, lib, &segment_graph_def_);
         }
         if (!status.ok()) {
-          LOG_WARNING_WITH_PREFIX << ""Getting segment graph for "" << name()
-                                  << "" failed. ""
-                                  << ""Reason: "" << status;
+          LOG_FIRST_FEW_WARNING_WITH_PREFIX << ""Getting segment graph for ""
+                                            << name() << "" failed. ""
+                                            << ""Reason: "" << status;
         }
       }
       auto result = BuildEngine(input_concrete_shapes, batch_size,
@@ -883,7 +889,7 @@ StatusOr<std::pair<EngineContext*, int>> TRTEngineOp::GetEngine(
   // If cache does not have a compatible engine then create a new engine.
   if (engine_contexts == nullptr) {
     if (!allow_build_at_runtime_) {
-      LOG_WARNING_WITH_PREFIX
+      LOG_FIRST_FEW_WARNING_WITH_PREFIX
           << ""Found no engine in cache matching input shapes. ""
           << ""Not building a new engine because ""
           << ""allow_build_at_runtime=False. ""
",0,train
484f0e5fd96c850c5a1ba87b8a6b8b23b11582e0,tensorflow/tensorflow,"Support folding TF::TransposeOp when perm is a constant instead of TF::ConstOp

PiperOrigin-RevId: 328149666
Change-Id: I0c5561152383f12126ab9568c0facc4c3043c6a3",tf_ops_n_z.cc,"@@ -1939,11 +1939,9 @@ void TransposeOp::build(OpBuilder &builder, OperationState &result, Value x,
 namespace {
 
 OpFoldResult FoldIdentityTranspose(TransposeOp op) {
-  auto const_perm = dyn_cast_or_null<TF::ConstOp>(op.perm().getDefiningOp());
-  if (!const_perm) return {};
-
-  auto const_value = const_perm.value();
-  const auto elements = const_value.getValues<APInt>();
+  DenseIntElementsAttr perm;
+  if (!matchPattern(op.perm(), m_Constant(&perm))) return {};
+  const auto elements = perm.getValues<APInt>();
 
   for (auto it : llvm::enumerate(elements)) {
     if (it.index() != it.value()) return {};
@@ -1966,14 +1964,14 @@ OpFoldResult FoldCancellableTranspose(TransposeOp op) {
   if (!transpose) return {};
 
   // Permutations defined by constant operations.
-  auto perm0 = dyn_cast_or_null<TF::ConstOp>(op.perm().getDefiningOp());
-  auto perm1 = dyn_cast_or_null<TF::ConstOp>(transpose.perm().getDefiningOp());
-  if (!perm0 || !perm1) return {};
+  DenseIntElementsAttr perm0;
+  DenseIntElementsAttr perm1;
+  if (!matchPattern(op.perm(), m_Constant(&perm0)) ||
+      !matchPattern(transpose.perm(), m_Constant(&perm1)))
+    return {};
 
   // With permutation indices that cancel each other
-  auto perm0_value = perm0.value().cast<DenseIntElementsAttr>();
-  auto perm1_value = perm1.value().cast<DenseIntElementsAttr>();
-  if (!AreCancellablePermutations(perm0_value, perm1_value)) return {};
+  if (!AreCancellablePermutations(perm0, perm1)) return {};
 
   return transpose.x();
 }
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",conv_powervr.cc,"@@ -274,9 +274,9 @@ std::string GenerateConvPowerVR1x1(
           if (src_tensor_type == TensorStorageType::IMAGE_BUFFER) {
             if (op_def.precision == CalculationsPrecision::F32_F16) {
               c += ""    src"" + id + "" = "" +
-                   src_tensor.ReadAsFloat3D(""src_a_"" + id) + multiplier + "";\n"";
+                   src_tensor.ReadAsFloat(""src_a_"" + id) + multiplier + "";\n"";
             } else {
-              c += ""    src"" + id + "" = "" + src_tensor.Read3D(""src_a_"" + id) +
+              c += ""    src"" + id + "" = "" + src_tensor.Read(""src_a_"" + id) +
                    multiplier + "";\n"";
             }
           }
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",conv_texture.cc,"@@ -151,10 +151,10 @@ std::string GenerateConvCode(
   }
   c += ""  for (int s = 0; s < src_size.w; ++s) {\n"";
   if (is_image_buffer) {
-    c += ""    FLT4 src0 = "" + src_tensor.Read3D(""addr_0"") + "";\n"";
-    c += ""    FLT4 src1 = "" + src_tensor.Read3D(""addr_1"") + "";\n"";
-    c += ""    FLT4 src2 = "" + src_tensor.Read3D(""addr_2"") + "";\n"";
-    c += ""    FLT4 src3 = "" + src_tensor.Read3D(""addr_3"") + "";\n"";
+    c += ""    FLT4 src0 = "" + src_tensor.Read(""addr_0"") + "";\n"";
+    c += ""    FLT4 src1 = "" + src_tensor.Read(""addr_1"") + "";\n"";
+    c += ""    FLT4 src2 = "" + src_tensor.Read(""addr_2"") + "";\n"";
+    c += ""    FLT4 src3 = "" + src_tensor.Read(""addr_3"") + "";\n"";
   }
   std::string fc0 = ""(int2)(Z, "" + f_y + "")"";
   std::string fc1 = ""(int2)(Z + 1, "" + f_y + "")"";
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",convolution_transposed_3x3_thin.cc,"@@ -103,10 +103,10 @@ std::string GenerateConvolutionTransposedCode(
       c += ""  c1 = select(-1, c1, x_in);\n"";
       c += ""  c2 = select(-1, c2, y_in);\n"";
       c += ""  c3 = select(-1, c3, x_in && y_in);\n"";
-      c += ""  FLT4 src0 = "" + src_tensor.Read3D(""c0"") + "";\n"";
-      c += ""  FLT4 src1 = "" + src_tensor.Read3D(""c1"") + "";\n"";
-      c += ""  FLT4 src2 = "" + src_tensor.Read3D(""c2"") + "";\n"";
-      c += ""  FLT4 src3 = "" + src_tensor.Read3D(""c3"") + "";\n"";
+      c += ""  FLT4 src0 = "" + src_tensor.Read(""c0"") + "";\n"";
+      c += ""  FLT4 src1 = "" + src_tensor.Read(""c1"") + "";\n"";
+      c += ""  FLT4 src2 = "" + src_tensor.Read(""c2"") + "";\n"";
+      c += ""  FLT4 src3 = "" + src_tensor.Read(""c3"") + "";\n"";
     } else {
       const auto mode = GetFastestZeroMode(device);
       c += ""  FLT4 src0 = "" + src_tensor.Read3D(""X"", ""Y"", z, mode) + "";\n"";
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",max_unpooling.cc,"@@ -62,16 +62,15 @@ std::string GetMaxUnoolingKernelCode(
     code += ""  FLT4 src = (FLT4)(0.0f);\n"";
     code += ""  int4 ind = (int4)(0);\n"";
     code += ""  if (!outside) {\n"";
-    code +=
-        ""    src = "" + src.Read3D(""src_adr"", TextureAddressMode::DONT_CARE) +
-        "";\n"";
+    code += ""    src = "" + src.Read(""src_adr"", TextureAddressMode::DONT_CARE) +
+            "";\n"";
     code += ""    ind = convert_int4("" +
-            src_ind.Read3D(""src_adr"", TextureAddressMode::DONT_CARE) + "");\n"";
+            src_ind.Read(""src_adr"", TextureAddressMode::DONT_CARE) + "");\n"";
     code += ""  }\n"";
   } else {
-    code += ""  FLT4 src = "" + src.Read3D(""src_adr"", address_mode) + "";\n"";
+    code += ""  FLT4 src = "" + src.Read(""src_adr"", address_mode) + "";\n"";
     code += ""  int4 ind = convert_int4("" +
-            src_ind.Read3D(""src_adr"", address_mode) + "");\n"";
+            src_ind.Read(""src_adr"", address_mode) + "");\n"";
   }
   code += ""  int t_x = X - (src_x * stride.x - padding.x);\n"";
   code += ""  int t_y = Y - (src_y * stride.y - padding.y);\n"";
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",pooling.cc,"@@ -154,12 +154,11 @@ std::string GetMaxPoolingKernelCode(
   code += ""      };\n"";
   code += ""    }\n"";
   code += ""  }\n"";
-  code += ""  "" + dst_tensor.GetAddress(""address"", ""X"", ""Y"", ""Z"") + ""\n"";
   const LinkingContext context{""maximum"", ""X"", ""Y"", ""Z""};
   code += PostProcess(linked_operations, context);
-  code += ""  "" + dst_tensor.Write3D(""maximum"", ""address"");
+  code += ""  "" + dst_tensor.Write3D(""maximum"", ""X"", ""Y"", ""Z"");
   if (output_indices) {
-    code += ""  "" + indices_tensor.Write3D(""indexes"", ""address"");
+    code += ""  "" + indices_tensor.Write3D(""indexes"", ""X"", ""Y"", ""Z"");
   }
   code += ""}\n"";
 
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",softmax.cc,"@@ -61,10 +61,10 @@ std::string GetSoftmaxKernelCode(
   code += ""    sum += dot(mask, exp(t));\n"";
   code += ""  }\n"";
   code += ""  for (int d = 0; d < size.w; ++d) {\n"";
-  code += ""    "" + src_tensor.GetAddress(""address"", ""X"", ""Y"", ""d"") + ""\n"";
-  code += ""    float4 t = "" +
-          src_tensor.ReadAsFloat3D(""address"", TextureAddressMode::DONT_CARE) +
-          "";\n"";
+  code +=
+      ""    float4 t = "" +
+      src_tensor.ReadAsFloat3D(""X"", ""Y"", ""d"", TextureAddressMode::DONT_CARE) +
+      "";\n"";
   code += ""    t = exp(t) / sum;\n"";
   code += ""    FLT4 result = TO_FLT4(t);\n"";
   const LinkingContext context{""result"", ""X"", ""Y"", ""d""};
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",softmax1x1.cc,"@@ -83,13 +83,13 @@ std::string GetSoftmaxKernelCode(
   code += ""  do {\n"";
   code += ""    int z = offset + tid;\n"";
   code += ""    if (z < size.x) {\n"";
-  code += ""    "" + dst_tensor.GetAddress(""address"", ""0"", ""0"", ""z"") + ""\n"";
-  code += ""      FLT4 value = TO_FLT4(exp("" +
-          src_tensor.ReadAsFloat3D(""address"", TextureAddressMode::DONT_CARE) +
-          "") * sum);\n"";
+  code +=
+      ""      FLT4 value = TO_FLT4(exp("" +
+      src_tensor.ReadAsFloat3D(""0"", ""0"", ""z"", TextureAddressMode::DONT_CARE) +
+      "") * sum);\n"";
   const LinkingContext context{""value"", ""0"", ""0"", ""z""};
   code += PostProcess(linked_operations, context);
-  code += ""    "" + dst_tensor.Write3D(""value"", ""address"");
+  code += ""    "" + dst_tensor.Write3D(""value"", ""0"", ""0"", ""z"");
   code += ""      offset += 32;\n"";
   code += ""    }\n"";
   code += ""    s++;\n"";
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",util.cc,"@@ -182,31 +182,21 @@ std::string TensorCodeGenerator::Read3D(const std::string& x,
                                         const std::string& y,
                                         const std::string& z,
                                         TextureAddressMode address_mode) const {
-  return Read3D(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
+  return Read(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
 }
 
 std::string TensorCodeGenerator::Read4D(const std::string& x,
                                         const std::string& y,
                                         const std::string& z,
                                         const std::string& b) const {
-  return Read3D(GetGlobalAddressNoDeclaration(x, y, z, b),
-                TextureAddressMode::DONT_CARE);
+  return Read(GetGlobalAddressNoDeclaration(x, y, z, b),
+              TextureAddressMode::DONT_CARE);
 }
 
 std::string TensorCodeGenerator::ReadAsFloat3D(
     const std::string& x, const std::string& y, const std::string& z,
     TextureAddressMode address_mode) const {
-  return ReadAsFloat3D(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
-}
-
-std::string TensorCodeGenerator::Read3D(const std::string& global_address,
-                                        TextureAddressMode address_mode) const {
-  return ReadGlobalFLT4(global_address, address_mode);
-}
-
-std::string TensorCodeGenerator::ReadAsFloat3D(
-    const std::string& global_address, TextureAddressMode address_mode) const {
-  return ReadGlobalFloat4(global_address, address_mode);
+  return ReadAsFloat(GetGlobalAddressNoDeclaration(x, y, z), address_mode);
 }
 
 std::string TensorCodeGenerator::GetAddress(const std::string& var_name,
@@ -265,12 +255,7 @@ std::string TensorCodeGenerator::Write3D(const std::string& var_name,
                                          const std::string& x,
                                          const std::string& y,
                                          const std::string& z) const {
-  return Write3D(var_name, GetGlobalAddressNoDeclaration(x, y, z));
-}
-
-std::string TensorCodeGenerator::Write3D(
-    const std::string& var_name, const std::string& global_address) const {
-  return WriteGlobalFLT4(var_name, global_address);
+  return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z));
 }
 
 std::string TensorCodeGenerator::Write4D(const std::string& var_name,
@@ -278,11 +263,11 @@ std::string TensorCodeGenerator::Write4D(const std::string& var_name,
                                          const std::string& y,
                                          const std::string& z,
                                          const std::string& b) const {
-  return WriteGlobalFLT4(var_name, GetGlobalAddressNoDeclaration(x, y, z, b));
+  return Write(var_name, GetGlobalAddressNoDeclaration(x, y, z, b));
 }
 
-std::string TensorCodeGenerator::ReadGlobalFLT4(
-    const std::string& global_address, TextureAddressMode address_mode) const {
+std::string TensorCodeGenerator::Read(const std::string& global_address,
+                                      TextureAddressMode address_mode) const {
   switch (descriptor_.storage_type) {
     case TensorStorageType::BUFFER:
       return absl::StrCat(tensor_name_, ""["", global_address, ""]"");
@@ -301,7 +286,7 @@ std::string TensorCodeGenerator::ReadGlobalFLT4(
   }
 }
 
-std::string TensorCodeGenerator::ReadGlobalFloat4(
+std::string TensorCodeGenerator::ReadAsFloat(
     const std::string& global_address, TextureAddressMode address_mode) const {
   switch (descriptor_.storage_type) {
     case TensorStorageType::BUFFER:
@@ -322,7 +307,7 @@ std::string TensorCodeGenerator::ReadGlobalFloat4(
   }
 }
 
-std::string TensorCodeGenerator::WriteGlobalFLT4(
+std::string TensorCodeGenerator::Write(
     const std::string& var_name, const std::string& global_address) const {
   switch (descriptor_.storage_type) {
     case TensorStorageType::BUFFER:
",0,train
d5e600c22aa3f45c8573a772309f829fea00260e,tensorflow/tensorflow,"Renamed Read3D/Write3D (with global address) to Read/Write so that global_address can be calculated based on 3D as well as 4D.

PiperOrigin-RevId: 272023638",util.h,"@@ -87,16 +87,16 @@ class TensorCodeGenerator {
                       const std::string& y, const std::string& z,
                       const std::string& b) const;
 
-  std::string Read3D(
+  std::string Read(
       const std::string& global_address,
       TextureAddressMode address_mode = TextureAddressMode::ZERO) const;
   // Optimization for textures, so as in opencl we can use read_imagef for any
   // texture type.
-  std::string ReadAsFloat3D(
+  std::string ReadAsFloat(
       const std::string& global_address,
       TextureAddressMode address_mode = TextureAddressMode::ZERO) const;
-  std::string Write3D(const std::string& var_name,
-                      const std::string& global_address) const;
+  std::string Write(const std::string& var_name,
+                    const std::string& global_address) const;
 
  private:
   std::string GetGlobalAddressNoDeclaration(const std::string& x,
@@ -107,15 +107,6 @@ class TensorCodeGenerator {
                                             const std::string& z,
                                             const std::string& b) const;
 
-  std::string ReadGlobalFLT4(const std::string& global_address,
-                             TextureAddressMode address_mode) const;
-
-  std::string ReadGlobalFloat4(const std::string& global_address,
-                               TextureAddressMode address_mode) const;
-
-  std::string WriteGlobalFLT4(const std::string& var_name,
-                              const std::string& global_address) const;
-
   std::string tensor_name_;
   std::string uniform_size_name_;
   TensorDescriptor descriptor_;
",0,train
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_allocator_retry.cc,"@@ -14,10 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include ""tensorflow/core/common_runtime/gpu/gpu_allocator_retry.h""
+#include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
-#include ""tensorflow/core/public/env.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_allocator_retry.h,"@@ -16,9 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_
 #define TENSORFLOW_CORE_COMMON_RUNTIME_GPU_GPU_ALLOCATOR_RETRY_H_
 
+#include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
-#include ""tensorflow/core/public/env.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_allocator_retry_test.cc,"@@ -17,12 +17,12 @@ limitations under the License.
 
 #include <vector>
 #include ""tensorflow/core/lib/core/notification.h""
+#include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/test.h""
 #include ""tensorflow/core/platform/thread_annotations.h""
-#include ""tensorflow/core/public/env.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 namespace {
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_bfc_allocator.cc,"@@ -24,8 +24,8 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace gpu = ::perftools::gputools;
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_bfc_allocator.h,"@@ -27,9 +27,9 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/thread_annotations.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_bfc_allocator_test.cc,"@@ -24,9 +24,9 @@ limitations under the License.
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/lib/random/simple_philox.h""
 #include ""tensorflow/core/platform/logging.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace gpu = ::perftools::gputools;
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_debug_allocator.h,"@@ -22,8 +22,8 @@ limitations under the License.
 
 #include ""tensorflow/core/common_runtime/gpu/visitable_allocator.h""
 #include ""tensorflow/core/platform/macros.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_debug_allocator_test.cc,"@@ -24,9 +24,9 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/gpu/gpu_init.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/platform/logging.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace gpu = ::perftools::gputools;
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_device.cc,"@@ -38,20 +38,20 @@ limitations under the License.
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/framework/device_base.h""
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/graph/types.h""
+#include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/lib/gtl/stl_util.h""
 #include ""tensorflow/core/lib/strings/numbers.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/cuda.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/tracing.h""
+#include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/public/session_options.h""
-#include ""tensorflow/core/public/status.h""
-#include ""tensorflow/core/public/tensor.h""
 #include ""tensorflow/core/util/device_name_utils.h""
 
 namespace gpu = ::perftools::gputools;
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_device.h,"@@ -29,12 +29,12 @@ limitations under the License.
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/framework/device_base.h""
 #include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/tensor.h""
+#include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/public/session_options.h""
-#include ""tensorflow/core/public/status.h""
-#include ""tensorflow/core/public/tensor.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_event_mgr.h,"@@ -18,15 +18,15 @@ limitations under the License.
 
 #include <deque>
 #include <vector>
+#include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_reference.h""
 #include ""tensorflow/core/lib/core/notification.h""
 #include ""tensorflow/core/lib/core/threadpool.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/thread_annotations.h""
-#include ""tensorflow/core/public/tensor.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace perftools {
 namespace gputools {
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_init.cc,"@@ -21,8 +21,8 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/numbers.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/logging.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace gpu = ::perftools::gputools;
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_region_allocator.cc,"@@ -25,8 +25,8 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 // If true, the CUDA gpu manager checks that all allocated memory
 // through the GPU memory pool implementation has been freed.
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_region_allocator.h,"@@ -26,9 +26,9 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/thread_annotations.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_region_allocator_test.cc,"@@ -23,9 +23,9 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/gpu/gpu_init.h""
 #include ""tensorflow/core/lib/gtl/inlined_vector.h""
 #include ""tensorflow/core/platform/logging.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace gpu = ::perftools::gputools;
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_stream_util.h,"@@ -19,7 +19,7 @@ limitations under the License.
 #include <unordered_map>
 
 #include ""tensorflow/core/graph/graph.h""
-#include ""tensorflow/core/public/status.h""
+#include ""tensorflow/core/lib/core/status.h""
 
 namespace tensorflow {
 namespace gpu_stream_util {
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_util.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/gpu/gpu_event_mgr.h""
 #include ""tensorflow/core/common_runtime/gpu/process_state.h""
 #include ""tensorflow/core/common_runtime/gpu_device_context.h""
+#include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_reference.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/lib/core/errors.h""
@@ -34,7 +35,6 @@ limitations under the License.
 #include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/tensor_coding.h""
 #include ""tensorflow/core/platform/tracing.h""
-#include ""tensorflow/core/public/tensor.h""
 #include ""tensorflow/core/util/util.h""
 
 // If this need to be runtime configurable, consider adding options to
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_util.h,"@@ -18,9 +18,9 @@ limitations under the License.
 
 #include ""tensorflow/core/common_runtime/device.h""
 #include ""tensorflow/core/common_runtime/dma_helper.h""
+#include ""tensorflow/core/framework/tensor.h""
+#include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/stream_executor.h""
-#include ""tensorflow/core/public/status.h""
-#include ""tensorflow/core/public/tensor.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",gpu_util_platform_specific.cc,"@@ -16,9 +16,9 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/gpu/gpu_util.h""
 #include ""tensorflow/core/common_runtime/device.h""
 #include ""tensorflow/core/common_runtime/gpu_device_context.h""
+#include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/platform/stream_executor.h""
-#include ""tensorflow/core/public/tensor.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",pool_allocator.cc,"@@ -24,7 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/numbers.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",pool_allocator.h,"@@ -30,8 +30,8 @@ limitations under the License.
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/mem.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",process_state.cc,"@@ -25,8 +25,8 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/stream_executor.h""
+#include ""tensorflow/core/platform/types.h""
 
 // If these flags need to be runtime configurable, consider adding
 // options to ConfigProto.
",0,test
f8fa35b8a1910772d6d6ba7b621f905358640c2c,tensorflow/tensorflow,"Global search & replace to move to the new location for
tensorflow/core/ files and build targets.
Change: 113080048",process_state.h,"@@ -22,8 +22,8 @@ limitations under the License.
 
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/platform/mutex.h""
-#include ""tensorflow/core/platform/port.h""
 #include ""tensorflow/core/platform/thread_annotations.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
",0,test
d0819763b64c693b33b0b8ba454f80b30c9f0590,tensorflow/tensorflow,"Add test case for GitHub issue 53300.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",stack_op_test.py,"@@ -288,6 +288,16 @@ class StackOpTest(test.TestCase):
             c = array_ops.stack(xs)
             self.assertAllEqual(self.evaluate(c), data)
 
+  def testZeroDimUnmatch(self):
+    # Test case for GitHub issue 53300.
+    # Error message is `Shapes of all inputs must match` in eager mode,
+    # and `Shapes ...` in graph mode. Below is to capture both:
+    with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError),
+                                r""Shapes""):
+      with self.session():
+        t = [array_ops.zeros([0, 3]), array_ops.zeros([1, 3])]
+        self.evaluate(array_ops.stack(t))
+
 
 class AutomaticStackingTest(test.TestCase):
 
",0,train
19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`.

PiperOrigin-RevId: 289754304
Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",direct_session.cc,"@@ -584,11 +584,20 @@ Status DirectSession::RunInternal(
     }
   }
 
+  const int64 call_timeout = run_options.timeout_in_ms() > 0
+                                 ? run_options.timeout_in_ms()
+                                 : operation_timeout_in_ms_;
+
   std::unique_ptr<RunHandler> handler;
   if (ShouldUseRunHandlerPool(run_options) &&
       run_options.experimental().use_run_handler_pool()) {
     VLOG(1) << ""Using RunHandler to scheduler inter-op closures."";
-    handler = GetOrCreateRunHandlerPool(options_)->Get(step_id);
+    handler = GetOrCreateRunHandlerPool(options_)->Get(step_id, call_timeout);
+    if (!handler) {
+      return errors::DeadlineExceeded(
+          ""Could not obtain RunHandler for request after waiting for "",
+          call_timeout, ""ms."");
+    }
   }
   auto* handler_ptr = handler.get();
 
@@ -607,9 +616,6 @@ Status DirectSession::RunInternal(
   }
 
   // Start parallel Executors.
-  const int64 call_timeout = run_options.timeout_in_ms() > 0
-                                 ? run_options.timeout_in_ms()
-                                 : operation_timeout_in_ms_;
   const bool can_execute_synchronously = pool == nullptr && call_timeout == 0;
 
   Executor::Args args;
",0,test
19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`.

PiperOrigin-RevId: 289754304
Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",run_handler.cc,"@@ -879,7 +879,12 @@ class RunHandlerPool::Impl {
     return run_handler_thread_pool_.get();
   }
 
-  std::unique_ptr<RunHandler> Get(int64 step_id) LOCKS_EXCLUDED(mu_) {
+  bool has_free_handler() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
+    return !free_handlers_.empty();
+  }
+
+  std::unique_ptr<RunHandler> Get(int64 step_id, int64 timeout_in_ms)
+      LOCKS_EXCLUDED(mu_) {
     std::unique_ptr<Eigen::MaxSizeVector<ThreadWorkSource*>>
         thread_work_sources;
     uint64 version;
@@ -894,8 +899,10 @@ class RunHandlerPool::Impl {
                                      ""#"");
             },
             profiler::TraceMeLevel::kInfo);
-        while (free_handlers_.empty()) {
-          one_handler_free_.wait(l);
+        if (!mu_.AwaitWithDeadline(
+                Condition(this, &Impl::has_free_handler),
+                EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) {
+          return nullptr;
         }
       }
       // Remove the last entry from free_handlers_ and add to the end of
@@ -992,7 +999,6 @@ class RunHandlerPool::Impl {
       LogInfo();
     }
     RecomputePoolStats(num_active_requests, version, *thread_work_sources);
-    one_handler_free_.notify_one();
   }
 
  private:
@@ -1022,7 +1028,6 @@ class RunHandlerPool::Impl {
   histogram::Histogram time_hist_ GUARDED_BY(mu_);
 
   int64 iterations_ GUARDED_BY(mu_);
-  condition_variable one_handler_free_;
   mutex mu_;
   int64 version_ GUARDED_BY(mu_);
   const std::vector<double> sub_thread_pool_end_request_percentage_;
@@ -1130,8 +1135,9 @@ RunHandlerPool::RunHandlerPool(int num_inter_op_threads,
 
 RunHandlerPool::~RunHandlerPool() {}
 
-std::unique_ptr<RunHandler> RunHandlerPool::Get(int64 step_id) {
-  return impl_->Get(step_id);
+std::unique_ptr<RunHandler> RunHandlerPool::Get(int64 step_id,
+                                                int64 timeout_in_ms) {
+  return impl_->Get(step_id, timeout_in_ms);
 }
 
 RunHandler::RunHandler(Impl* impl) : impl_(impl) {}
",0,test
19673dfff5231471524cdcf257c0f5f5790696c4,tensorflow/tensorflow,"[RunHandler] Respect the operation timeout in `RunHandlerPool::Get()`.

PiperOrigin-RevId: 289754304
Change-Id: I4be1bf1a2799899f27240de580779b83b627e976",run_handler.h,"@@ -62,7 +62,7 @@ class RunHandlerPool {
   // unique_ptr is destroyed.
   //
   // Will block unless there is an inactive handler.
-  std::unique_ptr<RunHandler> Get(int64 step_id = 0);
+  std::unique_ptr<RunHandler> Get(int64 step_id = 0, int64 timeout_in_ms = 0);
 
  private:
   class Impl;
",0,test
6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in the python language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_tracer.cc,"@@ -23,7 +23,6 @@ limitations under the License.
 #include ""tensorflow/core/profiler/profiler_options.pb.h""
 #include ""tensorflow/core/profiler/protobuf/xplane.pb.h""
 #include ""tensorflow/core/protobuf/config.pb.h""
-#include ""tensorflow/core/util/env_var.h""
 #include ""tensorflow/python/profiler/internal/python_hooks.h""
 
 namespace tensorflow {
@@ -34,7 +33,8 @@ namespace {
 // the events to TraceMeRecorder.
 class PythonTracer : public ProfilerInterface {
  public:
-  explicit PythonTracer() = default;
+  explicit PythonTracer(const PythonHooksOptions& options)
+      : options_(options) {}
   ~PythonTracer() override;
 
   // Starts recording TraceMes.
@@ -51,6 +51,7 @@ class PythonTracer : public ProfilerInterface {
 
  private:
   bool recording_ = false;
+  const PythonHooksOptions options_;
 
   TF_DISALLOW_COPY_AND_ASSIGN(PythonTracer);
 };
@@ -66,7 +67,7 @@ Status PythonTracer::Start() {
   }
   VLOG(1) << __FUNCTION__;
   recording_ = true;
-  PythonHooks::GetSingleton()->Start();
+  PythonHooks::GetSingleton()->Start(options_);
   return Status::OK();
 }
 
@@ -75,7 +76,7 @@ Status PythonTracer::Stop() {
     return errors::Internal(""TraceMeRecorder not started"");
   }
   VLOG(1) << __FUNCTION__;
-  PythonHooks::GetSingleton()->Stop();
+  PythonHooks::GetSingleton()->Stop(options_);
   recording_ = false;
   return Status::OK();
 }
@@ -105,18 +106,15 @@ Status PythonTracer::CollectData(XSpace* space) {
 // Not in anonymous namespace for testing purposes.
 std::unique_ptr<ProfilerInterface> CreatePythonTracer(
     const ProfileOptions& options) {
-  if (options.python_tracer_level() == 0) return nullptr;
-  // This ProfilerInterface rely on TraceMeRecorder to be active.
-  if (options.host_tracer_level() == 0) return nullptr;
-  return absl::make_unique<PythonTracer>();
+  PythonHooksOptions pyhooks_options;
+  pyhooks_options.enable_trace_python_function =
+      options.python_tracer_level() && options.host_tracer_level();
+  pyhooks_options.enable_python_traceme = options.host_tracer_level() != 0;
+  return absl::make_unique<PythonTracer>(pyhooks_options);
 }
 
 auto register_python_tracer_factory = [] {
-  bool enable;
-  TF_CHECK_OK(ReadBoolFromEnvVar(""TF_ENABLE_OSS_PYTHON_TRACER"", true, &enable));
-  if (enable) {
-    RegisterProfilerFactory(&CreatePythonTracer);
-  }
+  RegisterProfilerFactory(&CreatePythonTracer);
   return 0;
 }();
 
",0,test
6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in the python language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_hooks.cc,"@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include ""tensorflow/python/profiler/internal/python_hooks.h""
 
+#include ""absl/strings/string_view.h""
 #include ""absl/strings/strip.h""
 #include ""tensorflow/core/platform/path.h""
 
@@ -44,16 +45,30 @@ PythonHooks* PythonHooks::GetSingleton() {
   return singleton;
 }
 
-void PythonHooks::Start() {
-  PyGILState_STATE gil_state = PyGILState_Ensure();
-  SetProfilerInAllThreads();
-  PyGILState_Release(gil_state);
+void PythonHooks::Start(const PythonHooksOptions& option) {
+  if (option.enable_python_traceme || option.enable_trace_python_function) {
+    PyGILState_STATE gil_state = PyGILState_Ensure();
+    if (option.enable_trace_python_function) {
+      SetProfilerInAllThreads();
+    }
+    if (option.enable_python_traceme) {
+      EnableTraceMe(true);
+    }
+    PyGILState_Release(gil_state);
+  }
 }
 
-void PythonHooks::Stop() {
-  PyGILState_STATE gil_state = PyGILState_Ensure();
-  ClearProfilerInAllThreads();
-  PyGILState_Release(gil_state);
+void PythonHooks::Stop(const PythonHooksOptions& option) {
+  if (option.enable_python_traceme || option.enable_trace_python_function) {
+    PyGILState_STATE gil_state = PyGILState_Ensure();
+    if (option.enable_trace_python_function) {
+      ClearProfilerInAllThreads();
+    }
+    if (option.enable_python_traceme) {
+      EnableTraceMe(false);
+    }
+    PyGILState_Release(gil_state);
+  }
 }
 
 void PythonHooks::Finalize() { tracemes_.clear(); }
@@ -180,5 +195,12 @@ void PythonHooks::ClearProfilerInAllThreads() {
   ThreadingSetProfile(py::none());
 }
 
+void PythonHooks::EnableTraceMe(bool enable) {
+  const char* kModuleName =
+      ""tensorflow.python.profiler.internal._pywrap_traceme"";
+  auto trace_module = py::module::import(kModuleName);
+  trace_module.attr(""enabled"") = enable;
+}
+
 }  // namespace profiler
 }  // namespace tensorflow
",0,test
6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in the python language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",python_hooks.h,"@@ -30,19 +30,26 @@ namespace profiler {
 
 namespace py = ::pybind11;
 
+struct PythonHooksOptions {
+  bool enable_trace_python_function = false;
+  bool enable_python_traceme = true;
+};
+
 // Singleton for tracing python function calls.
 class PythonHooks {
  public:
   static PythonHooks* GetSingleton();
 
-  void Start();
-  void Stop();
+  void Start(const PythonHooksOptions& option);
+  void Stop(const PythonHooksOptions& option);
   void Finalize();
   void ProfileSlow(const py::object& frame, const string& event,
                    const py::object& arg);
   void ProfileFast(PyFrameObject* frame, int what, PyObject* arg);
 
  private:
+  void EnableTraceMe(bool enable);
+
   void SetProfilerInAllThreads();
   void ClearProfilerInAllThreads();
 
",0,test
6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in the python language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",traceme_wrapper.cc,"@@ -23,8 +23,10 @@ namespace py = ::pybind11;
 using ::tensorflow::profiler::TraceMeWrapper;
 
 PYBIND11_MODULE(_pywrap_traceme, m) {
+  // This variable will be modified by PythonHooks::Start/Stop(). such
+  // arrangement will reduce the number of calls through pybind11.
+  m.attr(""enabled"") = py::bool_(false);
   py::class_<TraceMeWrapper>(m, ""TraceMe"", py::module_local())
       .def(py::init<const py::str&, const py::kwargs&>())
-      .def(""SetMetadata"", &TraceMeWrapper::SetMetadata)
-      .def_static(""IsEnabled"", &TraceMeWrapper::IsEnabled);
+      .def(""SetMetadata"", &TraceMeWrapper::SetMetadata);
 };
",0,test
6eff291a056d06f8c159485f81228f685b6f719c,tensorflow/tensorflow,"Use python tracer to control TraceMe in the python language. This should have better performance than going through pybind11.

PiperOrigin-RevId: 315547199
Change-Id: I64c4d9f5dce6a23fbeed7fcde10c7a8e839494a4",trace.py,"@@ -72,7 +72,7 @@ class Trace(object):
       The example above uses the keyword argument ""step_num"" to specify the
       training step being traced.
     """"""
-    if _pywrap_traceme.TraceMe.IsEnabled():
+    if _pywrap_traceme.enabled:
       # Creating _pywrap_traceme.TraceMe starts the clock.
       self._traceme = _pywrap_traceme.TraceMe(name, **kwargs)
     else:
",0,test
2bccece3856992080f7902d4434c8df973901e99,tensorflow/tensorflow,"Add missing space in error message.
Change: 135981721",tensor_shape.cc,"@@ -33,7 +33,7 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector<int64, 8>* vals) {
 }
 
 void TensorShape::CheckDimsEqual(int NDIMS) const {
-  CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << ""dimensions""
+  CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << "" dimensions""
                           << "" from a tensor of "" << dims() << "" dimensions"";
 }
 
",0,train
1427bfc12ec5a3a2c6a4ffd57fc5b465d3eedfae,tensorflow/tensorflow,"Update gradient_checker_v2 to use a step size in the finite difference approximation that is exactly representable as a binary floating point number. This is an old trick that in some cases avoids polluting the finite difference approximation with rounding errors that cause false negatives in gradient tests.

PiperOrigin-RevId: 343348502
Change-Id: I3539ae7de7105177c5a1b9144b491f36369344f4",relu_op_test.py,"@@ -19,9 +19,7 @@ from __future__ import division
 from __future__ import print_function
 
 import numpy as np
-from six.moves import xrange  # pylint: disable=redefined-builtin
 
-from tensorflow.python import tf2
 from tensorflow.python.eager import backprop
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -29,7 +27,6 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import gradient_checker_v2
-from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import variables
@@ -117,45 +114,19 @@ class ReluTest(test.TestCase):
           order=""F"")
       err = gradient_checker_v2.max_error(*gradient_checker_v2.compute_gradient(
           nn_ops.relu, [x], delta=1.0 / 1024))
-    self.assertLess(err, 1e-4)
+    self.assertLess(err, 1e-6)
 
-  # The gradient for fp16 is inaccurate due to the low-precision.
-  # We compare the fp16 analytical gradient against their fp32 counterpart.
+  # The gradient test for ReLU is a bit tricky as the derivative is not well
+  # defined at around zero and we want to avoid that in terms of input values.
   def testGradientFloat16(self):
-
-    def grad(x):
-      with backprop.GradientTape() as tape:
-        tape.watch(x)
-        y = nn_ops.l2_loss(nn_ops.relu(x))
-      return tape.gradient(y, x)
-
-    def f():
-      with test_util.use_gpu():
-        # Randomly construct a 1D shape from [1, 40)
-        shape = random_ops.random_uniform([1],
-                                          minval=1,
-                                          maxval=40,
-                                          dtype=dtypes.int32)
-        x32 = random_ops.random_uniform(shape, minval=-1, maxval=1)
-        x16 = math_ops.cast(x32, dtype=dtypes.float16)
-        return grad(x32), grad(x16)
-
-    # We're going to ensure that the fp16 and fp32 gradients
-    # are ""close"" to each other for ~100 random values.
-    #
-    # In TensorFlow 1.x, invoking f() (without eager execution enabled)
-    # would construct a graph. Instead of construct a graph with O(100) nodes,
-    # we construct a single graph to be executed ~100 times in a Session.
-    if not tf2.enabled():
-      d32_tensor, d16_tensor = f()
-      with self.cached_session() as sess:
-        f = lambda: sess.run([d32_tensor, d16_tensor])
-
-    # Repeat the experiment for 100 times. All tensor shapes and its tensor
-    # values are randomly generated for each run.
-    for _ in xrange(100):
-      d32, d16 = f()
-      self.assertAllClose(d32, d16, atol=3e-4)
+    with self.cached_session():
+      x = np.asarray(
+          [[-0.9, -0.7, -0.5, -0.3, -0.1], [0.1, 0.3, 0.5, 0.7, 0.9]],
+          dtype=np.float16,
+          order=""F"")
+      err = gradient_checker_v2.max_error(
+          *gradient_checker_v2.compute_gradient(nn_ops.relu, [x]))
+    self.assertLess(err, 1e-6)
 
   def testGradientFloat64(self):
     with self.cached_session():
@@ -165,7 +136,7 @@ class ReluTest(test.TestCase):
           order=""F"")
       err = gradient_checker_v2.max_error(*gradient_checker_v2.compute_gradient(
           nn_ops.relu, [x], delta=1.0 / 1024))
-    self.assertLess(err, 1e-10)
+    self.assertLess(err, 1e-15)
 
   def testGradGradFloat32(self):
     with self.cached_session():
",0,train
1427bfc12ec5a3a2c6a4ffd57fc5b465d3eedfae,tensorflow/tensorflow,"Update gradient_checker_v2 to use a step size in the finite difference approximation that is exactly representable as a binary floating point number. This is an old trick that in some cases avoids polluting the finite difference approximation with rounding errors that cause false negatives in gradient tests.

PiperOrigin-RevId: 343348502
Change-Id: I3539ae7de7105177c5a1b9144b491f36369344f4",gradient_checker_v2.py,"@@ -292,7 +292,7 @@ def _compute_gradient_list(f, xs, delta):
 
 
 @tf_export(""test.compute_gradient"", v1=[])
-def compute_gradient(f, x, delta=1e-3):
+def compute_gradient(f, x, delta=None):
   """"""Computes the theoretical and numeric Jacobian of `f`.
 
   With y = f(x), computes the theoretical and numeric Jacobian dy/dx.
@@ -329,6 +329,12 @@ def compute_gradient(f, x, delta=1e-3):
     raise ValueError(
         ""`x` must be a list or tuple of values convertible to a Tensor ""
         ""(arguments to `f`), not a %s"" % type(x))
+  if delta is None:
+    # By default, we use a step size for the central finite difference
+    # approximation that is exactly representable as a binary floating
+    # point number, since this reduces the amount of noise due to rounding
+    # in the approximation of some functions.
+    delta = 1.0 / 1024
   return _compute_gradient_list(f, x, delta)
 
 
",0,train
3f56b1402409ad4efb8dd931d5b1b7bdc713597e,tensorflow/tensorflow,"Log initialization and warmup time to proto results in benchmark tool.

PiperOrigin-RevId: 172792563",graph_compiler.cc,"@@ -38,7 +38,6 @@ limitations under the License.
 #include ""tensorflow/core/graph/algorithm.h""
 #include ""tensorflow/core/graph/graph_constructor.h""
 #include ""tensorflow/core/graph/node_builder.h""
-#include ""tensorflow/core/lib/gtl/cleanup.h""
 #include ""tensorflow/core/lib/hash/hash.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/public/version.h""
@@ -85,20 +84,9 @@ Status PrepareArguments(XlaOpKernelContext* ctx, Graph* graph,
 }
 }  // namespace
 Status GraphCompiler::Compile() {
-  // Maintain a mapping from node id to node outputs.
-  using NodeOutputs = std::vector<TensorValue>;
-  std::vector<NodeOutputs> output_registry(graph_->num_node_ids());
-  auto output_registry_cleanup = gtl::MakeCleanup([&output_registry] {
-    for (const NodeOutputs& outputs : output_registry) {
-      for (const TensorValue& value : outputs) {
-        CHECK(!value.is_ref());
-        delete value.tensor;
-      }
-    }
-  });
-
-  // XLA requires determinism, generate a stable ordering from DFS.
+  OutputRegistry output_registry(graph_->num_node_ids());
   std::vector<Node*> topo_sorted_nodes;
+  // XLA requires determinism, generate a stable ordering from DFS.
   GetReversePostOrder(*graph_, &topo_sorted_nodes,
                       /*stable_comparator=*/NodeComparatorName());
 
@@ -106,6 +94,7 @@ Status GraphCompiler::Compile() {
   PartiallySetupParams(&params);
 
   for (Node* n : topo_sorted_nodes) {
+    NodeOutputs node_outputs;
     OpKernel* op_kernel_raw = nullptr;
     Status s = flib_->CreateKernel(n->def(), &op_kernel_raw);
     // Transfer ownership of the kernel to a local smart pointer.
@@ -133,9 +122,9 @@ Status GraphCompiler::Compile() {
       if (e->IsControlEdge()) continue;
       Node* src = e->src();
       TF_RET_CHECK(src->id() < output_registry.size());
-      const NodeOutputs& src_outputs = output_registry[src->id()];
+      const NodeOutputs& outputs = output_registry[src->id()];
 
-      tensor_inputs_[e->dst_input()] = src_outputs[e->src_output()];
+      tensor_inputs_[e->dst_input()] = outputs.values[e->src_output()];
     }
 
     OpKernelContext op_context(&params, n->num_outputs());
@@ -149,15 +138,15 @@ Status GraphCompiler::Compile() {
 
     // Set up outputs. Also check if outputs from the previous computation is
     // valid.
-    NodeOutputs& outputs = output_registry[n->id()];
-    outputs.resize(n->num_outputs());
     for (int o = 0; o < n->num_outputs(); ++o) {
-      outputs[o] = op_context.release_output(o);
-      if (*op_context.is_output_dead() || outputs[o].tensor == nullptr) {
+      const auto tensor_val = op_context.release_output(o);
+      if (*op_context.is_output_dead() || tensor_val.tensor == nullptr) {
         return errors::Internal(""Missing xla_context "", o, ""-th output from "",
                                 (*op_context.is_output_dead() ? ""(dead)"" : """"),
                                 SummarizeNode(*n));
       }
+      // Set up outputs
+      output_registry[n->id()].values.push_back(tensor_val);
     }
   }
   return Status::OK();
",0,train
3f56b1402409ad4efb8dd931d5b1b7bdc713597e,tensorflow/tensorflow,"Log initialization and warmup time to proto results in benchmark tool.

PiperOrigin-RevId: 172792563",graph_compiler.h,"@@ -69,6 +69,23 @@ class GraphCompiler {
   Status Compile();
 
  private:
+  // NodeOutputs is a wrapper over TensorValues that represents outputs of a
+  // node.
+  struct NodeOutputs {
+    ~NodeOutputs() {
+      for (auto& v : values) {
+        CHECK(!v.is_ref());
+        delete v.tensor;
+      }
+    }
+
+    // Output values of this node.
+    std::vector<TensorValue> values;
+  };
+
+  // A mapping from node id to node output.
+  using OutputRegistry = std::vector<NodeOutputs>;
+
   // Partially sets params. This partially set params can be reused
   // across multple nodes visit.
   void PartiallySetupParams(OpKernelContext::Params* params);
",0,train
3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged.

PiperOrigin-RevId: 232719435",side_effect_guards.py,"@@ -125,6 +125,10 @@ class SideEffectGuardTransformer(converter.Base):
     node.orelse = self._visit_and_reindent(node.orelse)
     return node
 
+  # TODO(b/123995141) Remove once ExceptionHandlers are in the CFG
+  def visit_ExceptHandler(self, node):
+    return node
+
   def visit_Expr(self, node):
     self.generic_visit(node)
     if isinstance(node.value, gast.Call):
",0,train
3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged.

PiperOrigin-RevId: 232719435",cfg.py,"@@ -393,6 +393,8 @@ class GraphBuilder(object):
   def _connect_jump_to_finally_sections(self, node):
     """"""Connects a jump node to the finally sections protecting it.""""""
     cursor = set((node,))
+    if node not in self.finally_sections:
+      return cursor
     for guard_section_id in self.finally_sections[node]:
       guard_begin, guard_ends = self.finally_section_subgraphs[guard_section_id]
       self._connect_nodes(cursor, guard_begin)
@@ -620,10 +622,10 @@ class AstToCfg(gast.NodeVisitor):
     leaving_node = self.lexical_scopes.pop()
     assert node == leaving_node
 
-  def _get_enclosing_scopes(self, include, stop_at):
+  def _get_enclosing_finally_scopes(self, stop_at):
     included = []
     for node in reversed(self.lexical_scopes):
-      if isinstance(node, include):
+      if isinstance(node, gast.Try) and node.finalbody:
         included.append(node)
       if isinstance(node, stop_at):
         return node, included
@@ -635,10 +637,8 @@ class AstToCfg(gast.NodeVisitor):
 
   def _process_exit_statement(self, node, *exits_nodes_of_type):
     # Note: this is safe because we process functions separately.
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=tuple(exits_nodes_of_type),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes(
+        tuple(exits_nodes_of_type))
     if try_node is None:
       raise ValueError(
           '%s that is not enclosed by any of %s' % (node, exits_nodes_of_type))
@@ -646,10 +646,8 @@ class AstToCfg(gast.NodeVisitor):
 
   def _process_continue_statement(self, node, *loops_to_nodes_of_type):
     # Note: this is safe because we process functions separately.
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=tuple(loops_to_nodes_of_type),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes(
+        tuple(loops_to_nodes_of_type))
     if try_node is None:
       raise ValueError('%s that is not enclosed by any of %s' %
                        (node, loops_to_nodes_of_type))
@@ -698,10 +696,7 @@ class AstToCfg(gast.NodeVisitor):
     self._process_basic_statement(node)
 
   def visit_Raise(self, node):
-    try_node, guards = self._get_enclosing_scopes(
-        include=(gast.Try,),
-        stop_at=(gast.FunctionDef,),
-    )
+    try_node, guards = self._get_enclosing_finally_scopes((gast.FunctionDef,))
     if try_node is None:
       raise ValueError('%s that is not enclosed by any FunctionDef' % node)
     self.builder.add_error_node(node, guards)
@@ -797,16 +792,13 @@ class AstToCfg(gast.NodeVisitor):
     for stmt in node.orelse:
       self.visit(stmt)
 
-    if node.handlers:
-      # TODO(mdan): Should we still support bare try/except? Might be confusing.
-      raise NotImplementedError('exceptions are not yet supported')
-
     self._exit_lexical_scope(node)
 
-    self.builder.enter_finally_section(node)
-    for stmt in node.finalbody:
-      self.visit(stmt)
-    self.builder.exit_finally_section(node)
+    if node.finalbody:
+      self.builder.enter_finally_section(node)
+      for stmt in node.finalbody:
+        self.visit(stmt)
+      self.builder.exit_finally_section(node)
 
   def visit_With(self, node):
     # TODO(mdan): Mark the context manager's exit call as exit guard.
",0,train
3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged.

PiperOrigin-RevId: 232719435",liveness.py,"@@ -219,6 +219,10 @@ class Annotator(transformer.Base):
                  frozenset(self.current_analyzer.out[cfg_node]))
     return node
 
+  def visit_ExceptHandler(self, node):
+    # TODO(b/123995141) Add Exception Handlers to the CFG
+    return node
+
 
 def resolve(node, source_info, graphs):
   """"""Resolves the live symbols at the exit of control flow statements.
",0,train
3f8dcd3e288f213001eace4aea0f22cfb1b65946,tensorflow/tensorflow,"Allow exceptions in code that will be staged. Consider all exceptions to be exiting the CFG, with no explicit support for exception-based control-flow. It may incidentally work to use exception-based control flow in code that is never staged.

PiperOrigin-RevId: 232719435",reaching_definitions.py,"@@ -223,6 +223,10 @@ class TreeAnnotator(transformer.Base):
   def visit_global(self, node):
     raise NotImplementedError()
 
+  def visit_ExceptHandler(self, node):
+    # TODO(b/123995141) Add Exception Handlers to the CFG
+    return node
+
   def visit_Name(self, node):
     if self.current_analyzer is None:
       # Names may appear outside function defs - for example in class
@@ -232,7 +236,8 @@ class TreeAnnotator(transformer.Base):
     analyzer = self.current_analyzer
     cfg_node = self.current_cfg_node
 
-    assert cfg_node is not None, 'name node outside of any statement?'
+    assert cfg_node is not None, ('name node, %s, outside of any statement?'
+                                  % node.id)
 
     qn = anno.getanno(node, anno.Basic.QN)
     if isinstance(node.ctx, gast.Load):
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",cpu_device.cc,"@@ -51,7 +51,8 @@ StatusOr<std::unique_ptr<PjRtClient>> GetCpuClient(bool asynchronous) {
     TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                         platform->GetExecutor(config));
     auto device_state = absl::make_unique<LocalDeviceState>(
-        executor, client, LocalDeviceState::kSynchronous, asynchronous,
+        executor, client, LocalDeviceState::kSynchronous,
+        /*max_inflight_computations=*/32,
         /*allow_event_reuse=*/false, /*use_callback_stream=*/false);
     auto device = absl::make_unique<CpuDevice>(i, std::move(device_state));
     devices.push_back(std::move(device));
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",gpu_device.cc,"@@ -212,7 +212,7 @@ StatusOr<std::vector<std::unique_ptr<LocalDeviceState>>> BuildLocalDeviceStates(
         xla_client->backend().stream_executor(i).ValueOrDie();
     addressable_devices.push_back(absl::make_unique<LocalDeviceState>(
         executor, xla_client, LocalDeviceState::kComputeSynchronized,
-        asynchronous,
+        /*max_inflight_computations=*/32,
         /*allow_event_reuse=*/true, /*use_callback_stream=*/true));
   }
   return std::move(addressable_devices);
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",interpreter_device.cc,"@@ -45,7 +45,8 @@ StatusOr<std::unique_ptr<PjRtClient>> GetInterpreterClient() {
   se::StreamExecutor* executor =
       client->backend().stream_executor(0).ValueOrDie();
   auto device_state = absl::make_unique<LocalDeviceState>(
-      executor, client, LocalDeviceState::kSynchronous, /*asynchronous=*/false,
+      executor, client, LocalDeviceState::kSynchronous,
+      /*max_inflight_computations=*/1,
       /*allow_event_reuse=*/false, /*use_callback_stream=*/false);
   auto device =
       absl::make_unique<InterpreterDevice>(0, std::move(device_state));
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",local_device_state.cc,"@@ -31,11 +31,13 @@ namespace xla {
 LocalDeviceState::LocalDeviceState(se::StreamExecutor* executor,
                                    LocalClient* client,
                                    AllocationModel allocation_model,
-                                   bool asynchronous, bool allow_event_reuse,
+                                   int max_inflight_computations,
+                                   bool allow_event_reuse,
                                    bool use_callback_stream)
     : allocation_model_(allocation_model),
       event_pool_(allow_event_reuse),
-      compute_semaphore_(/*capacity=*/asynchronous ? 32 : 1),
+      compute_semaphore_(
+          /*capacity=*/max_inflight_computations),
       executor_(executor),
       client_(client),
       prng_seed_generator_(prng_seed_device_()),
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",local_device_state.h,"@@ -89,8 +89,9 @@ class LocalDeviceState {
   // If asynchronous is false, the host will synchronize to the device after
   // each execution or transfer. This is intended for debugging only.
   LocalDeviceState(se::StreamExecutor* executor, LocalClient* client,
-                   AllocationModel allocation_model, bool asynchronous,
-                   bool allow_event_reuse, bool use_callback_stream);
+                   AllocationModel allocation_model,
+                   int max_inflight_computations, bool allow_event_reuse,
+                   bool use_callback_stream);
   virtual ~LocalDeviceState();
 
   se::StreamExecutor* executor() const { return executor_; }
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",tpu_client.cc,"@@ -49,7 +49,7 @@ namespace {
 class TpuDeviceState : public LocalDeviceState {
  public:
   TpuDeviceState(se::StreamExecutor* executor, LocalClient* client,
-                 bool asynchronous);
+                 int max_inflight_computations);
 
   Status ThenMemcpyDeviceToDevice(se::Stream* transfer_stream,
                                   se::Stream* dst_stream,
@@ -58,9 +58,10 @@ class TpuDeviceState : public LocalDeviceState {
 };
 
 TpuDeviceState::TpuDeviceState(se::StreamExecutor* executor,
-                               LocalClient* client, bool asynchronous)
+                               LocalClient* client,
+                               int max_inflight_computations)
     : LocalDeviceState(executor, client, LocalDeviceState::kAsynchronous,
-                       asynchronous,
+                       max_inflight_computations,
                        /*allow_event_reuse=*/false,
                        /*use_callback_stream=*/true) {}
 
@@ -194,7 +195,7 @@ StatusOr<std::vector<std::unique_ptr<PjRtStreamExecutorDevice>>> GetTpuDevices(
 }  // namespace
 
 StatusOr<std::shared_ptr<PjRtClient>> GetTpuClient(
-    bool asynchronous, absl::Duration init_retry_timeout) {
+    int max_inflight_computations, absl::Duration init_retry_timeout) {
   tf_tpu::TpuPlatformInterface* platform =
       tf_tpu::TpuPlatformInterface::GetRegisteredPlatform(
           /*initialize_platform=*/true, /*num_tries=*/1);
@@ -230,8 +231,8 @@ StatusOr<std::shared_ptr<PjRtClient>> GetTpuClient(
   for (int i = 0; i < client->device_count(); ++i) {
     se::StreamExecutor* executor =
         client->backend().stream_executor(i).ValueOrDie();
-    local_device_states.push_back(
-        absl::make_unique<TpuDeviceState>(executor, client, asynchronous));
+    local_device_states.push_back(absl::make_unique<TpuDeviceState>(
+        executor, client, max_inflight_computations));
   }
 
   TF_ASSIGN_OR_RETURN(auto devices,
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",tpu_client.h,"@@ -53,7 +53,7 @@ class PjRtTpuDevice : public PjRtStreamExecutorDevice {
 };
 
 StatusOr<std::shared_ptr<PjRtClient>> GetTpuClient(
-    bool asynchronous,
+    int max_inflight_computations,
     absl::Duration init_retry_timeout = absl::ZeroDuration());
 
 }  // namespace xla
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",outfeed_receiver_test.cc,"@@ -90,7 +90,8 @@ StatusOr<std::unique_ptr<PjRtClient>> GetCpuClientWithNonLocalDevice() {
   TF_ASSIGN_OR_RETURN(se::StreamExecutor * executor,
                       platform->GetExecutor(config));
   auto device_state = absl::make_unique<LocalDeviceState>(
-      executor, client, LocalDeviceState::kSynchronous, /*asynchronous=*/true,
+      executor, client, LocalDeviceState::kSynchronous,
+      /*max_inflight_computations=*/32,
       /*allow_event_reuse=*/false, /*use_callback_stream=*/false);
 
   std::vector<std::unique_ptr<PjRtStreamExecutorDevice>> devices;
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",xla.cc,"@@ -262,12 +262,12 @@ PYBIND11_MODULE(xla_extension, m) {
       py::arg(""distributed_client"") = nullptr, py::arg(""node_id"") = 0);
   m.def(
       ""get_tpu_client"",
-      [](bool asynchronous) -> StatusOr<std::shared_ptr<PyClient>> {
+      [](int max_inflight_computations) -> StatusOr<std::shared_ptr<PyClient>> {
         TF_ASSIGN_OR_RETURN(std::shared_ptr<PjRtClient> client,
-                            GetTpuClient(asynchronous));
+                            GetTpuClient(max_inflight_computations));
         return std::make_shared<PyClient>(std::move(client));
       },
-      py::arg(""asynchronous"") = true);
+      py::arg(""max_inflight_computations"") = 32);
 
   TF_CHECK_OK(PyBuffer::RegisterTypes(m));
 
",0,train
2c0afa6b6d44801c29621e5c5b7a83d123c36e16,tensorflow/tensorflow,"[JAX] Allow control of the number of max inflight async computations dispatched.

Due to allocator semantics on some backends, peak device memory usage grows with the number of inflight computations dispatched. Allowing users to dispatch fewer async computations will allow them to avoid OOM or fragmentation errors, with potentially little to no performance penalty.

PiperOrigin-RevId: 368873433
Change-Id: Ie98b41206a134af693034215aeb902206398551e",xla_client.py,"@@ -96,7 +96,7 @@ def _gpu_backend_factory(distributed_client=None, node_id=0):
 
 
 def _tpu_backend_factory():
-  return _xla.get_tpu_client(asynchronous=True)
+  return _xla.get_tpu_client(max_inflight_computations=32)
 
 
 # Backend factories, keyed by user-visible name, in increasing priority order.
",0,train
5b04fe0d14fef00df44a53b8e5dcd8fa4705a6f9,tensorflow/tensorflow,"Add a few more transpose benchmarks.

PiperOrigin-RevId: 407128419
Change-Id: Ie44a8fccf0fb81a56478a7fe7664c134a4b01d91",transpose_test.cc,"@@ -438,12 +438,34 @@ static std::vector<TransposeTestCase> BenchmarkCases() {
   return std::vector<TransposeTestCase>{
       TransposeTestCase(/*dims=*/{256, 256},
                         /*permutation=*/{1, 0}),
+      TransposeTestCase(/*dims=*/{512, 512},
+                        /*permutation=*/{1, 0}),
+      TransposeTestCase(/*dims=*/{1024, 1024},
+                        /*permutation=*/{1, 0}),
+      TransposeTestCase(/*dims=*/{256, 256, 256},
+                        /*permutation=*/{0, 2, 1}),
+      TransposeTestCase(/*dims=*/{256, 256, 256},
+                        /*permutation=*/{1, 0, 2}),
+      TransposeTestCase(/*dims=*/{256, 256, 256},
+                        /*permutation=*/{1, 2, 0}),
+      TransposeTestCase(/*dims=*/{256, 256, 256},
+                        /*permutation=*/{2, 0, 1}),
+      TransposeTestCase(/*dims=*/{256, 256, 256},
+                        /*permutation=*/{2, 1, 0}),
+      TransposeTestCase(/*dims=*/{512, 512, 512},
+                        /*permutation=*/{0, 2, 1}),
+      TransposeTestCase(/*dims=*/{512, 512, 512},
+                        /*permutation=*/{1, 0, 2}),
+      TransposeTestCase(/*dims=*/{512, 512, 512},
+                        /*permutation=*/{1, 2, 0}),
+      TransposeTestCase(/*dims=*/{512, 512, 512},
+                        /*permutation=*/{2, 0, 1}),
+      TransposeTestCase(/*dims=*/{512, 512, 512},
+                        /*permutation=*/{2, 1, 0}),
       TransposeTestCase(/*dims=*/{64, 224, 224, 3},
                         /*permutation=*/{1, 2, 3, 0}),
       TransposeTestCase(/*dims=*/{256, 64, 64, 3},
                         /*permutation=*/{1, 3, 2, 0}),
-      TransposeTestCase(/*dims=*/{1024, 1024},
-                        /*permutation=*/{1, 0}),
   };
 }
 
@@ -488,7 +510,6 @@ void BM_Transpose(const TransposeTestCase& bm, int parallelism,
     plan->Execute(input.data(), output.data(), [&](std::function<void()> fn) {
       threadpool.Schedule(std::move(fn));
     });
-
     tensorflow::testing::DoNotOptimize(output);
   }
 }
@@ -515,9 +536,10 @@ static void* benchmarks = []() {
   for (const auto& benchmark_case : benchmark_cases) {
     for (const auto& variant : variants) {
       for (int num_threads : std::get<2>(variant)) {
-        std::string name = absl::StrCat(
-            std::get<0>(variant), ""_"", absl::StrJoin(benchmark_case.dims, ""_""),
-            ""_perm_"", absl::StrJoin(benchmark_case.permutation, ""_""));
+        std::string name =
+            absl::StrCat(std::get<0>(variant), ""_threads_"", num_threads, ""_"",
+                         absl::StrJoin(benchmark_case.dims, ""_""), ""_perm_"",
+                         absl::StrJoin(benchmark_case.permutation, ""_""));
 
         TransposeTestCase testcase = benchmark_case;
         BenchmarkFn fn = std::get<1>(variant);
",0,train
7483a659271621e79ca13867a6268aedac0e87f9,tensorflow/tensorflow,"[XLA] Fix Broadcast implementation in HloEvaluator to handle the special case of scalar broadcast to be consistent with other backends. Also add a test for scalar broadcast.

PiperOrigin-RevId: 164781786",hlo_evaluator.cc,"@@ -177,6 +177,29 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
         parent_->GetEvaluatedLiteralFor(broadcast->operand(0));
     std::vector<int64> broadcast_indices(
         ShapeUtil::Rank(broadcast->operand(0)->shape()), 0);
+
+    // Special case for broadcasting scalars: ignore broadcast dimension and
+    // broadcast to whatever the output dimension is.
+    // TODO(b/64533549): Remove the need of this once this bug is resolved.
+    if (ShapeUtil::IsScalar(operand_to_broadcast.shape())) {
+      return output->Populate<ReturnT>(
+          [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
+            return operand_to_broadcast.Get<ReturnT>({});
+          });
+    }
+
+    TF_RET_CHECK(broadcast->dimensions().size() ==
+                 ShapeUtil::Rank(operand_to_broadcast.shape()))
+        << ""broadcast dimensions is of size: "" << broadcast->dimensions().size()
+        << "" and rank of operand_to_broadcast is: ""
+        << ShapeUtil::Rank(operand_to_broadcast.shape());
+    // Checks that operand's dimensions are the same as the broadcast's
+    // dimensions along the dimensions to be broadcasted.
+    for (int64 i = 0; i < broadcast->dimensions().size(); ++i) {
+      TF_RET_CHECK(broadcast->shape().dimensions(broadcast->dimensions(i)) ==
+                   operand_to_broadcast.shape().dimensions(i));
+    }
+
     return output->Populate<ReturnT>(
         [&](tensorflow::gtl::ArraySlice<int64> multi_index) {
           for (int64 i = 0; i < broadcast->dimensions().size(); ++i) {
@@ -184,7 +207,7 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
           }
           return operand_to_broadcast.Get<ReturnT>(broadcast_indices);
         });
-  }
+  };
 
   Status HandleCeil(HloInstruction* ceil, HloInstruction* operand) override {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[ceil],
",0,train
7483a659271621e79ca13867a6268aedac0e87f9,tensorflow/tensorflow,"[XLA] Fix Broadcast implementation in HloEvaluator to handle the special case of scalar broadcast to be consistent with other backends. Also add a test for scalar broadcast.

PiperOrigin-RevId: 164781786",hlo_evaluator_test.cc,"@@ -311,6 +311,27 @@ TEST_F(HloEvaluatorTest, DoesBroadcast) {
   LiteralTestUtil::ExpectEqual(*result, *output_literal);
 }
 
+TEST_F(HloEvaluatorTest, DoesBroadcastScalar) {
+  HloComputation::Builder b(TestName());
+  auto input_literal = Literal::CreateR0<int32>(111);
+  auto output_literal = Literal::CreateR2<int32>(
+      {{111, 111}, {111, 111}, {111, 111}, {111, 111}, {111, 111}, {111, 111}});
+
+  HloInstruction* literal_instruction = b.AddInstruction(
+      HloInstruction::CreateConstant(std::move(input_literal)));
+  // Broadcast dimension is ignored in the case of scalars.
+  b.AddInstruction(HloInstruction::CreateBroadcast(
+      output_literal->shape(), literal_instruction,
+      /*broadcast_dimensions=*/{1}));
+  HloModule module(TestName());
+  auto computation = module.AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  LiteralTestUtil::ExpectEqual(*result, *output_literal);
+}
+
 TEST_F(HloEvaluatorTest, ConvertWithSameLayout) {
   HloComputation::Builder b(TestName());
 
",0,train
563a0184e11c1b960853a66532fc4780af828333,tensorflow/tensorflow,"Legalize TensorFlow XlaGather and CollectivePermute ops to HLO

XlaGather requires constant slice_sizes operand and CollectivePermute requires constant source_target_pairs operand as these are attributes in the corresponding MHLO dialect ops.

PiperOrigin-RevId: 326960529
Change-Id: I0a7c2eaa81b39c0f01993b1d789c678157b55a9a",legalize_tf.cc,"@@ -50,6 +50,7 @@ limitations under the License.
 #include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/utils/hlo_utils.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h""
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h""
+#include ""tensorflow/compiler/mlir/xla/attribute_importer.h""
 #include ""tensorflow/compiler/mlir/xla/transforms/passes.h""
 #include ""tensorflow/compiler/xla/client/lib/conv_grad_size_util.h""
 #include ""tensorflow/compiler/xla/client/padding.h""
@@ -1065,6 +1066,21 @@ static void BuildSortComparisonBody(llvm::ArrayRef<Type> element_types,
   builder->create<mhlo::ReturnOp>(loc, compare);
 }
 
+//===----------------------------------------------------------------------===//
+// XlaGather op utilities.
+//===----------------------------------------------------------------------===//
+
+bool HasValidGatherDims(StringAttr attr) {
+  ::xla::GatherDimensionNumbers dims;
+  return dims.ParseFromString(attr.getValue().str());
+}
+
+GatherDimensionNumbers GetGatherDimNumsAttr(StringAttr attr, Builder *builder) {
+  ::xla::GatherDimensionNumbers dims;
+  if (!dims.ParseFromString(attr.getValue().str())) return {};
+  return ::xla::ConvertGatherDimensionNumbers(dims, builder);
+}
+
 //===----------------------------------------------------------------------===//
 // Op converters.
 //===----------------------------------------------------------------------===//
",0,train
43a7963cd58f04d0ceac097a859c51d3f760db8b,tensorflow/tensorflow,"Put the dynamic libraries into the platlib (platform specific) instead of the
purelib (default).

This allows auditwheel to audit the wheel without errors.

PiperOrigin-RevId: 251828232",setup.py,"@@ -139,6 +139,7 @@ class InstallCommand(InstallCommandBase):
     ret = InstallCommandBase.finalize_options(self)
     self.install_headers = os.path.join(self.install_purelib,
                                         'tensorflow', 'include')
+    self.install_lib = self.install_platlib
     return ret
 
 
",0,train
9b44cbafe53d1409e6d7d1086284109824495abf,tensorflow/tensorflow,"Add logging to indicate which checkpoint is being restored.
Change: 145817772",saver.py,"@@ -1435,6 +1435,7 @@ class Saver(object):
     """"""
     if self._is_empty:
       return
+    logging.info(""Restoring parameters from %s"", save_path)
     sess.run(self.saver_def.restore_op_name,
              {self.saver_def.filename_tensor_name: save_path})
 
",0,test
cc9bdb70b88c76f293f27e29e91cb7739ba3fdc4,tensorflow/tensorflow,"Optimizing code and adding from http to https (#15714)

* Removing extra space, preventing double declared ""if"" statement and from http to https

* Optimizing code for reducing if statement

* Replacing in above line

* Reverting my changes back for 'else'.

* Reverting back changes as discussed

* ""No new line at end of file"" indication of lint.",while_op.cc,"@@ -39,7 +39,7 @@ Status MakeXlaCompilerArgumentsFromInputs(
   *has_uninitialized_vars = false;
   *has_tensor_arrays = false;
   for (int i = 0; i < ctx->num_inputs(); ++i) {
-    VLOG(2) << ""  Input "" << i
+    VLOG(2) << "" Input "" << i
             << "" type: "" << DataTypeString(ctx->input_type(i))
             << "" shape: "" << ctx->InputShape(i).DebugString();
     XlaCompiler::Argument& arg = (*args)[i];
",0,test
cc9bdb70b88c76f293f27e29e91cb7739ba3fdc4,tensorflow/tensorflow,"Optimizing code and adding from http to https (#15714)

* Removing extra space, preventing double declared ""if"" statement and from http to https

* Optimizing code for reducing if statement

* Replacing in above line

* Reverting my changes back for 'else'.

* Reverting back changes as discussed

* ""No new line at end of file"" indication of lint.",model.cc,"@@ -80,8 +80,7 @@ FlatBufferModel::FlatBufferModel(const char* filename, bool mmap_file,
   } else {
     allocation_ = new FileCopyAllocation(filename, error_reporter);
   }
-  if (!allocation_->valid()) return;
-  if (!CheckModelIdentifier()) return;
+  if (!allocation_->valid() || !CheckModelIdentifier()) return;
 
   model_ = VerifyAndGetModel(allocation_->base(), allocation_->bytes());
 }
",0,test
c28ca27b96b3a141922523c005c71af51cc61906,tensorflow/tensorflow,"Fixing a TensorFlow control flow bug.

Calling `nest.map_structure` with a lambda that does not return (i.e. only for its side-effect) will fail on structures that contain composite tensors because the `map_structure` implementation will try to reconstruct the composite tensors from the return values of the lambda, which will be None.

PiperOrigin-RevId: 284197904
Change-Id: I9b3e43bbd28712281839eaf77b2e4280db7c585c",control_flow_ops.py,"@@ -749,10 +749,10 @@ class ControlFlowContext(object):
   def ExitResult(self, result):
     """"""Make a list of tensors available in the outer context.""""""
     if self._outer_context:
-      nest.map_structure(
-          lambda x: self._outer_context.AddName(x.name),
-          result,
-          expand_composites=True)
+      def fn(x):
+        self._outer_context.AddName(x.name)
+        return x
+      nest.map_structure(fn, result, expand_composites=True)
 
   def GetWhileContext(self):
     """"""Return the while context containing this context.""""""
",0,train
7687debbf63e31375d960d663373da8d469f2d2e,tensorflow/tensorflow,"Add unit-test for questions:
 - http://stackoverflow.com/q/45109305
 - #10766

PiperOrigin-RevId: 162026912",mvn_diag_test.py,"@@ -24,7 +24,12 @@ from tensorflow.contrib import distributions
 from tensorflow.contrib.distributions.python.ops import bijectors
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gradients_impl
+from tensorflow.python.ops import init_ops
+from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import variable_scope
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -233,6 +238,43 @@ class MultivariateNormalDiagTest(test.TestCase):
       self.assertAllClose(mu, samps.mean(axis=0), atol=0.1)
       self.assertAllClose(cov_mat, np.cov(samps.T), atol=0.1)
 
+  def testMultivariateNormalDiagNegLogLikelihood(self):
+    num_draws = 50
+    dims = 3
+    with self.test_session() as sess:
+      x_pl = array_ops.placeholder(dtype=dtypes.float32,
+                                   shape=[None, dims],
+                                   name=""x"")
+      mu_var = variable_scope.get_variable(
+          name=""mu"",
+          shape=[dims],
+          dtype=dtypes.float32,
+          initializer=init_ops.constant_initializer(1.))
+      sess.run([variables.global_variables_initializer()])
+
+      mvn = ds.MultivariateNormalDiag(
+          loc=mu_var,
+          scale_diag=array_ops.ones(shape=[dims], dtype=dtypes.float32))
+
+      # Typically you'd use `mvn.log_prob(x_pl)` which is always at least as
+      # numerically stable as `tf.log(mvn.prob(x_pl))`. However in this test
+      # we're testing a bug specific to `prob` and not `log_prob`;
+      # http://stackoverflow.com/q/45109305. (The underlying issue was not
+      # related to `Distributions` but that `reduce_prod` didn't correctly
+      # handle negative indexes.)
+      neg_log_likelihood = -math_ops.reduce_sum(math_ops.log(mvn.prob(x_pl)))
+      grad_neg_log_likelihood = gradients_impl.gradients(
+          neg_log_likelihood, variables.trainable_variables())
+
+      x = np.zeros([num_draws, dims], dtype=np.float32)
+      grad_neg_log_likelihood_ = sess.run(
+          grad_neg_log_likelihood,
+          feed_dict={x_pl: x})
+      self.assertEqual(1, len(grad_neg_log_likelihood_))
+      self.assertAllClose(grad_neg_log_likelihood_[0],
+                          np.tile(num_draws, dims),
+                          rtol=1e-6, atol=0.)
+
 
 if __name__ == ""__main__"":
   test.main()
",0,test
bea42a1b32d7faf2effcafc5ef1d6d1e4436ae31,tensorflow/tensorflow,"allow metadata tf_op override level0's tf_op.

PiperOrigin-RevId: 347040091
Change-Id: I13bed86c9c5b5acd7d95a3c4db96a279755f6a76",cupti_collector.cc,"@@ -224,27 +224,25 @@ struct PerDeviceCollector {
 
     std::vector<Annotation> annotation_stack =
         ParseAnnotationStack(event.annotation);
+    if (!annotation_stack.empty()) {
+      xevent.AddStatValue(
+          *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)),
+          *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name));
+    }
     // If multiple metadata have the same key name, show the values from the top
     // of the stack (innermost annotation). Concatenate the values from
     // ""hlo_op"".
     absl::flat_hash_set<absl::string_view> key_set;
-    std::vector<absl::string_view> hlo_op_names;
+
     for (auto annotation = annotation_stack.rbegin();
          annotation != annotation_stack.rend(); ++annotation) {
       for (const Annotation::Metadata& metadata : annotation->metadata) {
-        if (metadata.key == ""tf_op"") {
-          continue;  // ignored, obtained from HLO proto via DebugInfoMap
-        } else if (key_set.insert(metadata.key).second) {
+        if (key_set.insert(metadata.key).second) {
           xevent.ParseAndAddStatValue(
               *plane->GetOrCreateStatMetadata(metadata.key), metadata.value);
         }
       }
     }
-    if (!annotation_stack.empty()) {
-      xevent.AddStatValue(
-          *plane->GetOrCreateStatMetadata(GetStatTypeStr(StatType::kTfOp)),
-          *plane->GetOrCreateStatMetadata(annotation_stack.begin()->name));
-    }
   }
 
   absl::optional<int> GetDeviceAttribute(CUdevice device,
",0,test
bea42a1b32d7faf2effcafc5ef1d6d1e4436ae31,tensorflow/tensorflow,"allow metadata tf_op override level0's tf_op.

PiperOrigin-RevId: 347040091
Change-Id: I13bed86c9c5b5acd7d95a3c4db96a279755f6a76",traceme_encode.h,"@@ -133,6 +133,12 @@ TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp(
     absl::string_view op_name, absl::string_view op_type) {
   return absl::StrCat(op_name, "":"", op_type);
 }
+
+TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp(const char* op_name,
+                                                        const char* op_type) {
+  return absl::StrCat(op_name, "":"", op_type);
+}
+
 TF_ATTRIBUTE_ALWAYS_INLINE inline std::string TraceMeOp(
     std::string&& op_name, absl::string_view op_type) {
   absl::StrAppend(&op_name, "":"", op_type);
",0,test
f8c008aa1833eab6c5ef4523e1bff2f2769c8ac0,tensorflow/tensorflow,Further fixes to test case,check_ops_test.py,"@@ -240,44 +240,6 @@ First 2 elements of y:
       out = array_ops.identity(larry)
     self.evaluate(out)
 
-  def test_error_message_eager(self):
-    expected_error_msg_full = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, 3.0, 4.0, 5.0""""""
-    expected_error_msg_default = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, ...
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, 2.0, ...""""""
-    expected_error_msg_short = r""""""Expected 'tf.Tensor(False, shape=(), dtype=bool)' to be true. Summarized data: b'This is the error message.'
-b'Condition x != y did not hold for every single element:'
-b'x (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, ...
-b'y (shape=(2, 3) dtype=float32) = '
-0.0, 1.0, ...""""""
-    with context.eager_mode():
-      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_full):
-        check_ops.assert_none_equal(t, t, message=""This is the error message."",
-                               summarize=10)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_full):
-        check_ops.assert_equal(t, t, message=""This is the error message."",
-                               summarize=-1)
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_default):
-        check_ops.assert_equal(t, t, message=""This is the error message."")
-      with self.assertRaisesRegexp(errors.InvalidArgumentError,
-                                   expected_error_msg_short):
-        check_ops.assert_equal(t, t, message=""This is the error message."",
-                               summarize=2)
-
-
 
 class AssertNoneEqualTest(test.TestCase):
 
@@ -340,6 +302,43 @@ class AssertNoneEqualTest(test.TestCase):
       x = check_ops.assert_none_equal(t1, t2)
       assert x is None
 
+  def test_error_message_eager(self):
+    expected_error_msg_full = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, 3.0, 4.0, 5.0""""""
+    expected_error_msg_default = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, ...
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, 2.0, ...""""""
+    expected_error_msg_short = r""""""Expected 'tf.Tensor\(False, shape=\(\), dtype=bool\)' to be true. Summarized data: b'This is the error message.'
+b'Condition x != y did not hold for every single element:'
+b'x \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, ...
+b'y \(shape=\(2, 3\) dtype=float32\) = '
+0.0, 1.0, ...""""""
+    with context.eager_mode():
+      t = constant_op.constant(np.array(range(6)), shape=[2,3], dtype=np.float32)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_none_equal(t, t, message=""This is the error message."",
+                               summarize=10)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_full):
+        check_ops.assert_equal(t, t, message=""This is the error message."",
+                               summarize=-1)
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_default):
+        check_ops.assert_equal(t, t, message=""This is the error message."")
+      with self.assertRaisesRegexp(errors.InvalidArgumentError,
+                                   expected_error_msg_short):
+        check_ops.assert_equal(t, t, message=""This is the error message."",
+                               summarize=2)
+
 
 class AssertAllCloseTest(test.TestCase):
 
",0,train
2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files

PiperOrigin-RevId: 397769627
Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",losses_impl.py,"@@ -70,7 +70,8 @@ class Reduction(object):
   @classmethod
   def validate(cls, key):
     if key not in cls.all():
-      raise ValueError(""Invalid Reduction Key %s."" % key)
+      raise ValueError(f""Invalid Reduction Key {key}. Key should be one of ""
+                       f""{cls.all()}."")
 
 
 def _safe_mean(losses, num_present):
@@ -256,9 +257,9 @@ def absolute_difference(
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""absolute_difference"",
                       (predictions, labels, weights)) as scope:
     predictions = math_ops.cast(predictions, dtype=dtypes.float32)
@@ -309,11 +310,11 @@ def cosine_distance(
   """"""
   axis = deprecated_argument_lookup(""axis"", axis, ""dim"", dim)
   if axis is None:
-    raise ValueError(""You must specify 'axis'."")
+    raise ValueError(""You must specify argument `axis`."")
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""cosine_distance_loss"",
                       (predictions, labels, weights)) as scope:
     predictions = math_ops.cast(predictions, dtype=dtypes.float32)
@@ -361,9 +362,9 @@ def hinge_loss(labels, logits, weights=1.0, scope=None,
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if logits is None:
-    raise ValueError(""logits must not be None."")
+    raise ValueError(""Argument `logits` must not be None."")
   with ops.name_scope(scope, ""hinge_loss"", (logits, labels, weights)) as scope:
     logits = math_ops.cast(logits, dtype=dtypes.float32)
     labels = math_ops.cast(labels, dtype=dtypes.float32)
@@ -428,9 +429,9 @@ def huber_loss(labels, predictions, weights=1.0, delta=1.0, scope=None,
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""huber_loss"",
                       (predictions, labels, weights)) as scope:
     predictions = math_ops.cast(predictions, dtype=dtypes.float32)
@@ -495,9 +496,9 @@ def log_loss(labels, predictions, weights=1.0, epsilon=1e-7, scope=None,
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""log_loss"",
                       (predictions, labels, weights)) as scope:
     predictions = math_ops.cast(predictions, dtype=dtypes.float32)
@@ -564,9 +565,9 @@ def mean_pairwise_squared_error(
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""mean_pairwise_squared_error"",
                       (predictions, labels, weights)) as scope:
     weights = math_ops.cast(weights, dtype=dtypes.float32)
@@ -757,9 +758,9 @@ def mean_squared_error(
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if predictions is None:
-    raise ValueError(""predictions must not be None."")
+    raise ValueError(""Argument `predictions` must not be None."")
   with ops.name_scope(scope, ""mean_squared_error"",
                       (predictions, labels, weights)) as scope:
     predictions = math_ops.cast(predictions, dtype=dtypes.float32)
@@ -816,9 +817,9 @@ def sigmoid_cross_entropy(
   @end_compatibility
   """"""
   if multi_class_labels is None:
-    raise ValueError(""multi_class_labels must not be None."")
+    raise ValueError(""Argument `multi_class_labels` must not be None."")
   if logits is None:
-    raise ValueError(""logits must not be None."")
+    raise ValueError(""Argument `logits` must not be None."")
   with ops.name_scope(scope, ""sigmoid_cross_entropy_loss"",
                       (logits, multi_class_labels, weights)) as scope:
     logits = ops.convert_to_tensor(logits)
@@ -969,9 +970,9 @@ def softmax_cross_entropy(
   @end_compatibility
   """"""
   if onehot_labels is None:
-    raise ValueError(""onehot_labels must not be None."")
+    raise ValueError(""Argument `onehot_labels` must not be None."")
   if logits is None:
-    raise ValueError(""logits must not be None."")
+    raise ValueError(""Argument `logits` must not be None."")
   with ops.name_scope(scope, ""softmax_cross_entropy_loss"",
                       (logits, onehot_labels, weights)) as scope:
     logits = ops.convert_to_tensor(logits)
@@ -1087,9 +1088,9 @@ def sparse_softmax_cross_entropy(
   @end_compatibility
   """"""
   if labels is None:
-    raise ValueError(""labels must not be None."")
+    raise ValueError(""Argument `labels` must not be None."")
   if logits is None:
-    raise ValueError(""logits must not be None."")
+    raise ValueError(""Argument `logits` must not be None."")
   with ops.name_scope(scope, ""sparse_softmax_cross_entropy_loss"",
                       (logits, labels, weights)) as scope:
     # As documented above in Args, labels contain class IDs and logits contains
",0,train
2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files

PiperOrigin-RevId: 397769627
Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",util.py,"@@ -160,7 +160,8 @@ def check_per_example_loss_rank(per_example_loss):
     if loss_rank == 0:
       raise ValueError(
           ""Invalid value passed for `per_example_loss`. Expected a tensor with ""
-          ""at least rank 1, received: {}"".format(per_example_loss))
+          f""at least rank 1. Received per_example_loss={per_example_loss} with ""
+          f""rank {loss_rank}"")
     yield
   else:
     # Handle dynamic rank.
",0,train
2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files

PiperOrigin-RevId: 397769627
Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",nn_loss_scaling_utilities_test.py,"@@ -151,7 +151,7 @@ class LossUtilitiesTest(test_lib.TestCase, parameterized.TestCase):
     # Static rank
     with self.assertRaisesRegex(
         ValueError, ""Invalid value passed for `per_example_loss`. ""
-        ""Expected a tensor with at least rank 1,""):
+        ""Expected a tensor with at least rank 1.""):
       nn_impl.compute_average_loss(per_example_loss)
 
     with context.graph_mode():
",0,train
2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files

PiperOrigin-RevId: 397769627
Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",control_flow_ops.py,"@@ -72,9 +72,9 @@ def for_loop(loop_fn, loop_fn_dtypes, iters, parallel_iterations=None):
     fn_output = nest.flatten(loop_fn(i))
     if len(fn_output) != len(flat_loop_fn_dtypes):
       raise ValueError(
-          ""Number of expected outputs, %d, does not match the number of ""
-          ""actual outputs, %d, from loop_fn"" % (len(flat_loop_fn_dtypes),
-                                                len(fn_output)))
+          f""Number of expected outputs {len(flat_loop_fn_dtypes)}, does not ""
+          f""match the number of actual outputs {len(fn_output)} from loop_fn: ""
+          f""{loop_fn} with output {fn_output}."")
     outputs = []
     del is_none_list[:]
     is_none_list.extend(x is None for x in fn_output)
@@ -222,10 +222,9 @@ def _composite_to_tensors(value, is_batched=False):
   if _should_expand_composite(value):
     spec = value._type_spec
     if not isinstance(spec, type_spec.BatchableTypeSpec):
-      raise ValueError(""CompositeTensor instance {} returned from ""
+      raise ValueError(f""CompositeTensor instance {value} returned from ""
                        ""parallel_for or vectorized_map loop body must provide ""
-                       ""a `BatchableTypeSpec` (saw: {})."".format(
-                           value, spec))
+                       f""a `BatchableTypeSpec` (saw: {spec})."")
     if is_batched:
       return spec._to_batched_tensor_list(value)
     return spec._to_tensor_list(value)
@@ -258,7 +257,7 @@ def _loop_fn_has_config(loop_fn):
   else:
     loop_class = tf_decorator.unwrap(loop_fn)[1]
     if not hasattr(loop_class, ""__call__""):
-      raise ValueError(""loop_fn object did not have a __call__ method"")
+      raise ValueError(""`loop_fn` object did not have a __call__ method"")
     argspec = tf_inspect.getargspec(loop_class.__call__)
     return PFOR_CONFIG_ARG in argspec.args
 
@@ -309,9 +308,12 @@ def _pfor_impl(loop_fn,
   iters = ops.convert_to_tensor(iters)
   if parallel_iterations is not None:
     if parallel_iterations < 1:
-      raise ValueError(""parallel_iterations must be None or a positive integer"")
+      raise ValueError(
+          ""Argument `parallel_iterations` must be None or a positive integer. ""
+          f""Received: {parallel_iterations}."")
     if parallel_iterations == 1:
-      raise ValueError(""Found parallel_iterations == 1. Use for_loop instead."")
+      raise ValueError(
+          ""Found `parallel_iterations == 1`. Use `for_loop` instead."")
     if iters_value is not None and iters_value < parallel_iterations:
       parallel_iterations = None
   if parallel_iterations is None:
@@ -325,8 +327,8 @@ def _pfor_impl(loop_fn,
         flattened_output_tensors.append(output)
   else:
     if pfor_config is not None and pfor_config._has_reductions():  # pylint: disable=protected-access
-      raise ValueError(""Setting parallel_iterations currently unsupported if""
-                       "" reductions across iterations are performed."")
+      raise ValueError(""Setting `parallel_iterations` currently unsupported if ""
+                       ""reductions across iterations are performed."")
     num_tiled_iterations = iters // parallel_iterations
     num_remaining_iterations = iters % parallel_iterations
     # TODO(agarwal): Avoid calling loop_fn twice. Generate the loop body inside
",0,train
2ba26da2794775576d50003414460ebcf92bf945,tensorflow/tensorflow,"Improving error messages for more python/ops/... files

PiperOrigin-RevId: 397769627
Change-Id: Ifef986322f9deaac7c8163d2c6a02b67b9625670",control_flow_ops_test.py,"@@ -133,7 +133,7 @@ class PForTest(PForTestCase):
           lambda i: 1, dtypes.int32, 8, parallel_iterations=0)
 
   def test_parallel_iterations_one(self):
-    with self.assertRaisesRegex(ValueError, ""Use for_loop instead""):
+    with self.assertRaisesRegex(ValueError, ""Use `for_loop` instead""):
       pfor_control_flow_ops.pfor(lambda i: 1, 8, parallel_iterations=1)
 
   def test_vectorized_map(self):
@@ -330,7 +330,7 @@ class ReductionTest(PForTestCase):
       return pfor_config.reduce_sum(x_i)
 
     with self.assertRaisesRegex(ValueError,
-                                ""parallel_iterations currently unsupported""):
+                                ""`parallel_iterations` currently unsupported""):
       pfor_control_flow_ops.pfor(loop_fn, 8, parallel_iterations=2)
 
   def test_var_loop_len(self):
",0,train
d8ef9b091839b4edfbb3826af920c996e3b1982a,tensorflow/tensorflow,removing some nonsense in AddNodeWithParameters,subgraph.cc,"@@ -808,13 +808,9 @@ TfLiteStatus Subgraph::AddNodeWithParameters(
 
   int new_node_index = nodes_and_registration_.size();
   if (node_index) *node_index = new_node_index;
-  nodes_and_registration_.resize(nodes_and_registration_.size() + 1);
+  nodes_and_registration_.emplace_back();
   auto& node_and_reg = nodes_and_registration_.back();
   TfLiteNode& node = node_and_reg.first;
-  if (node.inputs) TfLiteIntArrayFree(node.inputs);
-  if (node.outputs) TfLiteIntArrayFree(node.outputs);
-  if (node.intermediates) TfLiteIntArrayFree(node.intermediates);
-  if (node.temporaries) TfLiteIntArrayFree(node.temporaries);
 
   // NOTE, here we are not using move semantics yet, since our internal
   // representation isn't std::vector, but in the future we would like to avoid
",0,train
2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0.

PiperOrigin-RevId: 221536352",math_ops.py,"@@ -1408,7 +1408,7 @@ def reduce_sum(input_tensor,
                                    name=name))
 
 
-@tf_export(""math.count_nonzero"", ""count_nonzero"")
+@tf_export(v1=[""math.count_nonzero"", ""count_nonzero""])
 @deprecation.deprecated_args(
     None, ""keep_dims is deprecated, use keepdims instead"", ""keep_dims"")
 def count_nonzero(input_tensor,
@@ -1469,20 +1469,79 @@ def count_nonzero(input_tensor,
   """"""
   keepdims = deprecation.deprecated_argument_lookup(""keepdims"", keepdims,
                                                     ""keep_dims"", keep_dims)
+  axis = deprecation.deprecated_argument_lookup(
+      ""axis"", axis,
+      ""reduction_indices"", reduction_indices
+      )
   if keepdims is None:
     keepdims = False
 
-  with ops.name_scope(name, ""count_nonzero"", [input_tensor]):
-    input_tensor = ops.convert_to_tensor(input_tensor, name=""input_tensor"")
+  return count_nonzero_v2(input_tensor, axis, keepdims, dtype, name)
+
+
+@tf_export(""math.count_nonzero"", v1=[])
+def count_nonzero_v2(input,  # pylint: disable=redefined-builtin
+                     axis=None,
+                     keepdims=None,
+                     dtype=dtypes.int64,
+                     name=None):
+  """"""Computes number of nonzero elements across dimensions of a tensor.
+
+  Reduces `input` along the dimensions given in `axis`.
+  Unless `keepdims` is true, the rank of the tensor is reduced by 1 for each
+  entry in `axis`. If `keepdims` is true, the reduced dimensions
+  are retained with length 1.
+
+  If `axis` has no entries, all dimensions are reduced, and a
+  tensor with a single element is returned.
+
+  **NOTE** Floating point comparison to zero is done by exact floating point
+  equality check.  Small values are **not** rounded to zero for purposes of
+  the nonzero check.
+
+  For example:
+
+  ```python
+  x = tf.constant([[0, 1, 0], [1, 1, 0]])
+  tf.count_nonzero(x)  # 3
+  tf.count_nonzero(x, 0)  # [1, 2, 0]
+  tf.count_nonzero(x, 1)  # [1, 2]
+  tf.count_nonzero(x, 1, keepdims=True)  # [[1], [2]]
+  tf.count_nonzero(x, [0, 1])  # 3
+  ```
+
+  **NOTE** Strings are compared against zero-length empty string `""""`. Any
+  string with a size greater than zero is already considered as nonzero.
+
+  For example:
+  ```python
+  x = tf.constant(["""", ""a"", ""  "", ""b"", """"])
+  tf.count_nonzero(x) # 3, with ""a"", ""  "", and ""b"" as nonzero strings.
+  ```
+
+  Args:
+    input: The tensor to reduce. Should be of numeric type, `bool`,
+      or `string`.
+    axis: The dimensions to reduce. If `None` (the default),
+      reduces all dimensions. Must be in the range
+      `[-rank(input), rank(input))`.
+    keepdims: If true, retains reduced dimensions with length 1.
+    dtype: The output dtype; defaults to `tf.int64`.
+    name: A name for the operation (optional).
+
+  Returns:
+    The reduced tensor (number of nonzero values).
+  """"""
+  with ops.name_scope(name, ""count_nonzero"", [input]):
+    input = ops.convert_to_tensor(input, name=""input"")
     # A scalar of 'zero' is enough as `not_equal` will broadcast.
-    zero = array_ops.zeros([], dtype=input_tensor.dtype)
+    zero = array_ops.zeros([], dtype=input.dtype)
     return cast(
         reduce_sum(
             # int64 reduction happens on GPU
-            to_int64(gen_math_ops.not_equal(input_tensor, zero)),
+            to_int64(gen_math_ops.not_equal(input, zero)),
             axis=axis,
-            keepdims=keepdims,
-            reduction_indices=reduction_indices),
+            keepdims=keepdims),
         dtype=dtype)
 
 
",0,test
2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0.

PiperOrigin-RevId: 221536352",tf_upgrade_v2.py,"@@ -37,9 +37,14 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
         ""tf.convert_to_tensor"": {
             ""preferred_dtype"": ""dtype_hint""
         },
+        ""tf.math.count_nonzero"": {
+            ""input_tensor"": ""input"",
+            ""keep_dims"": ""keepdims"",
+            ""reduction_indices"": ""axis"",
+        },
         ""tf.nn.pool"": {
             ""dilation_rate"": ""dilations""
-        }
+        },
     }
 
     # Mapping from function to the new name of the function
",0,test
2ba2e6b25891e63c425c7f239ac75bcb7f8f9bda,tensorflow/tensorflow,"Change signature of tf.count_nonzero for TF 2.0.

PiperOrigin-RevId: 221536352",tf_upgrade_v2_test.py,"@@ -110,6 +110,18 @@ class TestUpgrade(test_util.TensorFlowTestCase):
                               % ""tf.estimator.LinearClassifier""])
     self.assertIn(""loss_reduction has been changed"", report)
 
+  def testCountNonZeroChanges(self):
+    text = (
+        ""tf.math.count_nonzero(input_tensor=input, dtype=dtype, name=name, ""
+        ""reduction_indices=axis, keep_dims=keepdims)\n""
+        )
+    _, unused_report, unused_errors, new_text = self._upgrade(text)
+    expected_text = (
+        ""tf.math.count_nonzero(input=input, dtype=dtype, name=name, ""
+        ""axis=axis, keepdims=keepdims)\n""
+        )
+    self.assertEqual(new_text, expected_text)
+
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
 
",0,test
1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally.
  * Use ForEachMutableElement rather than the iterators, as it is much quicker.

There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement.

PiperOrigin-RevId: 195384423",xla_launch_util.cc,"@@ -77,16 +77,16 @@ Status XlaAllocator::Deallocate(int device_ordinal, se::DeviceMemoryBase* mem) {
   return Status::OK();
 }
 
-namespace {
+namespace internal {
 // Return the 'index''th subtree of the given ShapedBuffer as a
 // ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the
 // subtree, and sets the input's buffer pointers to nullptr for the subtree.
 ScopedShapedBuffer ExtractSubShapedBuffer(
     ShapedBuffer* shaped_buffer, int index,
     xla::DeviceMemoryAllocator* allocator) {
-  xla::Shape on_host_shape = xla::ShapeUtil::GetTupleElementShape(
+  const xla::Shape& on_host_shape = xla::ShapeUtil::GetTupleElementShape(
       shaped_buffer->on_host_shape(), index);
-  xla::Shape on_device_shape = xla::ShapeUtil::GetTupleElementShape(
+  const xla::Shape& on_device_shape = xla::ShapeUtil::GetTupleElementShape(
       shaped_buffer->on_device_shape(), index);
 
   ShapedBuffer sub_shaped_buffer(on_host_shape, on_device_shape,
@@ -98,14 +98,18 @@ ScopedShapedBuffer ExtractSubShapedBuffer(
   sub_shape_tree.CopySubtreeFrom(shape_tree,
                                  /*source_base_index=*/{index},
                                  /*target_base_index=*/{});
-  for (auto& index_to_buffer : shape_tree) {
-    if (!index_to_buffer.first.empty() && index_to_buffer.first[0] == index) {
-      index_to_buffer.second = se::DeviceMemoryBase(nullptr, 0);
-    }
-  }
+  shape_tree.ForEachMutableElement(
+      [index](const xla::ShapeIndex& shape_index,
+              tensorflow::se::DeviceMemoryBase* data) {
+        // shape_index is empty for the root node. Ignore that.
+        if (!shape_index.empty() && shape_index[0] == index) {
+          *data = tensorflow::se::DeviceMemoryBase(nullptr, 0);
+        }
+      });
   return ScopedShapedBuffer(std::move(sub_shaped_buffer), allocator);
 }
-}  // namespace
+}  // namespace internal
+using internal::ExtractSubShapedBuffer;
 
 XlaComputationLaunchContext::XlaComputationLaunchContext(
     int64 num_resource_args, xla::LocalClient* client,
",0,test
1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally.
  * Use ForEachMutableElement rather than the iterators, as it is much quicker.

There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement.

PiperOrigin-RevId: 195384423",xla_launch_util.h,"@@ -140,6 +140,17 @@ class XlaTensorBuffer : public TensorBuffer {
   Allocator* allocator_;
 };
 
+// Exposed in this header file for microbenchmarking purposes, but this is an
+// internal implementation detail.
+namespace internal {
+// Return the 'index''th subtree of the given ShapedBuffer as a
+// ScopedShapedBuffer. The returned ScopedShapedBuffer takes ownership of the
+// subtree, and sets the input's buffer pointers to nullptr for the subtree.
+xla::ScopedShapedBuffer ExtractSubShapedBuffer(
+    xla::ShapedBuffer* shaped_buffer, int index,
+    xla::DeviceMemoryAllocator* allocator);
+}  // namespace internal
+
 }  // namespace tensorflow
 
 #endif
",0,test
1284047dca0dd58745a31cd2fd68da3173c7e120,tensorflow/tensorflow,"* Don't copy on-host and on-device shapes locally.
  * Use ForEachMutableElement rather than the iterators, as it is much quicker.

There is still room for improvement; ForEachMutableElement is linear in the number of nodes in the shape tree but we want to be linear in the number of nodes in the sub shape tree. But I feel this is a good enough improvement.

PiperOrigin-RevId: 195384423",xla_launch_util_test.cc,"@@ -0,0 +1,64 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Contains microbenchmarks for performance critical functions in
+// xla_launch_util.cc.
+
+#include ""tensorflow/compiler/jit/xla_launch_util.h""
+#include ""tensorflow/compiler/tf2xla/shape_util.h""
+#include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/platform/test_benchmark.h""
+
+// Test ExtractSubBuffer with different depths (depth of ShapeTree) and fan-outs
+// (cardinality of each non-leaf node's children).
+void BM_ExtractSubBuffer(int iters, int depth, int fan_out) {
+  tensorflow::testing::StopTiming();
+  xla::Shape shape = xla::ShapeUtil::MakeShape(xla::F32, {32, 64, 128});
+  for (int i = 0; i < depth; ++i) {
+    std::vector<xla::Shape> shapes(fan_out, shape);
+    shape = xla::ShapeUtil::MakeTupleShape(shapes);
+  }
+  xla::ShapedBuffer shaped_buffer(shape, shape, /*platform=*/nullptr,
+                                  /*device_ordinal=*/0);
+  tensorflow::testing::StartTiming();
+  for (int i = 0; i < iters; ++i) {
+    // Extract a buffer from approximately the middle of the first level of the
+    // tree.
+    tensorflow::internal::ExtractSubShapedBuffer(&shaped_buffer,
+                                                 /*index=*/fan_out / 2,
+                                                 /*allocator=*/nullptr)
+        .release();
+  }
+}
+
+BENCHMARK(BM_ExtractSubBuffer)
+    ->ArgPair(1, 4)
+    ->ArgPair(1, 8)
+    ->ArgPair(1, 32)
+    ->ArgPair(1, 64)
+    ->ArgPair(1, 128)
+    ->ArgPair(1, 256)
+    ->ArgPair(1, 512)
+    ->ArgPair(2, 4)
+    ->ArgPair(2, 8)
+    ->ArgPair(2, 32)
+    ->ArgPair(2, 64)
+    ->ArgPair(2, 128);
+
+int main(int argc, char** argv) {
+  testing::InitGoogleTest(&argc, argv);
+  tensorflow::testing::RunBenchmarks();
+  return RUN_ALL_TESTS();
+}
",0,test
6381a7b127bd276a3817a93e5423b15a06c33419,tensorflow/tensorflow,"[tf.data] Add a check for ram_budget == 0 to avoid division by 0 exception when ram_budget is not set.

PiperOrigin-RevId: 410071934
Change-Id: Ida9fb401ba24367e48066c8a899962877429c3da",model.cc,"@@ -46,6 +46,9 @@ bool AreAllParametersMax(const Model::ModelParameters& parameters) {
 
 // Records the ram usage of hill climbing algorithm.
 void RecordAutotuneRamUsage(int64 ram_budget, double max_buffered_bytes) {
+  if (ram_budget == 0) {
+    return;
+  }
   const auto memory_info = port::GetMemoryInfo();
   // Records ratio of memory used since RootDataset was created over the ram
   // budget.
",0,train
eb4577c283452c601afcaa07da3e21722b826df7,tensorflow/tensorflow,"Initialize alloc_fns with 0s
Change: 150117973",grpc_server_lib.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 #include ""tensorflow/core/distributed_runtime/rpc/grpc_server_lib.h""
 
+#include <cstring>
 #include <limits>
 #include <memory>
 
@@ -330,6 +331,7 @@ class GrpcServerRegistrar {
  public:
   GrpcServerRegistrar() {
     gpr_allocation_functions alloc_fns;
+    memset(&alloc_fns, 0, sizeof(alloc_fns));
     alloc_fns.malloc_fn = port::Malloc;
     alloc_fns.realloc_fn = port::Realloc;
     alloc_fns.free_fn = port::Free;
",0,train
7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool:
TF_AVGPOOL_USE_CUDNN. The default is false for now.
Change: 127123594",avgpooling_op.cc,"@@ -39,6 +39,7 @@ limitations under the License.
 #if GOOGLE_CUDA
 #include ""tensorflow/core/kernels/maxpooling_op_gpu.h""
 #include ""tensorflow/core/kernels/pooling_ops_common_gpu.h""
+#include ""tensorflow/core/util/use_cudnn.h""
 #endif  // GOOGLE_CUDA
 
 namespace tensorflow {
@@ -150,7 +151,7 @@ class AvgPoolingOp<GPUDevice, T> : public UnaryOp<T> {
 
     TensorShape output_shape = params.forward_output_shape();
 
-    if (data_format_ == FORMAT_NCHW) {
+    if (internal::AvgPoolUseCudnn() || data_format_ == FORMAT_NCHW) {
       DnnPoolingOp<T>::Compute(
           context, perftools::gputools::dnn::PoolingMode::kAverage, ksize_,
           stride_, padding_, data_format_, tensor_in, output_shape);
",0,train
7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool:
TF_AVGPOOL_USE_CUDNN. The default is false for now.
Change: 127123594",use_cudnn.cc,"@@ -40,4 +40,11 @@ bool CudnnUseAutotune() {
   return ReadBoolFromEnvVar(""TF_CUDNN_USE_AUTOTUNE"", true);
 }
 
+namespace internal {
+
+bool AvgPoolUseCudnn() {
+  return ReadBoolFromEnvVar(""TF_AVGPOOL_USE_CUDNN"", false);
+}
+
+}  // namespace internal
 }  // namespace tensorflow
",0,train
7f37ded0367a87bc4ed7e83679ce941c0bac13b7,tensorflow/tensorflow,"Adding an environment variable to control whether to use Cudnn with AvgPool:
TF_AVGPOOL_USE_CUDNN. The default is false for now.
Change: 127123594",use_cudnn.h,"@@ -23,6 +23,12 @@ namespace tensorflow {
 bool CanUseCudnn();
 bool CudnnUseAutotune();
 
+namespace internal {
+
+// This function is for transition only. And it may go away at any time.
+bool AvgPoolUseCudnn();
+
+}  // namespace internal
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_UTIL_USE_CUDNN_H_
",0,train
a18853eb0e18b47952bab3ba5df582b0f8b3516d,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-06-25

PiperOrigin-RevId: 318231043
Change-Id: Ic8bf82284920a04fe9d1589753905c69bbf8b8e4",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 24)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 6, 25)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
e4c94c279e1f7990d23eab8bdc29ad1ed6277916,tensorflow/tensorflow,"Simplify control flow in ops/functional_ops.py when creating PartitionedCall

The current branching is confusing: reviewers glancing at the code assume that
the first branch corresponds to the eager mode, and miss `or len(tout)`.
Handling two separate cases in this branch requires a correlated if- at the
end, which can only take the empty branch in eager mode.

Moreover, the logic is subtly incorrect: the user-provided `config` is not
considered in the second branch.

PiperOrigin-RevId: 286595076
Change-Id: I29fb3533f19923a8b63fe1cd7ea94848a74ac6ec",functional_ops.py,"@@ -1125,7 +1125,7 @@ def partitioned_call(args,
   if executor_type is None:
     executor_type = """"
 
-  if executing_eagerly or len(tout):
+  if executing_eagerly:
     if f.stateful_ops:
       outputs = gen_functional_ops.stateful_partitioned_call(
           args=args,
@@ -1158,8 +1158,7 @@ def partitioned_call(args,
   # When running in graph mode, the graph and function graphs are optimized
   # (i.e. run through grappler) per the session options, so we can disable any
   # eager-specific rewriting.
-  config_proto = attr_value_pb2.AttrValue(
-      s=function_utils.get_disabled_rewriter_config())
+  config_proto = attr_value_pb2.AttrValue(s=config)
 
   graph = ops.get_default_graph()
   f.add_to_graph(graph)
@@ -1168,7 +1167,7 @@ def partitioned_call(args,
       op_name,
       args,
       tout,
-      name=""PartitionedFunctionCall"",
+      name=op_name,
       attrs={
           ""Tin"": tin_attr,
           ""Tout"": tout_attr,
",0,train
e4c94c279e1f7990d23eab8bdc29ad1ed6277916,tensorflow/tensorflow,"Simplify control flow in ops/functional_ops.py when creating PartitionedCall

The current branching is confusing: reviewers glancing at the code assume that
the first branch corresponds to the eager mode, and miss `or len(tout)`.
Handling two separate cases in this branch requires a correlated if- at the
end, which can only take the empty branch in eager mode.

Moreover, the logic is subtly incorrect: the user-provided `config` is not
considered in the second branch.

PiperOrigin-RevId: 286595076
Change-Id: I29fb3533f19923a8b63fe1cd7ea94848a74ac6ec",utils_test.py,"@@ -57,7 +57,7 @@ class UtilsTest(test.TestCase):
     x = constant_op.constant(1, name=""x"")
     y = constant_op.constant(2, name=""y"")
     init_op_info = utils.build_tensor_info_from_op(my_init_fn(x, y))
-    self.assertEqual(""PartitionedFunctionCall"", init_op_info.name)
+    self.assertEqual(""PartitionedCall"", init_op_info.name)
     self.assertEqual(types_pb2.DT_INVALID, init_op_info.dtype)
     self.assertEqual(0, len(init_op_info.tensor_shape.dim))
 
",0,train
20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api

PiperOrigin-RevId: 202214880",tpu_strategy.py,"@@ -47,7 +47,12 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
     return self._call_dataset_fn(dataset_fn)
 
   # TODO(priyag): Deal with OutOfRange errors.
-  def run_steps_on_dataset(self, fn, iterator, iterations):
+  # TODO(sourabhbajaj): Remove the initial_values parameter
+  def _run_steps_on_dataset(self, fn, iterator, iterations,
+                            initial_values=None):
+    if initial_values is None:
+      initial_values = []
+
     # Enqueue ops
     shapes = nest.flatten(iterator.output_shapes)
     if any([not s.is_fully_defined() for s in shapes]):
@@ -93,23 +98,35 @@ class TPUStrategy(one_device_strategy.OneDeviceStrategy):
       return nest.pack_sequence_as(iterator.output_shapes, dequeued)
 
     # Wrap `fn` for repeat.
-    run_fn = lambda: fn(dequeue_fn())
+    def run_fn(*args, **kwargs):
+      del args, kwargs
+      return fn(dequeue_fn())
 
     # Repeat
+    # TODO(sourabhbajaj): The input to while loop should be based on the output
+    # type of the step_fn
     def iterate_on_tpu():
-      return tpu.repeat(iterations, run_fn, [])
+      return tpu.repeat(iterations, run_fn, initial_values)
 
     # Re-write and distribute computation.
+    # TODO(sourabhbajaj): Convert the output to perDevice variable and
+    # implement support for that in reduce.
     tpu_result = tpu.batch_parallel(
         iterate_on_tpu, [], num_shards=self._num_cores_per_host)
 
-    return control_flow_ops.group(tpu_result, enqueue_ops)
+    return control_flow_ops.group(tpu_result, enqueue_ops), tpu_result
 
   def _call_for_each_tower(self, fn, *args, **kwargs):
     kwargs.pop('run_concurrently', None)
     with one_device_strategy._OneDeviceTowerContext(self):  # pylint: disable=protected-access
       return fn(*args, **kwargs)
 
+  def get_initialization_ops(self):
+    return [tpu.initialize_system()]
+
+  def get_finalize_ops(self):
+    return [tpu.shutdown_system()]
+
   def _reduce(self, method_string, value, destinations):
     del destinations  # TPU is graph mode only.  Rely on implicit Send/Recv.
     if method_string == 'mean':
",0,train
20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api

PiperOrigin-RevId: 202214880",estimator.py,"@@ -71,6 +71,7 @@ from tensorflow.python.util.tf_export import estimator_export
 
 _VALID_MODEL_FN_ARGS = set(
     ['features', 'labels', 'mode', 'params', 'self', 'config'])
+_INITIAL_TRAINING_LOSS = 1e7
 
 
 @estimator_export('estimator.Estimator')
@@ -1183,25 +1184,76 @@ class Estimator(object):
       Loss from training
     """"""
     self._distribution.configure(self._session_config)
+
+    # TODO(sourabhbajaj): Remove this hack once we migrate the other strategies
+    # to use the new API
+    is_tpu_strategy = self._distribution.__class__.__name__ == 'TPUStrategy'
+
     worker_hooks = []
     with ops.Graph().as_default() as g:
       with self._distribution.scope():
         random_seed.set_random_seed(self._config.tf_random_seed)
-        features, labels, input_hooks = (
-            self._get_features_and_labels_from_input_fn(
-                input_fn, model_fn_lib.ModeKeys.TRAIN))
-        worker_hooks.extend(input_hooks)
-        global_step_tensor = self._create_and_assert_global_step(g)
-        # we want to add to the global collection in the main thread not the
-        # tower threads.
-        ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY,
-                              self._distribution.read_var(global_step_tensor))
-        grouped_estimator_spec = self._distribution.call_for_each_tower(
-            self._call_model_fn,
-            features,
-            labels,  # although this will be None it seems
-            model_fn_lib.ModeKeys.TRAIN,
-            self.config)
+
+        if is_tpu_strategy:
+          # Create the iterator for run_on_dataset function
+          # TODO(sourabhbajaj): refactor this out to call a function on the
+          # strategy
+          dataset = self._distribution.distribute_dataset(
+              lambda: self._call_input_fn(input_fn,  # pylint: disable=g-long-lambda
+                                          model_fn_lib.ModeKeys.TRAIN))
+          iterator = dataset.make_initializable_iterator()
+          worker_hooks.append(
+              estimator_util._DatasetInitializerHook(iterator))  # pylint: disable=protected-access
+
+          global_step_tensor = self._create_and_assert_global_step(g)
+          # we want to add to the global collection in the main thread not the
+          # tower threads.
+          ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY,
+                                self._distribution.read_var(global_step_tensor))
+
+          # TODO(sourabhbajaj): Remove this once the context input to step_fn
+          # is implemented
+          estimator_spec_wrapper = {}
+
+          # Create a step_fn from the train_op of grouped_estimator_spec
+          def step_fn(inputs):
+            """"""A single step that is passed to run_on_dataset.""""""
+            features, labels = inputs
+            estimator_spec = self._distribution.call_for_each_tower(
+                self._call_model_fn,
+                features,
+                labels,
+                model_fn_lib.ModeKeys.TRAIN,
+                self.config)
+            estimator_spec_wrapper['grouped_estimator_spec'] = estimator_spec
+            with ops.control_dependencies([estimator_spec.train_op]):
+              return array_ops.identity(estimator_spec.loss)
+
+          # Create new train_op post graph rewrites
+          # TODO(sourabhbajaj): Make sure train_steps and tpu_iterations
+          # work correctly. Currently hardcoded at 2
+          distributed_train_op, tpu_result = \
+              self._distribution._run_steps_on_dataset(  # pylint: disable=protected-access
+                  step_fn, iterator, 2, [_INITIAL_TRAINING_LOSS])
+
+          grouped_estimator_spec = estimator_spec_wrapper[
+              'grouped_estimator_spec']
+        else:
+          features, labels, input_hooks = (
+              self._get_features_and_labels_from_input_fn(
+                  input_fn, model_fn_lib.ModeKeys.TRAIN))
+          worker_hooks.extend(input_hooks)
+          global_step_tensor = self._create_and_assert_global_step(g)
+          # we want to add to the global collection in the main thread not the
+          # tower threads.
+          ops.add_to_collection(training_util.GLOBAL_STEP_READ_KEY,
+                                self._distribution.read_var(global_step_tensor))
+          grouped_estimator_spec = self._distribution.call_for_each_tower(
+              self._call_model_fn,
+              features,
+              labels,  # although this will be None it seems
+              model_fn_lib.ModeKeys.TRAIN,
+              self.config)
 
         # TODO(anjalisridhar): Figure out how to resolve the following scaffold
         # parameters: init_feed_dict, init_fn.
@@ -1287,13 +1339,28 @@ class Estimator(object):
         training_chief_hooks = get_hooks_from_the_first_device(
             grouped_estimator_spec.training_chief_hooks)
 
+        # TODO(sourabhbajaj): Merge the two code paths once we can
+        # handle per device variables correctly in reduce and can output
+        # the loss scaler.
+        if is_tpu_strategy:
+          loss = self._distribution.unwrap(
+              self._distribution.reduce(distribute_lib.get_loss_reduction(),
+                                        tpu_result[0])[0])[0]
+          worker_hooks.append(
+              estimator_util.StrategyInitFinalizeHook(
+                  self._distribution.get_initialization_ops,
+                  self._distribution.get_finalize_ops))
+        else:
+          loss = self._distribution.unwrap(
+              self._distribution.reduce(distribute_lib.get_loss_reduction(),
+                                        grouped_estimator_spec.loss,
+                                        destinations='/device:CPU:0'))[0]
+          distributed_train_op = grouped_estimator_spec.train_op
+
         estimator_spec = model_fn_lib.EstimatorSpec(
             mode=grouped_estimator_spec.mode,
-            loss=self._distribution.unwrap(
-                self._distribution.reduce(distribute_lib.get_loss_reduction(),
-                                          grouped_estimator_spec.loss,
-                                          destinations='/device:CPU:0'))[0],
-            train_op=self._distribution.group(grouped_estimator_spec.train_op),
+            loss=loss,
+            train_op=self._distribution.group(distributed_train_op),
             training_hooks=training_hooks,
             training_chief_hooks=training_chief_hooks,
             scaffold=scaffold)
",0,train
20ab9fbeba8652b17e87e956f4cbf2c457128fdf,tensorflow/tensorflow,"Add code path in estimator to use the new distributed strategy api

PiperOrigin-RevId: 202214880",util.py,"@@ -22,6 +22,7 @@ from __future__ import print_function
 import os
 import time
 
+from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import training
@@ -129,3 +130,24 @@ class _DatasetInitializerHook(training.SessionRunHook):
   def after_create_session(self, session, coord):
     del coord
     session.run(self._initializer)
+
+
+class StrategyInitFinalizeHook(training.SessionRunHook):
+  """"""Creates a SessionRunHook that initializes and shutsdown devices.""""""
+
+  def __init__(self, initialization_fn, finalize_fn):
+    self._initialization_fn = initialization_fn
+    self._finalize_fn = finalize_fn
+
+  def begin(self):
+    self._init_ops = self._initialization_fn()
+    self._finalize_ops = self._finalize_fn()
+
+  def after_create_session(self, session, coord):
+    logging.info('Initialize system')
+    session.run(self._init_ops,
+                options=config_pb2.RunOptions(timeout_in_ms=5 * 60 * 1000))
+
+  def end(self, session):
+    logging.info('Finalize system.')
+    session.run(self._finalize_ops)
",0,train
cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output.

PiperOrigin-RevId: 232406765",activations.cc,"@@ -60,9 +60,9 @@ namespace {
 TfLiteStatus CheckOutputQuantParams(TfLiteContext* context,
                                     const TfLiteTensor* input,
                                     const TfLiteTensor* output) {
+  TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
   if (input->type == kTfLiteUInt8) {
     TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
-    TF_LITE_ENSURE(context, output->params.scale == 1. / 256);
   } else {
     TF_LITE_ENSURE_EQ(context, output->params.zero_point, -128);
   }
",0,train
cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output.

PiperOrigin-RevId: 232406765",subgraph_quantizer.cc,"@@ -325,6 +325,27 @@ TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSingleInputOutputOp(
   return kTfLiteOk;
 }
 
+TfLiteStatus SubgraphQuantizer::AsymmetricQuantizeSoftmax(
+    BuiltinOperator op_code, OperatorT* op) {
+  TF_LITE_ENSURE_EQ(this->error_reporter_, op->inputs.size(), 1);
+  TF_LITE_ENSURE_EQ(this->error_reporter_, op->outputs.size(), 1);
+
+  if (IsSubgraphInput(op->inputs[0])) {
+    TF_LITE_ENSURE_STATUS(AsymmetricQuantizeTensor(op_code, op->inputs[0]));
+  }
+
+  auto output_tensor = subgraph_->tensors[op->outputs[0]].get();
+  if (output_tensor->type != TensorType_FLOAT32) {
+    return kTfLiteOk;
+  }
+
+  // Softmax output is hardcoded to have 1/256 as scale and -128 as zero point.
+  output_tensor->type = TensorType_INT8;
+  output_tensor->quantization->scale = {1.0f / 256.0f};
+  output_tensor->quantization->zero_point = {-128};
+  return kTfLiteOk;
+}
+
 bool SubgraphQuantizer::IsSubgraphInput(int32_t tensor_idx) const {
   return std::find(subgraph_->inputs.begin(), subgraph_->inputs.end(),
                    tensor_idx) != subgraph_->inputs.end();
@@ -342,8 +363,9 @@ TfLiteStatus SubgraphQuantizer::QuantizeOperator(int op_idx) {
     case BuiltinOperator_MAX_POOL_2D:
       return PropagateMinMaxForAvgAndMaxPool(op_code, op);
     case BuiltinOperator_SQUEEZE:
-    case BuiltinOperator_SOFTMAX:
       return AsymmetricQuantizeSingleInputOutputOp(op_code, op);
+    case BuiltinOperator_SOFTMAX:
+      return AsymmetricQuantizeSoftmax(op_code, op);
     default:
       return kTfLiteError;
   }
",0,train
cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output.

PiperOrigin-RevId: 232406765",subgraph_quantizer.h,"@@ -51,6 +51,12 @@ class SubgraphQuantizer {
   TfLiteStatus AsymmetricQuantizeSingleInputOutputOp(BuiltinOperator op_code,
                                                      OperatorT* op);
 
+  // Asymmetrically quantizes inputs and outputs of a Softmax Op.
+  // Input is quantized with the min-max range and output is hardcoded to have
+  // 1/256 as scale and -128 as zero point.
+  TfLiteStatus AsymmetricQuantizeSoftmax(BuiltinOperator op_code,
+                                         OperatorT* op);
+
   TfLiteStatus AsymmetricQuantizeTensor(BuiltinOperator op_code,
                                         int32_t tensor_idx);
 
",0,train
cf549e5f36d122e91c774bd7da44519c9f7792ff,tensorflow/tensorflow,"Update Softmax interface for fixed point int8 Op/kernel. Use 1/256 as scale and -128 as zero point for output.

PiperOrigin-RevId: 232406765",subgraph_quantizer_test.cc,"@@ -291,6 +291,7 @@ TEST(SubgraphQuantizerTest, VerifySoftmaxQuantization) {
   ASSERT_EQ(op->outputs.size(), 1);
   auto float_graph = readonly_model->subgraphs()->Get(0);
 
+  // Verify input.
   ASSERT_EQ(float_graph->tensors()->Get(op->inputs[0])->type(),
             TensorType_FLOAT32);
   ASSERT_EQ(float_graph->tensors()->Get(op->outputs[0])->type(),
@@ -306,12 +307,18 @@ TEST(SubgraphQuantizerTest, VerifySoftmaxQuantization) {
   VerifyAsymmetricQuantizationScale(*float_input_quant_params,
                                     *input_quant_params);
 
+  // Verify output.
   auto float_output_quant_params =
       float_graph->tensors()->Get(op->outputs[0])->quantization();
   auto output_quant_params =
       subgraph->tensors[op->outputs[0]]->quantization.get();
-  VerifyAsymmetricQuantizationScale(*float_output_quant_params,
-                                    *output_quant_params);
+  ASSERT_EQ(float_output_quant_params->min()->size(), 1);
+  ASSERT_EQ(float_output_quant_params->max()->size(), 1);
+
+  ASSERT_EQ(output_quant_params->scale.size(), 1);
+  ASSERT_EQ(output_quant_params->zero_point.size(), 1);
+  ASSERT_EQ(1.0f / 256.0f, output_quant_params->scale[0]);
+  ASSERT_EQ(-128, output_quant_params->zero_point[0]);
 }
 
 TEST(SubgraphQuantizerTest, VerifyAvgPoolQuantization) {
",0,train
589deaa9fb5cb1d1b5bddf07538729abbbbee996,tensorflow/tensorflow,"Extracts the 'simplify squeeze node' optimization into its own method.

PiperOrigin-RevId: 197968452",constant_folding.cc,"@@ -1937,22 +1937,8 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node,
     }
   }
 
-  if (use_shape_info && IsSqueeze(*node) &&
-      !properties->GetInputProperties(node->name()).empty()) {
-    // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's
-    // error to squeeze a dimension that is not 1, so we only need to check
-    // whether the input has > 1 size for each dimension.
-    const auto& shape = properties->GetInputProperties(node->name())[0].shape();
-    // The node is replaceable iff
-    // unknown_rank == false && (dim_size == 0 || all dims have size > 1)
-    bool replaceable = !shape.unknown_rank();
-    for (int j = 0; replaceable && j < shape.dim_size(); ++j) {
-      replaceable &= shape.dim(j).size() > 1;
-    }
-    if (replaceable) {
-      ReplaceOperationWithIdentity(0, *properties, node, optimized_graph);
-      return Status::OK();
-    }
+  if (SimplifySqueeze(*properties, use_shape_info, optimized_graph, node)) {
+    return Status::OK();
   }
 
   if (SimplifyPack(optimized_graph, node)) {
@@ -2024,6 +2010,30 @@ Status ConstantFolding::SimplifyNode(bool use_shape_info, NodeDef* node,
   return Status::OK();
 }
 
+bool ConstantFolding::SimplifySqueeze(const GraphProperties& properties,
+                                      bool use_shape_info,
+                                      GraphDef* optimized_graph,
+                                      NodeDef* node) {
+  if (use_shape_info && IsSqueeze(*node) &&
+      !properties.GetInputProperties(node->name()).empty()) {
+    // https://www.tensorflow.org/api_docs/python/tf/squeeze mentions it's
+    // error to squeeze a dimension that is not 1, so we only need to check
+    // whether the input has > 1 size for each dimension.
+    const auto& shape = properties.GetInputProperties(node->name())[0].shape();
+    // The node is replaceable iff
+    // unknown_rank == false && (dim_size == 0 || all dims have size > 1)
+    bool replaceable = !shape.unknown_rank();
+    for (int j = 0; replaceable && j < shape.dim_size(); ++j) {
+      replaceable &= shape.dim(j).size() > 1;
+    }
+    if (replaceable) {
+      ReplaceOperationWithIdentity(0, properties, node, optimized_graph);
+      return true;
+    }
+  }
+  return false;
+}
+
 bool ConstantFolding::SimplifyPack(GraphDef* optimized_graph, NodeDef* node) {
   if (IsPack(*node) && NumNonControlInputs(*node) == 1 &&
       !OptimizedNodeExists(*node, ""_const_axis"")) {
",0,train
589deaa9fb5cb1d1b5bddf07538729abbbbee996,tensorflow/tensorflow,"Extracts the 'simplify squeeze node' optimization into its own method.

PiperOrigin-RevId: 197968452",constant_folding.h,"@@ -170,6 +170,10 @@ class ConstantFolding : public GraphOptimizer {
   // Simplifies Pack operation if applicable.
   bool SimplifyPack(GraphDef* optimized_graph, NodeDef* node);
 
+  // Simplifies a Squeeze operation to an Identity operation if applicable.
+  bool SimplifySqueeze(const GraphProperties& properties, bool use_shape_info,
+                       GraphDef* optimized_graph, NodeDef* node);
+
   // Points to an externally provided device or to owned_device_;
   RewriterConfig::Toggle opt_level_;
   DeviceBase* cpu_device_;
",0,train
ecbb8b1ccac295537827dfe1ca25ddb03ca5f22b,tensorflow/tensorflow,"Add helper function for Xor in HLO.

RELNOTES: n/a
PiperOrigin-RevId: 188119450",computation_builder.cc,"@@ -868,6 +868,14 @@ ComputationDataHandle ComputationBuilder::Or(
   return BinaryOp(BINOP_OR, lhs, rhs, broadcast_dimensions);
 }
 
+// TODO(b/65209188): Create a dedicated lowering for Xor
+ComputationDataHandle ComputationBuilder::Xor(
+    const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+    tensorflow::gtl::ArraySlice<int64> broadcast_dimensions) {
+  return Or(And(Not(lhs), rhs, broadcast_dimensions),
+            And(lhs, Not(rhs), broadcast_dimensions));
+}
+
 ComputationDataHandle ComputationBuilder::Not(
     const ComputationDataHandle& operand) {
   return UnaryOp(UNOP_NOT, operand);
",0,train
ecbb8b1ccac295537827dfe1ca25ddb03ca5f22b,tensorflow/tensorflow,"Add helper function for Xor in HLO.

RELNOTES: n/a
PiperOrigin-RevId: 188119450",computation_builder.h,"@@ -512,6 +512,10 @@ class ComputationBuilder {
       const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions = {});
 
+  ComputationDataHandle Xor(
+      const ComputationDataHandle& lhs, const ComputationDataHandle& rhs,
+      tensorflow::gtl::ArraySlice<int64> broadcast_dimensions = {});
+
   ComputationDataHandle Not(const ComputationDataHandle& operand);
 
   ComputationDataHandle ShiftLeft(
",0,train
d2d9df08a6b3f64977e22881cc59b0011f58b9df,tensorflow/tensorflow,Finish the port to upstream TF from TF 2.4,ir_emitter_unnested.cc,"@@ -1806,6 +1806,20 @@ StatusOr<MlirEmitterInput> IrEmitterUnnested::GetMlirEmitterInput(
   return input;
 }
 
+bool IsRowMajor(mlir::Operation* op) {
+  if (auto attr = mlir::GetLayoutFromMlirHlo(op)) {
+    std::vector<int64> minor_to_major;
+    absl::c_transform(
+        attr, std::back_inserter(minor_to_major),
+        std::function<int64(const llvm::APInt&)>(&llvm::APInt::getZExtValue));
+    bool ret = std::is_sorted(minor_to_major.begin(),
+                              minor_to_major.end(), std::greater<int64>());
+    return ret;
+  }
+  // It is row major by default.
+  return true;
+}
+
 // TODO(timshen): update the comment once the HandleFusion code path deleted.
 //
 // This is migrated from IrEmitter::HandleFusion() with IrEmitterUnnested as the
@@ -1882,18 +1896,23 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir(
   }();
 
   bool row_optimized = fusion.getFusionResults().size() == 1 && // Not tested with MOF.
-      absl::c_all_of(GetHloOperands(fusion), [](const mlir::Value& op) {
+      absl::c_all_of(GetHloOperands(fusion), [](const mlir::Value& value) {
           // Only tested when the inputs are row-major. So only enable that case.
           // Maybe it would works if only the inner dimensions is contiguous.
-          return true;//TODO: LayoutUtil::IsMonotonicWithDim0Major(instr->shape().layout());
+          if (auto op = value.getDefiningOp()) {
+            return IsRowMajor(value.getDefiningOp());
+          }
+          // Reuse TypeToShape to not duplicate the layout conversion code.
+          return LayoutUtil::IsMonotonicWithDim0Major(TypeToShape(value.getType()).layout());
         }) &&
       // Only tested when the output is row-major.
-      //LayoutUtil::IsMonotonicWithDim0Major(hlo.shape().layout());
-      true;
+      absl::c_all_of(GetOutputOps(fusion), IsRowMajor);
+
   bool some_row_broadcasting = false;
   for (mlir::Operation& op : fusion.region().front()) {
     if (mlir::isa<mlir::memref::TensorLoadOp, mlir::memref::TensorStoreOp,
-        mlir::lmhlo::TerminatorOp, mlir::mhlo::ReturnOp>(op) ) {
+        mlir::lmhlo::TerminatorOp, mlir::mhlo::ReturnOp,
+        mlir::mhlo::ConstOp, mlir::lmhlo::ConstOp>(op) ) {
       continue;
     }
     HloOpcode opcode = *MhloToHloOpcode(&op);
@@ -1901,10 +1920,10 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir(
       continue;
     }
 
-    if (auto broadcast = mlir::dyn_cast<mlir::mhlo::BroadcastOp>(op)) {
+    if (auto broadcast = mlir::dyn_cast<mlir::mhlo::BroadcastInDimOp>(op)) {
       std::vector<int64> broadcast_dimensions;
-      if (broadcast.broadcast_sizes().size() > 0) {
-        for (const llvm::APInt& int_value : broadcast.broadcast_sizes()) {
+      if (broadcast.broadcast_dimensions().size() > 0) {
+        for (const llvm::APInt& int_value : broadcast.broadcast_dimensions()) {
           broadcast_dimensions.push_back(int_value.getSExtValue());
         }
       }
@@ -1915,12 +1934,13 @@ Status IrEmitterUnnested::EmitLoopFusionFromMlir(
         continue;
       }
       if (broadcast_dimensions.size() == 1 &&
-          broadcast_dimensions.back() != (rank - 1)) {
+          broadcast_dimensions.back() == (rank - 1)) {
         some_row_broadcasting = true;
+        continue;
       }
     }
     row_optimized = false;
-    VLOG(3) << ""Row vectorization not enabled due to this op: "" << HloOpcodeString(opcode);
+    VLOG(2) << ""Row vectorization not enabled due to this op: "" << MlirToString(&op);
     break;
   }
   // Trigger only when there is a row broadcasting.
",0,test
06eea697f10cd0004b6d68dda49a74bd3a7870f6,tensorflow/tensorflow,"Replace TraceMe for EagerCopyToDevice with TraceMe recording loops calling it.

PiperOrigin-RevId: 257290089",execute.cc,"@@ -214,6 +214,8 @@ Status ValidateInputTypeAndPlacement(
     EagerContext* ctx, EagerOperation* op,
     const core::RefCountPtr<KernelAndDevice>& kernel,
     RunMetadata* run_metadata) {
+  profiler::TraceMe activity(""ValidateInputTypeAndPlacement"",
+                             profiler::TraceMeLevel::kInfo);
   if (kernel->num_inputs() != op->Inputs().size()) {
     return errors::InvalidArgument(""expected "", kernel->num_inputs(),
                                    "" inputs, got "", op->Inputs().size());
@@ -487,6 +489,8 @@ Status EagerLocalExecute(EagerOperation* op,
   std::unordered_map<int, DtypeAndPartialTensorShape>
       input_resource_variable_dtypes_and_shapes;
   if (is_multi_device_function) {
+    profiler::TraceMe activity(""EagerCopyToDeviceAndAddCacheKey"",
+                               profiler::TraceMeLevel::kInfo);
     input_dev_ptrs.reserve(op->Inputs().size());
     // All inputs need to be on local devices.
     // TODO(b/122851476): This is a limitation of the current code base (but
@@ -807,34 +811,38 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
 
   eager::Operation* remote_op = request->add_queue()->mutable_operation();
 
-  for (int i = 0; i < op->Inputs().size(); i++) {
-    tensorflow::TensorHandle* input = op->Inputs()[i];
-    tensorflow::Device* input_device = input->device();
-    if (op->Device() != input_device &&
-        // If the expected and actual devices are on the same task, don't
-        // explicitly copy, and instead depend on the copy to happen locally
-        // when the op is executed on the device.
-        !ctx->OnSameTask(op->Device(), input_device)) {
-      tensorflow::Device* remote_cpu_device;
-      TF_RETURN_IF_ERROR(
-          ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device));
-      // TODO(b/110044833): It's possible the same tensor gets copied to the
-      // remote device repeatedly.
-      // Always copy to the remote CPU so that the actual device can be
-      // correctly determined after the kernel is selected/instantiated, since
-      // the op might have its inputs on host memory.
-      TensorHandle* handle = nullptr;
-      TF_RETURN_IF_ERROR(
-          MaybeCopyInputToExpectedDevice(op, op->Device(), i, remote_cpu_device,
-                                         /* run_metadata= */ nullptr, &handle));
-      op->UpdateInput(i, handle);
-      input = handle;
-      input_device = remote_cpu_device;
-      // Unref handle since it has a ref as an input now
-      handle->Unref();
-    }
+  {
+    profiler::TraceMe activity(""CopyInputToExpectedDevice"",
+                               profiler::TraceMeLevel::kInfo);
+    for (int i = 0; i < op->Inputs().size(); i++) {
+      tensorflow::TensorHandle* input = op->Inputs()[i];
+      tensorflow::Device* input_device = input->device();
+      if (op->Device() != input_device &&
+          // If the expected and actual devices are on the same task, don't
+          // explicitly copy, and instead depend on the copy to happen locally
+          // when the op is executed on the device.
+          !ctx->OnSameTask(op->Device(), input_device)) {
+        tensorflow::Device* remote_cpu_device;
+        TF_RETURN_IF_ERROR(
+            ctx->CPUDeviceOnTask(op->Device(), &remote_cpu_device));
+        // TODO(b/110044833): It's possible the same tensor gets copied to the
+        // remote device repeatedly.
+        // Always copy to the remote CPU so that the actual device can be
+        // correctly determined after the kernel is selected/instantiated, since
+        // the op might have its inputs on host memory.
+        TensorHandle* handle = nullptr;
+        TF_RETURN_IF_ERROR(MaybeCopyInputToExpectedDevice(
+            op, op->Device(), i, remote_cpu_device,
+            /* run_metadata= */ nullptr, &handle));
+        op->UpdateInput(i, handle);
+        input = handle;
+        input_device = remote_cpu_device;
+        // Unref handle since it has a ref as an input now
+        handle->Unref();
+      }
 
-    TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, input, input_device));
+      TF_RETURN_IF_ERROR(AddRemoteInput(remote_op, input, input_device));
+    }
   }
 
   PrepareRemoteOp(remote_op, op);
@@ -1259,8 +1267,7 @@ Status ExecuteSend(EagerContext* ctx, Device* device, TensorHandle* h,
   } else {
     eager::EagerClient* eager_client;
     uint64 context_id = ctx->GetContextId();
-    TF_RETURN_IF_ERROR(
-        ctx->GetClient(device, &eager_client));
+    TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client));
 
     std::unique_ptr<eager::EnqueueRequest> request(new eager::EnqueueRequest);
     eager::EnqueueResponse response;
@@ -1328,8 +1335,7 @@ Status ExecuteRecv(EagerContext* ctx, Device* device, DataType dtype,
   } else {
     eager::EagerClient* eager_client;
     uint64 context_id = ctx->GetContextId();
-    TF_RETURN_IF_ERROR(
-        ctx->GetClient(device, &eager_client));
+    TF_RETURN_IF_ERROR(ctx->GetClient(device, &eager_client));
 
     std::unique_ptr<eager::EnqueueRequest> request(new eager::EnqueueRequest);
     eager::EnqueueResponse response;
@@ -1385,8 +1391,6 @@ string GetUniqueWireID() {
 
 Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx, Device* device,
                          bool mirror, TensorHandle** result) {
-  profiler::TraceMe activity(""EagerCopyToDevice"",
-                             profiler::TraceMeLevel::kInfo);
   Device* send_device = h->DeviceOrHostCPU(ctx);
 
   bool sender_is_local = ctx->IsLocal(send_device);
",0,train
d82c766b445eea2a52a3f7b6dba05356d8b03648,tensorflow/tensorflow,"Avoid adding the same edge multiple times which will cause checking failure in
Graph::ToGraphDefSubRange().

PiperOrigin-RevId: 239905587",convert_graph.cc,"@@ -611,17 +611,18 @@ Status CreateTRTNode(const ConversionParams& params,
       UpdateToEngineNode(infos, pos, *engine_nodes, /*is_input_edge=*/false,
                          conn.outside_node_name, &output_node, &port);
     }
-    VLOG(1) << ""Updating "" << engine_node->name() << "":"" << conn.port_number
-            << "" to "" << output_node->name() << "":"" << port;
     if (conn.is_control_edge()) {
+      VLOG(1) << ""Updating control edge from "" << engine_node->name() << "" to ""
+              << output_node->name();
       QCHECK_EQ(Graph::kControlSlot, port);
       graph->AddControlEdge(engine_node, output_node);
     } else {
-      auto new_edge =
-          graph->AddEdge(engine_node, conn.port_number, output_node, port);
-      QCHECK(new_edge) << ""Adding a new edge failed "" << engine_node->name()
-                       << "":"" << conn.port_number << "" -> ""
-                       << output_node->name() << "":"" << conn.outside_port;
+      VLOG(1) << ""Updating data edge from "" << engine_node->name() << "":""
+              << conn.port_number << "" to "" << output_node->name() << "":""
+              << port;
+      // Use UpdateEdge() to avoid adding the same edge multiple times.
+      TF_CHECK_OK(
+          graph->UpdateEdge(engine_node, conn.port_number, output_node, port));
     }
   }
   return Status::OK();
",0,test
d82c766b445eea2a52a3f7b6dba05356d8b03648,tensorflow/tensorflow,"Avoid adding the same edge multiple times which will cause checking failure in
Graph::ToGraphDefSubRange().

PiperOrigin-RevId: 239905587",convert_graph_test.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include ""tensorflow/cc/framework/ops.h""
 #include ""tensorflow/cc/framework/scope.h""
 #include ""tensorflow/cc/ops/standard_ops.h""
 #include ""tensorflow/compiler/tf2tensorrt/convert/convert_nodes.h""
@@ -222,6 +223,76 @@ TEST(ConvertGraphTest, GetDeviceAndAllocator) {
   }
 }
 
+class ConvertAfterShapesTest : public ::testing::Test {
+ public:
+  Status RunConvertAfterShape(Scope s, GraphDef* output_graph_def) {
+    // Create GraphProperties.
+    grappler::GrapplerItem item;
+    TF_EXPECT_OK(s.ToGraphDef(&item.graph));
+    grappler::GraphProperties graph_properties(item);
+    TF_EXPECT_OK(graph_properties.InferStatically(true));
+
+    // Construct ConversionParams.
+    const std::vector<string> output_names{""output""};
+    ConversionParams params;
+    params.input_graph_def = &item.graph;
+    params.output_names = &output_names;
+    params.max_workspace_size_bytes = 8 << 20;
+    params.output_graph_def = output_graph_def;
+    params.minimum_segment_size = 2;
+    params.graph_properties = &graph_properties;
+    params.use_calibration = false;
+
+    return ConvertAfterShapes(params);
+  }
+};
+
+TEST_F(ConvertAfterShapesTest, DirectlyConnectedEngines) {
+  // Create the graph. There will be two TRTEngineOps after the conversion, and
+  // the upstream TRTEngineOp will have two output connections from the same
+  // node:port inside the op to the downstream TRTEngineOp. Then, if it adds the
+  // downstream TRTEngineOp first, when adding the upstream op it'll need to
+  // update the same output connection twice. This test ensures the correctness
+  // of the conversion under such condition.
+  Scope s = Scope::NewRootScope();
+  auto input = ops::Placeholder(s.WithOpName(""input""), DT_FLOAT,
+                                ops::Placeholder::Shape({2, 1}));
+  // We purposefully choose the name of the root node of each segment, so it'll
+  // process the segment in the downstream first, then, when it tries to update
+  // the edge between the two TRTEngineOps, it'll try to add the same edge
+  // multiple times.
+  auto segment_root_1 = ops::Identity(s.WithOpName(""segment_root_b""), input);
+  auto add1 = ops::Add(s.WithOpName(""add1""), segment_root_1, segment_root_1);
+  // Add incompatible reshapes that change the batch dimension.
+  auto incompatible =
+      ops::Reshape(s.WithOpName(""reshape1""), add1, Input({1, 2}));
+  incompatible =
+      ops::Reshape(s.WithOpName(""reshape2""), incompatible, Input({2, 1}));
+
+  auto add2 = ops::Add(s.WithOpName(""add2""), incompatible, add1);
+  auto segment_root_2 = ops::Identity(s.WithOpName(""segment_root_a""), add1);
+  auto add3 = ops::Add(s.WithOpName(""add3""), add2, segment_root_2);
+  ops::Identity(s.WithOpName(""output""), add3);
+
+  GraphDef output_graph_def;
+  TF_EXPECT_OK(RunConvertAfterShape(s, &output_graph_def));
+
+  int num_trt_ops = 0;
+  for (const NodeDef& node : output_graph_def.node()) {
+    if (node.name() == ""TRTEngineOp_1"") {
+      EXPECT_EQ(1, node.input_size());
+      EXPECT_EQ(""input"", node.input(0));
+      ++num_trt_ops;
+    } else if (node.name() == ""TRTEngineOp_0"") {
+      EXPECT_EQ(2, node.input_size());
+      EXPECT_EQ(""TRTEngineOp_1"", node.input(0));
+      EXPECT_EQ(""reshape2"", node.input(1));
+      ++num_trt_ops;
+    }
+  }
+  EXPECT_EQ(2, num_trt_ops);
+}
+
 }  // namespace convert
 }  // namespace tensorrt
 }  // namespace tensorflow
",0,test
24cbb2a8ce16da1157e6ac64a7d6391bead6b633,tensorflow/tensorflow,"Allow use of configure for gpu remote execution (i.e., no checks on local cuda)

PiperOrigin-RevId: 167995887",configure.py,"@@ -995,7 +995,8 @@ def main():
     set_computecpp_toolkit_path(environ_cp)
 
   set_action_env_var(environ_cp, 'TF_NEED_CUDA', 'CUDA', False)
-  if environ_cp.get('TF_NEED_CUDA') == '1':
+  if (environ_cp.get('TF_NEED_CUDA') == '1' and
+      'TF_CUDA_CONFIG_REPO' not in environ_cp):
     set_tf_cuda_version(environ_cp)
     set_tf_cunn_version(environ_cp)
     set_tf_cuda_compute_capabilities(environ_cp)
",0,train
1166a62a552ae43301b38ddfd8d6aa8aac6a2824,tensorflow/tensorflow,"Revert ""fix gpu_device.cc sanity""

This reverts commit 0816dc52e439d190df5950860438e3dd05c76f41.",gpu_device.cc,"@@ -34,7 +34,6 @@ limitations under the License.
 #include <vector>
 
 #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
-
 #include ""tensorflow/core/common_runtime/device_factory.h""
 #include ""tensorflow/core/common_runtime/gpu/gpu_device.h""
 #include ""tensorflow/core/common_runtime/gpu/gpu_event_mgr.h""
@@ -611,7 +610,7 @@ Status BaseGPUDevice::MaybeCopyTensorToGPU(
       return err;
     }
 
-    auto wrapped_done = [ to, copy, done = std::move(done) ](const Status& s) {
+    auto wrapped_done = [to, copy, done = std::move(done)](const Status& s) {
       if (s.ok()) {
         *to = std::move(*copy);
       }
@@ -651,7 +650,7 @@ Status BaseGPUDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
     std::list<Notification> notifications;
     Status copy_status;
     auto copier = [this, &alloc_attrs, &notifications, &copy_status](
-        const Tensor& from, Tensor* to) {
+                      const Tensor& from, Tensor* to) {
       // Copier isn't run in a multithreaded environment, so we don't
       // have to worry about the notifications list being modified in parallel.
       notifications.emplace_back();
@@ -742,8 +741,8 @@ Status ParseVisibleDeviceList(const string& visible_device_list,
       if (!strings::safe_strto32(platform_gpu_id_str, &platform_gpu_id)) {
         return errors::InvalidArgument(
             ""Could not parse entry in 'visible_device_list': '"",
-            platform_gpu_id_str, ""'. visible_device_list = "",
-            visible_device_list);
+            platform_gpu_id_str,
+            ""'. visible_device_list = "", visible_device_list);
       }
       if (platform_gpu_id < 0 ||
           platform_gpu_id >= gpu_manager->VisibleDeviceCount()) {
@@ -1038,32 +1037,32 @@ Status BaseGPUDeviceFactory::CreateDevices(
 #if GOOGLE_CUDA
       err = cudaSetDevice(platform_gpu_id.value());
       if (err != cudaSuccess) {
-        return errors::Internal(""cudaSetDevice() on GPU:"",
-                                platform_gpu_id.value(), "" failed. Status: "",
-                                cudaGetErrorString(err));
+        return errors::Internal(
+            ""cudaSetDevice() on GPU:"", platform_gpu_id.value(),
+            "" failed. Status: "", cudaGetErrorString(err));
       }
       err = cudaFree(nullptr);
       if (err != cudaSuccess) {
         return errors::Internal(""CUDA runtime implicit initialization on GPU:"",
-                                platform_gpu_id.value(), "" failed. Status: "",
-                                cudaGetErrorString(err));
+                                platform_gpu_id.value(),
+                                "" failed. Status: "", cudaGetErrorString(err));
       }
 #elif TENSORFLOW_USE_ROCM
       err = hipSetDevice(platform_gpu_id.value());
       if (err != hipSuccess) {
-        return errors::Internal(""hipSetDevice() on GPU:"",
-                                platform_gpu_id.value(), "" failed. Status: "",
-                                hipGetErrorString(err));
+        return errors::Internal(
+            ""hipSetDevice() on GPU:"", platform_gpu_id.value(),
+            "" failed. Status: "", hipGetErrorString(err));
       }
       err = hipFree(nullptr);
       if (err != hipSuccess) {
         return errors::Internal(""ROCm runtime implicit initialization on GPU:"",
-                                platform_gpu_id.value(), "" failed. Status: "",
-                                hipGetErrorString(err));
+                                platform_gpu_id.value(),
+                                "" failed. Status: "", hipGetErrorString(err));
       }
 #endif
     }
-// Reset to the original device.
+    // Reset to the original device.
 #if GOOGLE_CUDA
     err = cudaSetDevice(original_device);
     if (err != cudaSuccess) {
@@ -1174,13 +1173,15 @@ static string GetShortDeviceDescription(PlatformGpuId platform_gpu_id,
     cc_minor = 0;
   }
   // LINT.IfChange
-  return strings::StrCat(""device: "", platform_gpu_id.value(), "", name: "",
-                         desc.name(), "", pci bus id: "", desc.pci_bus_id(),
+  return strings::StrCat(""device: "", platform_gpu_id.value(),
+                         "", name: "", desc.name(),
+                         "", pci bus id: "", desc.pci_bus_id(),
                          "", compute capability: "", cc_major, ""."", cc_minor);
-// LINT.ThenChange(//tensorflow/python/platform/test.py)
+  // LINT.ThenChange(//tensorflow/python/platform/test.py)
 #elif TENSORFLOW_USE_ROCM
-  return strings::StrCat(""device: "", platform_gpu_id.value(), "", name: "",
-                         desc.name(), "", pci bus id: "", desc.pci_bus_id());
+  return strings::StrCat(""device: "", platform_gpu_id.value(),
+                         "", name: "", desc.name(),
+                         "", pci bus id: "", desc.pci_bus_id());
 #endif
 }
 
@@ -1419,8 +1420,8 @@ struct CudaVersion {
   // Initialize from version_name in the form of ""3.5""
   explicit CudaVersion(const std::string& version_name) {
     size_t dot_pos = version_name.find('.');
-    CHECK(dot_pos != string::npos) << ""Illegal version name: ["" << version_name
-                                   << ""]"";
+    CHECK(dot_pos != string::npos)
+        << ""Illegal version name: ["" << version_name << ""]"";
     string major_str = version_name.substr(0, dot_pos);
     CHECK(strings::safe_strto32(major_str, &major_part))
         << ""Illegal version name: ["" << version_name << ""]"";
@@ -1445,8 +1446,7 @@ struct CudaVersion {
 };
 
 std::vector<CudaVersion> supported_cuda_compute_capabilities = {
-    TF_CUDA_CAPABILITIES,
-};
+    TF_CUDA_CAPABILITIES,};
 
 std::vector<CudaVersion> GetSupportedCudaComputeCapabilities() {
   auto cuda_caps = supported_cuda_compute_capabilities;
@@ -1792,10 +1792,10 @@ void GPUKernelTracker::RecordTerminated(uint64 queued_count) {
   VLOG(2) << this << "" RecordTerminated queued_count="" << queued_count
           << "" first_available_="" << first_available_
           << "" last_completed_="" << last_completed_
-          << "" num_pending_="" << num_pending_
-          << "" LC="" << ((last_completed_ >= 0)
-                            ? pending_kernels_[last_completed_].queued_count
-                            : -1);
+          << "" num_pending_="" << num_pending_ << "" LC=""
+          << ((last_completed_ >= 0)
+                  ? pending_kernels_[last_completed_].queued_count
+                  : -1);
   DCHECK_NE(first_available_, last_completed_);
   DCHECK_GT(num_pending_, 0);
   // Starting just past the last completed entry, find the entry with
",0,train
70cd042611d8edf8cb4d7c55994b9d80c5386205,tensorflow/tensorflow,Set output when side input exists,common_shape_fns.cc,"@@ -1285,6 +1285,12 @@ Status FusedBatchNormGradShape(shape_inference::InferenceContext* c) {
 Status FusedBatchNormGradExShape(shape_inference::InferenceContext* c) {
   TF_RETURN_IF_ERROR(FusedBatchNormGradShape(c));
 
+  int num_side_inputs;
+  TF_RETURN_IF_ERROR(c->GetAttr(""num_side_inputs"", &num_side_inputs));
+  if (num_side_inputs == 0) {
+    return Status::OK();
+  }
+
   string data_format_str;
   TF_RETURN_IF_ERROR(c->GetAttr(""data_format"", &data_format_str));
   TensorFormat data_format;
",0,train
f26ea84fdbb13fff6b7979231db95dd20438645d,tensorflow/tensorflow,"[tf2xla] Remove MakeLinspaceTensor

MakeLinspaceTensor is now unused.

PiperOrigin-RevId: 234124584",xla_helpers.cc,"@@ -81,16 +81,6 @@ xla::XlaOp XlaHelpers::FloatLiteral(xla::XlaBuilder* b, DataType data_type,
   return Status::OK();
 }
 
-template <typename T>
-static Tensor MakeLinspaceTensor(const TensorShape& shape, int64 depth) {
-  Tensor linspace(DataTypeToEnum<T>::v(), shape);
-  auto linspace_flat = linspace.flat<T>();
-  for (int64 i = 0; i < depth; ++i) {
-    linspace_flat(i) = i;
-  }
-  return linspace;
-}
-
 Status XlaHelpers::OneHot(xla::XlaBuilder* builder, int64 depth, int axis,
                           DataType index_type, const TensorShape& indices_shape,
                           const xla::XlaOp& indices, const xla::XlaOp& on_value,
",0,train
a5923d823e088a9723e445cce9248d5fc59f1b30,tensorflow/tensorflow,"Allow StreamExecutor commands to return status types other than the TensorFlow status type.
Change: 116793254",gpu_util.cc,"@@ -388,7 +388,7 @@ string GPUUtil::MemoryDebugString(const Device* device, Tensor* tensor) {
       string buf;
       buf.resize(num_bytes);
       DeviceMemoryBase gpu_ptr(ptr, num_bytes);
-      Status s = dev_info->stream->parent()->SynchronousMemcpyD2H(
+      auto s = dev_info->stream->parent()->SynchronousMemcpyD2H(
           gpu_ptr, num_bytes, gtl::string_as_array(&buf));
       strings::StrAppend(&ret,
                          PrintMemory(gtl::string_as_array(&buf), num_bytes));
",0,train
5cfb7593f2141d5885734104bc2891995532ea18,tensorflow/tensorflow,Update bcast.h,bcast.h,"@@ -139,7 +139,7 @@ BCastList<N>::BCastList(const BCastList::Vec (&x)[N],
     if (x[i] != x[0]) {
       all_equal = false;
     }
-    int x_i_size = x[i].size();
+    const int x_i_size = x[i].size();
     if (x_i_size > largest_rank) {
       largest_rank = x[i].size();
     }
",0,train
f9ccd02fb44b1272798058cb80cb31e0d12a1881,tensorflow/tensorflow,"Removing duplicate `six` requirement in setup.py.

PiperOrigin-RevId: 275549971
Change-Id: I7f42d74c4833009c0542c1a09d688ccd63083183",setup.py,"@@ -60,7 +60,6 @@ REQUIRED_PACKAGES = [
     'keras_preprocessing >= 1.0.5',
     'numpy >= 1.16.0, < 2.0',
     'opt_einsum >= 2.3.2',
-    'six >= 1.10.0',
     'protobuf >= 3.6.1',
     'tensorboard >= 2.0.0, < 2.1.0',
     'tensorflow_estimator >= 2.0.0, < 2.1.0',
",0,test
9d17630338fb0cadd4bb347eb4993102cb77bb03,tensorflow/tensorflow,"Lower tolerace of eigvalsh for float32/complex64

PiperOrigin-RevId: 274723451",linear_operator_test_util.py,"@@ -434,8 +434,8 @@ def _test_eigvalsh(use_placeholder, shapes_info, dtype):
       atol = self._atol[dtype]  # pylint: disable=protected-access
       rtol = self._rtol[dtype]  # pylint: disable=protected-access
       if dtype == dtypes.float32 or dtype == dtypes.complex64:
-        atol = 1e-5
-        rtol = 1e-5
+        atol = 1e-4
+        rtol = 1e-4
       self.assertAllClose(op_eigvals_v, mat_eigvals_v, atol=atol, rtol=rtol)
   return test_eigvalsh
 
",0,test
1acc02f4689f0a5ac5ecd5bc1a1fa3b5236fd56c,tensorflow/tensorflow,"Let variables initialized from checkpoints answer "".initialized_value()"" correctly.

PiperOrigin-RevId: 186741832",checkpoint_utils.py,"@@ -293,6 +293,8 @@ def _set_checkpoint_initializer(variable,
     restore_op = io_ops.restore_v2(
         ckpt_file, [tensor_name], [slice_spec], [base_type], name=name)[0]
     variable._initializer_op = state_ops.assign(variable, restore_op)  # pylint:disable=protected-access
+    restore_op.set_shape(variable.shape)
+    variable._initial_value = restore_op  # pylint:disable=protected-access
 
 
 def _set_variable_or_list_initializer(variable_or_list, ckpt_file,
",0,train
1acc02f4689f0a5ac5ecd5bc1a1fa3b5236fd56c,tensorflow/tensorflow,"Let variables initialized from checkpoints answer "".initialized_value()"" correctly.

PiperOrigin-RevId: 186741832",checkpoint_utils_test.py,"@@ -145,6 +145,36 @@ class CheckpointsTest(test.TestCase):
         # Check that tensors are not explicitly in the graph.
         self.assertLess(len(str(session.graph.as_graph_def())), 29000)
 
+  def testInitialValueComesFromCheckpoint(self):
+    checkpoint_dir = self.get_temp_dir()
+    with self.test_session() as session:
+      v1, _, _, _ = _create_checkpoints(session, checkpoint_dir)
+
+    # New graph and session.
+    with ops.Graph().as_default() as g:
+      with self.test_session(graph=g) as session:
+        with variable_scope.variable_scope(
+            ""some_scope"", initializer=init_ops.zeros_initializer()):
+          my1 = variable_scope.get_variable(""my1"", [1, 10])
+
+        # At this point, my1.initialized_value() will add ops that reference
+        # the zeros initializer of my1.
+        before = variables.Variable(my1.initialized_value(), name=""before"")
+
+        checkpoint_utils.init_from_checkpoint(checkpoint_dir, {""var1"": my1})
+
+        # At this point, my1.initialized_value() will add ops that reference
+        # the newly set initializer of my1.
+        after = variables.Variable(my1.initialized_value(), name=""after"")
+
+        session.run(variables.global_variables_initializer())
+        self.assertAllEqual(session.run(my1), v1)
+        self.assertAllEqual(session.run(my1.initialized_value()), v1)
+        self.assertAllClose(session.run(before), [[0.0] * 10])
+        self.assertAllClose(session.run(after), v1)
+        with self.assertRaises(AssertionError):
+          self.assertAllClose(session.run(before), session.run(after))
+
   def testInitWithScopeDoesNotCaptureSuffixes(self):
     checkpoint_dir = self.get_temp_dir()
     with self.test_session() as session:
",0,train
63b5e80e5c99f847d7ab63087ded0b4fc1854d3a,tensorflow/tensorflow,"Use strategy._gather in test_util.gather

We now have strategy._gather MWMS, so we can remove the naive implementation in
test_util.

PiperOrigin-RevId: 334518762
Change-Id: I7b1d97ff3937b8ca9f5c0287d007bb386efa9e3e",test_util.py,"@@ -21,10 +21,7 @@ from __future__ import print_function
 import functools
 
 from tensorflow.python.distribute import collective_all_reduce_strategy
-from tensorflow.python.distribute import cross_device_utils
-from tensorflow.python.distribute import distribute_utils
 from tensorflow.python.distribute import values
-from tensorflow.python.eager import def_function
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.util import nest
@@ -57,16 +54,5 @@ def _gather(strategy, value):
     return array_ops.stack(value._values)
   assert len(strategy.extended.worker_devices) == len(value._values)
   inputs = [array_ops.expand_dims_v2(v, axis=0) for v in value._values]
-  collective_keys = strategy.extended._collective_keys
-  devices = strategy.extended.worker_devices
-  group_size = strategy.num_replicas_in_sync
-
-  @def_function.function
-  def gather_fn():
-    gathered = cross_device_utils.build_collective_gather(
-        inputs, devices, group_size, collective_keys, axis=0)
-    return distribute_utils.update_regroup(
-        strategy.extended, gathered, group=True)
-
-  return gather_fn()
+  return strategy._gather(values.PerReplica(inputs), axis=0)
   # pylint: enable=protected-access
",0,train
1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects.

PiperOrigin-RevId: 285251896
Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla.cc,"@@ -442,30 +442,6 @@ PYBIND11_MODULE(xla_extension, m) {
                 std::move(leaves), tree.shape, std::move(py_buffer_ref),
                 std::move(client), device->local_device_ordinal());
           })
-      // TODO(skyewm): get rid of this overload once everyone passes Device
-      .def_static(
-          ""from_python"",
-          [](const pybind11::object& argument,
-             std::shared_ptr<PyLocalClient> client,
-             int device_ordinal) -> StatusOr<std::unique_ptr<PyLocalBuffer>> {
-            GlobalPyRefManager()->CollectGarbage();
-            TF_ASSIGN_OR_RETURN(PythonBufferTree tree,
-                                GetPythonBufferTree(argument));
-            std::shared_ptr<PythonRefManager::ManagedPyObjects> py_buffer_ref =
-                GlobalPyRefManager()->ManageReferences(
-                    absl::MakeSpan(tree.arrays));
-            tree.arrays.clear();
-
-            std::vector<BorrowingLiteral> leaves;
-            leaves.insert(leaves.end(),
-                          std::make_move_iterator(tree.leaves.begin()),
-                          std::make_move_iterator(tree.leaves.end()));
-
-            py::gil_scoped_release gil_release;
-            return PyLocalBuffer::FromLiterals(
-                std::move(leaves), tree.shape, std::move(py_buffer_ref),
-                std::move(client), device_ordinal);
-          })
       .def_static(""make_tuple"",
                   [](const std::vector<PyLocalBuffer*> buffers,
                      std::shared_ptr<PyLocalClient> client,
@@ -481,8 +457,6 @@ PYBIND11_MODULE(xla_extension, m) {
                     return PyLocalBuffer::MakeTuple(
                         buffers, client, device->local_device_ordinal());
                   })
-      // TODO(skyewm): get rid of this overload once everyone passes Device
-      .def_static(""make_tuple"", &PyLocalBuffer::MakeTuple)
       .def(""copy_to_device"",
            [](PyLocalBuffer* buffer, std::shared_ptr<Device> dst_device) {
              CHECK(dst_device != nullptr);
@@ -490,13 +464,6 @@ PYBIND11_MODULE(xla_extension, m) {
              py::gil_scoped_release gil_release;
              return buffer->CopyToDevice(dst_device->local_device_ordinal());
            })
-      // TODO(skyewm): get rid of this overload once everyone passes Device
-      .def(""copy_to_device"",
-           [](PyLocalBuffer* buffer, int dst_device_ordinal) {
-             GlobalPyRefManager()->CollectGarbage();
-             py::gil_scoped_release gil_release;
-             return buffer->CopyToDevice(dst_device_ordinal);
-           })
       .def(""delete"", &PyLocalBuffer::Delete)
       .def(""destructure"", &PyLocalBuffer::DestructureTuple)
       .def(""block_host_until_ready"",
@@ -522,8 +489,6 @@ PYBIND11_MODULE(xla_extension, m) {
            [](PyLocalBuffer* buffer) -> std::shared_ptr<Device> {
              return buffer->client()->local_devices()[buffer->device_ordinal()];
            })
-      // TODO(skyewm): get rid of `device_ordinal` once everything uses `device`
-      .def(""device_ordinal"", &PyLocalBuffer::device_ordinal)
       .def(""platform"", &PyLocalBuffer::platform_name)
       .def(""is_deleted"",
            [](const PyLocalBuffer& buffer) {
@@ -546,15 +511,6 @@ PYBIND11_MODULE(xla_extension, m) {
       .def_static(""Compile"", &PyLocalExecutable::Compile,
                   py::call_guard<py::gil_scoped_release>())
       .def(""local_devices"", &PyLocalExecutable::local_devices)
-      // TODO(skyewm): get rid of this once everything uses `local_devices`
-      .def(""DeviceOrdinals"",
-           [](const PyLocalExecutable& executable) {
-             std::vector<int> device_ordinals;
-             for (std::shared_ptr<Device> device : executable.local_devices()) {
-               device_ordinals.push_back(device->local_device_ordinal());
-             }
-             return device_ordinals;
-           })
       .def(""SizeOfGeneratedCodeInBytes"",
            &PyLocalExecutable::SizeOfGeneratedCodeInBytes)
       .def(""Delete"", &PyLocalExecutable::Delete)
",0,test
1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects.

PiperOrigin-RevId: 285251896
Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla_client.py,"@@ -597,7 +597,7 @@ class Computation(object):
 
 # An Executable is a C++ class that duck types with the following API:
 # class Executable(object):
-#   def DeviceOrdinals(self) -> [int]:
+#   def local_devices(self) -> [Device]:
 #   def Execute(self, arguments : [Buffer]) -> Buffer:
 #     """"""Execute on one replica with Buffer arguments and return value.""""""
 #
@@ -627,7 +627,7 @@ def execute_with_python_values(executable, arguments=(), backend=None):
 
   def put(arg):
     return Buffer.from_pyval(
-        arg, device=executable.DeviceOrdinals()[0], backend=backend)
+        arg, device=executable.local_devices()[0], backend=backend)
 
   arguments = [put(arg) for arg in arguments]
   return executable.Execute(arguments).to_py()
@@ -646,9 +646,9 @@ def execute_with_python_values_replicated(executable, arguments, backend=None):
     A list of python values, one per replica.
   """"""
   backend = backend or get_local_backend()
-  device_ordinals = executable.DeviceOrdinals()
+  devices = executable.local_devices()
   # pylint: disable=g-complex-comprehension
-  flat_args = [(arg, device_ordinals[replica])
+  flat_args = [(arg, devices[replica])
                for replica, replica_args in enumerate(arguments)
                for arg in replica_args]
   flat_arg_buffers = [
",0,test
1b3142988270fd69c9aa4fc933ae96fc92fbcbd7,tensorflow/tensorflow,"[XLA:Python] Delete deprecated overloads that accept device ordinals instead of device objects.

PiperOrigin-RevId: 285251896
Change-Id: I89b3f4ddf806d9bfe4d3d56554dde7056e09205a",xla_client_test.py,"@@ -530,7 +530,8 @@ class BufferTest(ComputationTest):
     )
     b0 = xla_client.Buffer.from_pyval(t[0])
     b1 = xla_client.Buffer.from_pyval(t[1])
-    btup = xla_client.Buffer.make_tuple([b0, b1], device=0)
+    device = xla_client.get_local_backend().local_devices()[0]
+    btup = xla_client.Buffer.make_tuple([b0, b1], device=device)
     pieces = btup.destructure()
     self.assertLen(pieces, 2)
     array0, array1 = pieces
@@ -576,15 +577,6 @@ class BufferTest(ComputationTest):
       self.assertEqual(buf.device(), device)
       np.testing.assert_equal(x, buf.to_py())
 
-  def testInvalidDevice(self):
-    t = np.array(1.)
-    with self.assertRaisesRegexp(
-        RuntimeError,
-        r""PyLocalBuffer::FromLiterals got bad device_ordinal: 100 ""
-        r""\(num_local_devices=\d+\)""):
-      # TODO(skyewm): figure out how to test this with a Device
-      xla_client.Buffer.from_pyval(t, device=100)
-
 
 class SingleOpTest(ComputationTest):
   """"""Tests for single ops.
",0,test
7817f10ec7ed4622d305fdee298042347ee55da7,tensorflow/tensorflow,"Give a better error message that sparse tensor is not supported.
Change: 125206796",control_flow_grad.py,"@@ -197,10 +197,13 @@ def _EnterGrad(op, grad):
     return grad
   if op.get_attr(""is_constant""):
     # Add a gradient accumulator for each loop invariant.
-    if isinstance(grad, ops.IndexedSlices):
+    if isinstance(grad, ops.Tensor):
+      result = grad_ctxt.AddBackPropAccumulator(grad)
+    elif isinstance(grad, ops.IndexedSlices):
       result = grad_ctxt.AddBackPropIndexedSlicesAccumulator(grad)
     else:
-      result = grad_ctxt.AddBackPropAccumulator(grad)
+      # TODO(yuanbyu, lukasr): Add support for SparseTensor.
+      raise TypeError(""Type %s not supported"" % type(grad))
   else:
     result = exit(grad)
     grad_ctxt.ExitResult([result])
",0,test
1a5364efe43f76ab72a1f3651df394d6b121c915,tensorflow/tensorflow,"Fix incorrect documentation for MaxPool3D, MaxPoolGrad3D, AvgPoolGrad3D

The channel size should be zero rather than the depth.
Fixes #2573 (GitHub)
Change: 124261261",nn_ops.cc,"@@ -496,7 +496,7 @@ REGISTER_OP(""AvgPool3D"")
 Performs 3D average pooling on the input.
 
 ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[1] = 1`.
+  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
 strides: 1-D tensor of length 5. The stride of the sliding window for each
   dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 padding: The type of padding algorithm to use.
@@ -516,7 +516,7 @@ REGISTER_OP(""AvgPool3DGrad"")
 Computes gradients of average pooling function.
 
 ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[1] = 1`.
+  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
 strides: 1-D tensor of length 5. The stride of the sliding window for each
   dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 padding: The type of padding algorithm to use.
@@ -538,7 +538,7 @@ REGISTER_OP(""MaxPool3D"")
 Performs 3D max pooling on the input.
 
 ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[1] = 1`.
+  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
 strides: 1-D tensor of length 5. The stride of the sliding window for each
   dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 padding: The type of padding algorithm to use.
@@ -559,7 +559,7 @@ REGISTER_OP(""MaxPool3DGrad"")
 Computes gradients of max pooling function.
 
 ksize: 1-D tensor of length 5. The size of the window for each dimension of
-  the input tensor. Must have `ksize[0] = ksize[1] = 1`.
+  the input tensor. Must have `ksize[0] = ksize[4] = 1`.
 strides: 1-D tensor of length 5. The stride of the sliding window for each
   dimension of `input`. Must have `strides[0] = strides[4] = 1`.
 padding: The type of padding algorithm to use.
",0,train
2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,direct_session_test.cc,"@@ -51,9 +51,11 @@ limitations under the License.
 #include ""tensorflow/core/public/session_options.h""
 #include ""tensorflow/core/util/device_name_utils.h""
 
-#ifdef GOOGLE_CUDA
+#if GOOGLE_CUDA
 #include ""third_party/gpus/cuda/include/cuda.h""
 #include ""third_party/gpus/cuda/include/cuda_runtime_api.h""
+#elif TENSORFLOW_USE_ROCM
+#include ""rocm/include/hip/hip_runtime.h""
 #endif  // GOOGLE_CUDA
 
 namespace tensorflow {
@@ -2089,6 +2091,12 @@ bool IsCUDATensor(const Tensor& t) {
   if (err == cudaErrorInvalidValue) return false;
   CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err);
   return (attributes.memoryType == cudaMemoryTypeDevice);
+#elif TENSORFLOW_USE_ROCM
+  hipPointerAttribute_t attributes;
+  hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data());
+  if (err == hipErrorInvalidValue) return false;
+  CHECK_EQ(hipSuccess, err) << hipGetErrorString(err);
+  return (attributes.memoryType == hipMemoryTypeDevice);
 #else
   return false;
 #endif
",0,train
2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,process_function_library_runtime_test.cc,"@@ -33,9 +33,11 @@ limitations under the License.
 #include ""tensorflow/core/public/session_options.h""
 #include ""tensorflow/core/public/version.h""
 
-#ifdef GOOGLE_CUDA
+#if GOOGLE_CUDA
 #include ""third_party/gpus/cuda/include/cuda.h""
 #include ""third_party/gpus/cuda/include/cuda_runtime_api.h""
+#elif TENSORFLOW_USE_ROCM
+#include ""rocm/include/hip/hip_runtime.h""
 #endif  // GOOGLE_CUDA
 
 namespace tensorflow {
@@ -122,7 +124,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
   }
 
   Tensor GPUToCPU(const Tensor& device_tensor) {
-#ifdef GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
     CHECK(gpu_device_);
     CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr);
     DeviceContext* device_context =
@@ -146,7 +148,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
   }
 
   Tensor CPUToGPU(const Tensor& cpu_tensor) {
-#ifdef GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
     CHECK(gpu_device_);
     CHECK(gpu_device_->tensorflow_gpu_device_info() != nullptr);
     DeviceContext* device_context =
@@ -461,6 +463,12 @@ bool IsCUDATensor(const Tensor& t) {
   if (err == cudaErrorInvalidValue) return false;
   CHECK_EQ(cudaSuccess, err) << cudaGetErrorString(err);
   return (attributes.memoryType == cudaMemoryTypeDevice);
+#elif TENSORFLOW_USE_ROCM
+  hipPointerAttribute_t attributes;
+  hipError_t err = hipPointerGetAttributes(&attributes, t.tensor_data().data());
+  if (err == hipErrorInvalidValue) return false;
+  CHECK_EQ(hipSuccess, err) << hipGetErrorString(err);
+  return (attributes.memoryType == hipMemoryTypeDevice);
 #else
   CHECK(false)
       << ""IsCUDATensor should not be called when CUDA is not available"";
",0,train
2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,utils_test.cc,"@@ -40,6 +40,18 @@ TEST(UtilsTest, GetLocalGPUInfo) {
   properties = GetLocalGPUInfo(PlatformGpuId(0));
   EXPECT_EQ(""GPU"", properties.type());
   EXPECT_EQ(""NVIDIA"", properties.vendor());
+#elif TENSORFLOW_USE_ROCM
+  LOG(INFO) << ""ROCm is enabled."";
+  DeviceProperties properties;
+
+  // Invalid platform GPU ID.
+  properties = GetLocalGPUInfo(PlatformGpuId(100));
+  EXPECT_EQ(""UNKNOWN"", properties.type());
+
+  // Succeed when a valid platform GPU id was inserted.
+  properties = GetLocalGPUInfo(PlatformGpuId(0));
+  EXPECT_EQ(""GPU"", properties.type());
+  EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor());
 #else
   LOG(INFO) << ""CUDA is not enabled."";
   DeviceProperties properties;
@@ -73,6 +85,8 @@ TEST(UtilsTest, GetDeviceInfo) {
   EXPECT_EQ(""GPU"", properties.type());
 #if GOOGLE_CUDA
   EXPECT_EQ(""NVIDIA"", properties.vendor());
+#elif TENSORFLOW_USE_ROCM
+  EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor());
 #endif
 
   // TF to platform GPU id mapping entry doesn't exist.
@@ -81,7 +95,7 @@ TEST(UtilsTest, GetDeviceInfo) {
   properties = GetDeviceInfo(device);
   EXPECT_EQ(""UNKNOWN"", properties.type());
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
   // Invalid platform GPU id.
   TF_ASSERT_OK(
       GpuIdManager::InsertTfPlatformGpuIdPair(TfGpuId(0), PlatformGpuId(100)));
@@ -94,7 +108,11 @@ TEST(UtilsTest, GetDeviceInfo) {
   device.id = 1;
   properties = GetDeviceInfo(device);
   EXPECT_EQ(""GPU"", properties.type());
+#if GOOGLE_CUDA
   EXPECT_EQ(""NVIDIA"", properties.vendor());
+#elif TENSORFLOW_USE_ROCM
+  EXPECT_EQ(""Advanced Micro Devices, Inc"", properties.vendor());
+#endif
 #endif
 }
 
",0,train
2eecf54f925fe4cb28aff39e4cb90924fad88196,tensorflow/tensorflow,updating testcases to work correctly with ROCm,pin_to_host_optimizer_test.cc,"@@ -203,7 +203,7 @@ TEST_F(PinToHostOptimizerTest, Identity) {
       // If CUDA, then there is a GPU kernel registration that is pinned to Host
       // memory. Consequently, `b` will be mapped to Host correct if there is
       // a GPU kernel registered.
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
       EXPECT_EQ(node.device(), ""/device:CPU:0"");
 #else
       EXPECT_TRUE(node.device().empty());
",0,train
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_launch_util.cc,"@@ -57,10 +57,19 @@ se::Platform::Id XlaPlatformInfoFromDevice(DeviceBase* device_base) {
 
 }  // anonymous namespace
 
-VariableInfo::VariableInfo(int index, absl::string_view name, Var* var)
-    : index_(index), name_(name), var_(var) {}
+VariableInfo::VariableInfo(
+    int index, absl::string_view name, Var* var,
+    const absl::optional<ManagedStackTrace>& definition_stack_trace)
+    : index_(index),
+      name_(name),
+      var_(var),
+      definition_stack_trace_(definition_stack_trace) {}
+
 VariableInfo::VariableInfo(VariableInfo&& other)
-    : index_(other.index_), var_(other.var_), lock_held_(other.lock_held_) {
+    : index_(other.index_),
+      var_(other.var_),
+      definition_stack_trace_(other.definition_stack_trace_),
+      lock_held_(other.lock_held_) {
   other.index_ = -1;
   other.var_ = nullptr;
 }
@@ -69,6 +78,7 @@ VariableInfo& VariableInfo::operator=(VariableInfo&& other) {
   index_ = other.index_;
   var_ = other.var_;
   lock_held_ = other.lock_held_;
+  definition_stack_trace_ = other.definition_stack_trace_;
 
   other.index_ = -1;
   other.var_ = nullptr;
@@ -100,21 +110,8 @@ Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev,
     Var* variable = nullptr;
     ResourceHandle handle = inputs[var_idx]->flat<ResourceHandle>()(0);
     if (handle.device() != dev->attributes().name()) {
-      std::string definition_location = [&]() -> std::string {
-        if (handle.definition_stack_trace()) {
-          std::vector<StackFrame> stack_frames =
-              handle.definition_stack_trace()->ToStackFrames(
-                  {}, IsInternalFrameForFilename,
-                  /*reverse_traversal=*/true,
-                  /*limit=*/1);
-          if (!stack_frames.empty()) {
-            const StackFrame& last_frame = stack_frames[0];
-            return absl::StrCat("" (defined @ "", last_frame.file_name, "":"",
-                                last_frame.line_number, "")"");
-          }
-        }
-        return """";
-      }();
+      std::string definition_location =
+          DefinitionLocationMsg(handle.definition_stack_trace());
       return errors::InvalidArgument(""Trying to access resource "",
                                      handle.name(), definition_location,
                                      "" located in device "", handle.device(),
@@ -126,7 +123,8 @@ Status GetVariableInfosFromInputs(ResourceMgr* rm, DeviceBase* dev,
           *ptr = new Var(DT_INVALID);
           return Status::OK();
         }));
-    result->emplace_back(var_idx, handle.name(), variable);
+    result->emplace_back(var_idx, handle.name(), variable,
+                         handle.definition_stack_trace());
   }
   return Status::OK();
 }
@@ -445,7 +443,8 @@ StatusOr<std::vector<VariableInfo>> GatherVariableInfo(
     const ResourceHandle handle = HandleFromInput(ctx, actual_input_index);
     TF_ASSIGN_OR_RETURN(Var * variable,
                         GetOrCreateResourceVar(ctx, handle, write));
-    out.emplace_back(actual_input_index, handle.name(), variable);
+    out.emplace_back(actual_input_index, handle.name(), variable,
+                     handle.definition_stack_trace());
   }
   return std::move(out);
 }
@@ -647,6 +646,7 @@ XlaComputationLaunchContext::BuildXlaCompilerArguments(
       arg.name = std::string(variable.name());
       arg.kind = XlaCompiler::Argument::kResource;
       arg.resource_kind = XlaResource::kVariable;
+      arg.definition_stack_trace = variable.definition_stack_trace();
       if (variable.var() && variable.var()->is_initialized) {
         const Tensor* value = variable.var()->tensor();
         arg.type = value->dtype();
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_launch_util.h,"@@ -44,7 +44,9 @@ using ResourceVarsSnapshot = absl::flat_hash_map<int, absl::optional<Tensor>>;
 // refcount on destruction.
 class VariableInfo {
  public:
-  explicit VariableInfo(int index, absl::string_view name, Var* var);
+  explicit VariableInfo(int index, absl::string_view name, Var* var,
+                        const absl::optional<ManagedStackTrace>&
+                            definition_stack_trace = absl::nullopt);
   VariableInfo(VariableInfo&& other);
 
   VariableInfo& operator=(VariableInfo&& other);
@@ -68,12 +70,17 @@ class VariableInfo {
   bool lock_held() const { return lock_held_; }
   void set_lock_held() { lock_held_ = true; }
 
+  const absl::optional<ManagedStackTrace>& definition_stack_trace() const {
+    return definition_stack_trace_;
+  }
+
   ~VariableInfo();
 
  private:
   int index_;
   std::string name_;
   Var* var_;
+  absl::optional<ManagedStackTrace> definition_stack_trace_;
 
   // We can't use a optional<mutex_lock> here because it confuses the compiler's
   // thread safety analysis. Instead we use a boolean flag and release the lock
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_argument.h,"@@ -122,6 +122,7 @@ struct XlaArgument {
   // When true, xla_compiler should input/output alias this arg to prevent
   // unnecessary HBM usage.
   bool requires_broadcast = false;
+  absl::optional<ManagedStackTrace> definition_stack_trace;
 };
 
 // Returns true if any of `args` is an uninitialized resource variable.
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_compiler.cc,"@@ -987,7 +987,8 @@ Status XlaCompiler::BuildArguments(
                 absl::get<TensorShape>(arg.shape), xla::XlaOp(),
                 /*max_array_size=*/arg.max_array_size,
                 /*tensor_array_gradients=*/arg.tensor_array_gradients,
-                /*tensor_array_multiple_writes_aggregate=*/true));
+                /*tensor_array_multiple_writes_aggregate=*/true,
+                arg.definition_stack_trace));
         arg_expression =
             arg.kind == XlaCompiler::Argument::kResource
                 ? XlaExpression::Resource(resource)
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_helpers.cc,"@@ -285,4 +285,20 @@ StatusOr<absl::optional<xla::DeviceAssignment>> ResolveDeviceAssignment(
   return {{out}};
 }
 
+std::string DefinitionLocationMsg(
+    const absl::optional<ManagedStackTrace>& stack_trace) {
+  if (stack_trace) {
+    std::vector<StackFrame> stack_frames =
+        stack_trace->ToStackFrames({}, IsInternalFrameForFilename,
+                                   /*reverse_traversal=*/true,
+                                   /*limit=*/1);
+    if (!stack_frames.empty()) {
+      const StackFrame& last_frame = stack_frames[0];
+      return absl::StrCat("" (defined @ "", last_frame.file_name, "":"",
+                          last_frame.line_number, "")"");
+    }
+  }
+  return """";
+}
+
 }  // end namespace tensorflow
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_helpers.h,"@@ -188,6 +188,11 @@ StatusOr<absl::optional<xla::DeviceAssignment>> ResolveDeviceAssignment(
     const absl::optional<XlaCompilationResult::CollectiveReduceV2OpInfo>&
         collective_reduce_info);
 
+// Generate a message with a definition location based on a provided stack
+// trace, or an empty one if the stack trace is empty.
+std::string DefinitionLocationMsg(
+    const absl::optional<ManagedStackTrace>& stack_trace);
+
 }  // end namespace tensorflow
 
 #endif  // TENSORFLOW_COMPILER_TF2XLA_XLA_HELPERS_H_
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_resource.cc,"@@ -59,11 +59,12 @@ namespace tensorflow {
       /*tensor_array_multiple_writes_aggregate=*/false);
 }
 
-XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type,
-                         TensorShape shape, const xla::XlaOp& initial_value,
-                         int64 max_array_size,
-                         const std::set<string>& tensor_array_gradients,
-                         bool tensor_array_multiple_writes_aggregate)
+XlaResource::XlaResource(
+    Kind kind, int arg_num, string name, DataType type, TensorShape shape,
+    xla::XlaOp initial_value, int64 max_array_size,
+    const std::set<string>& tensor_array_gradients,
+    bool tensor_array_multiple_writes_aggregate,
+    const absl::optional<ManagedStackTrace>& definition_stack_trace)
     : kind_(kind),
       arg_num_(arg_num),
       name_(std::move(name)),
@@ -73,7 +74,8 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type,
       initial_value_(initial_value),
       max_array_size_(max_array_size),
       tensor_array_multiple_writes_aggregate_(
-          tensor_array_multiple_writes_aggregate) {
+          tensor_array_multiple_writes_aggregate),
+      definition_stack_trace_(definition_stack_trace) {
   CHECK(kind_ != kInvalid);
 
   for (const string& gradient : tensor_array_gradients) {
@@ -87,22 +89,25 @@ XlaResource::XlaResource(Kind kind, int arg_num, string name, DataType type,
 
 Status XlaResource::SetTypeAndShape(DataType type, const TensorShape& shape) {
   if (type == DT_INVALID) {
-    return errors::InvalidArgument(""Attempted to set type of resource '"", name_,
-                                   ""'' to an invalid type"");
+    return errors::InvalidArgument(
+        ""Attempted to set type of resource '"", name_, ""'' to an invalid type"",
+        DefinitionLocationMsg(definition_stack_trace_));
   }
   if (initialized() && type_ != type) {
-    return errors::Unimplemented(""Type of resource "", name_,
-                                 "" cannot be changed after initialization: ""
-                                 ""old type was "",
-                                 DataTypeString(type_), "", new type is "",
-                                 DataTypeString(type));
+    return errors::InvalidArgument(
+        ""Type of resource "", name_,
+        "" cannot be changed after initialization: ""
+        ""old type was "",
+        DataTypeString(type_), "", new type is "", DataTypeString(type),
+        DefinitionLocationMsg(definition_stack_trace_));
   }
   if (initialized() && shape_ != shape) {
-    return errors::Unimplemented(""Shape of resource "", name_,
-                                 "" cannot be changed after initialization: ""
-                                 ""old shape was "",
-                                 shape_.DebugString(), "", new shape is "",
-                                 shape.DebugString());
+    return errors::InvalidArgument(
+        ""Shape of resource "", name_,
+        "" cannot be changed after initialization: ""
+        ""old shape was "",
+        shape_.DebugString(), "", new shape is "", shape.DebugString(),
+        DefinitionLocationMsg(definition_stack_trace_));
   }
   type_ = type;
   shape_ = shape;
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",xla_resource.h,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/lib/core/status.h""
+#include ""tensorflow/core/util/managed_stack_trace.h""
 
 namespace tensorflow {
 
@@ -48,10 +49,11 @@ class XlaResource {
       int64 max_array_size);
 
   XlaResource(Kind kind, int arg_num, string name, DataType type,
-              TensorShape shape, const xla::XlaOp& initial_value,
-              int64 max_array_size,
+              TensorShape shape, xla::XlaOp initial_value, int64 max_array_size,
               const std::set<string>& tensor_array_gradients,
-              bool tensor_array_multiple_writes_aggregate);
+              bool tensor_array_multiple_writes_aggregate,
+              const absl::optional<ManagedStackTrace>& definition_stack_trace =
+                  absl::nullopt);
 
   XlaResource(const XlaResource&) = delete;
   XlaResource(XlaResource&&) = delete;
@@ -182,6 +184,8 @@ class XlaResource {
 
   std::map<string, std::unique_ptr<XlaResource>> tensor_array_gradients_;
   bool is_overwritten_ = false;
+
+  absl::optional<ManagedStackTrace> definition_stack_trace_;
 };
 
 }  // namespace tensorflow
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",def_function_xla_jit_test.py,"@@ -1066,6 +1066,24 @@ class DefFunctionTest(xla_test.XLATestCase):
         v = variables.Variable([[2.]])
         self.assertAllClose(f(v), constant_op.constant([[0.5]]))
 
+  @test_util.disable_mlir_bridge('TODO(b/190444466): MLIR bridge seems to '
+                                 'ignore resource assignments')
+  def testErrMsgAssignWrongShape(self):
+    with ops.device('device:{}:0'.format(self.device)):
+
+      v = variables.Variable([3.1, 3.2])
+
+      @def_function.function(jit_compile=True)
+      def f(samples):
+        v.assign(array_ops.zeros(samples))  # assignment
+
+      with self.assertRaisesRegex(errors.InvalidArgumentError,
+                                  '@ .+def_function_xla_jit_test.py'):
+        f(constant_op.constant(6))
+
+      with self.assertRaisesRegex(errors.InvalidArgumentError, 'assignment'):
+        f(constant_op.constant(6))
+
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
",0,test
bee892dbad77d72a323127e0b22290363adac470,tensorflow/tensorflow,"[TF/XLA] Produce more helpful error messages for resource var shape errors during compilation

PiperOrigin-RevId: 378549445
Change-Id: I9cfe6aa845042ad864f5586512c4be650cc6e274",stack_trace.h,"@@ -152,8 +152,14 @@ inline std::vector<StackFrame> ManagedStackTraceToStackFrames(
     int id, const StackTraceMap& mapper, const StackTraceFilter& filtered,
     bool reverse_traversal, int limit) {
   PyGILState_STATE gstate = PyGILState_Ensure();
-  std::vector<StackFrame> result = stack_trace_manager->Get(id)->ToStackFrames(
-      mapper, filtered, reverse_traversal, limit);
+  StackTrace* stack_trace = stack_trace_manager->Get(id);
+  if (!stack_trace) {
+    // Must have evicted the stack trace by now. Do best effort.
+    return {};
+  }
+
+  std::vector<StackFrame> result =
+      stack_trace->ToStackFrames(mapper, filtered, reverse_traversal, limit);
   PyGILState_Release(gstate);
   return result;
 }
",0,test
2a3d40e362a9ea67570d4e8ca467e49845443962,tensorflow/tensorflow,"[XLA:CPU] Switch dynamic_ops_test to use Parameter (instead of Constant) for large test values (also reduces test timeout to medium/moderate).
Reduces avg runtime of DynamicUpdateSliceTest.R3ContiguousLarger (over 1000 runs) from 60sec to 10sec.
Change: 146524027",client_library_test_base.h,"@@ -239,6 +239,18 @@ class ClientLibraryTestBase : public ::testing::Test {
       const string& name, ComputationBuilder* builder,
       ComputationDataHandle* data_handle);
 
+  // Create a parameter instruction that wraps the given constant array
+  // ""array_3d"" and then stores to ""data_handle"" the global handle for that
+  // parameter.
+  //
+  // ""parameter_number"" is the parameter number.
+  // ""name"" is the name of the parameter instruction.
+  template <typename NativeT>
+  std::unique_ptr<GlobalData> CreateR3Parameter(
+      const Array3D<NativeT>& array_3d, int64 parameter_number,
+      const string& name, ComputationBuilder* builder,
+      ComputationDataHandle* data_handle);
+
   Client* client_;
   ExecutionOptions execution_options_;
 };
@@ -382,6 +394,18 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR2Parameter(
   return data;
 }
 
+template <typename NativeT>
+std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR3Parameter(
+    const Array3D<NativeT>& array_3d, int64 parameter_number,
+    const string& name, ComputationBuilder* builder,
+    ComputationDataHandle* data_handle) {
+  std::unique_ptr<Literal> literal = LiteralUtil::CreateR3FromArray3D(array_3d);
+  std::unique_ptr<GlobalData> data =
+      client_->TransferToServer(*literal).ConsumeValueOrDie();
+  *data_handle = builder->Parameter(parameter_number, literal->shape(), name);
+  return data;
+}
+
 template <typename NativeT>
 std::vector<NativeT> ClientLibraryTestBase::CreatePseudorandomR1(
     const int width, NativeT min_value, NativeT max_value, uint32 seed) {
",0,train
2a3d40e362a9ea67570d4e8ca467e49845443962,tensorflow/tensorflow,"[XLA:CPU] Switch dynamic_ops_test to use Parameter (instead of Constant) for large test values (also reduces test timeout to medium/moderate).
Reduces avg runtime of DynamicUpdateSliceTest.R3ContiguousLarger (over 1000 runs) from 60sec to 10sec.
Change: 146524027",dynamic_ops_test.cc,"@@ -350,13 +350,20 @@ class DynamicUpdateSliceTest : public ClientLibraryTestBase {
 
     // Build dynamic slice computation.
     ComputationBuilder builder(client_, TestName());
+    // Initialize and transfer input parameter.
+    ComputationDataHandle input;
+    std::unique_ptr<GlobalData> input_data = CreateR3Parameter<float>(
+        input_values, 0, ""input_values"", &builder, &input);
+    // Initialize and transfer update parameter.
+    ComputationDataHandle update;
+    std::unique_ptr<GlobalData> update_data = CreateR3Parameter<float>(
+        update_values, 1, ""update_values"", &builder, &update);
     auto starts = builder.ConstantR1<int32>({index, 0, 0});
-    auto input = builder.ConstantR3FromArray3D<float>(input_values);
-    auto update = builder.ConstantR3FromArray3D<float>(update_values);
     builder.DynamicUpdateSlice(input, update, starts);
 
     // Run computation and compare against expected values.
-    ComputeAndCompareR3<float>(&builder, expected_values, {},
+    ComputeAndCompareR3<float>(&builder, expected_values,
+                               {input_data.get(), update_data.get()},
                                ErrorSpec(0.000001));
   }
 
",0,train
ffca44e327e02a38bdaab56b8c5ebd6f4b2ab69c,tensorflow/tensorflow,"Fix tf.nn.log_softmax documentation. (Missing `log` in normalization.)
Change: 136520720",nn_ops.py,"@@ -1103,7 +1103,7 @@ def _softmax(logits, compute_op, dim=-1, name=None):
 
 
 def softmax(logits, dim=-1, name=None):
-  """"""Computes log softmax activations.
+  """"""Computes softmax activations.
 
   For each batch `i` and class `j` we have
 
@@ -1130,7 +1130,7 @@ def log_softmax(logits, dim=-1, name=None):
 
   For each batch `i` and class `j` we have
 
-      logsoftmax = logits - reduce_sum(exp(logits), dim)
+      logsoftmax = logits - log(reduce_sum(exp(logits), dim))
 
   Args:
     logits: A non-empty `Tensor`. Must be one of the following types: `half`,
",0,test
77c0bb432292f53148a1c2e36e8ec643ba994d23,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-04-10

PiperOrigin-RevId: 305845881
Change-Id: Idbaae458d48d6751d19452ae3d2ac42d58d91864",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 4, 9)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 4, 10)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",chlo_ops.h,"@@ -53,10 +53,6 @@ namespace mlir {
 namespace chlo {
 namespace OpTrait {
 
-template <typename ConcreteType>
-class BroadcastingElementwise
-    : public mlir::OpTrait::TraitBase<ConcreteType, BroadcastingElementwise> {};
-
 template <typename ConcreteType>
 class Broadcasting
     : public mlir::OpTrait::TraitBase<ConcreteType, Broadcasting> {};
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops.h,"@@ -34,6 +34,7 @@ limitations under the License.
 #include ""mlir/Interfaces/SideEffectInterfaces.h""
 
 // clang-format off
+#include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base.h""
 #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_structs.h""
 #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops_base_enums.h""
 #include ""mlir-hlo/Dialect/mhlo/IR/infer_fusibility_op_interface.h""
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops_base.h,"@@ -0,0 +1,33 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_HLO_OPS_BASE_H_
+#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_IR_HLO_OPS_BASE_H_
+
+#include ""mlir/IR/OpDefinition.h""
+
+namespace mlir {
+namespace mhlo {
+namespace OpTrait {
+
+template <typename ConcreteType>
+class BroadcastingElementwise
+    : public mlir::OpTrait::TraitBase<ConcreteType, BroadcastingElementwise> {};
+
+}  // namespace OpTrait
+}  // namespace mhlo
+}  // namespace mlir
+
+#endif
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",hlo_ops.cc,"@@ -1561,7 +1561,8 @@ class DynamicReshapeOpSameShapeOpResult
   LogicalResult matchAndRewrite(DynamicReshapeOp op,
                                 PatternRewriter& rewriter) const override {
     Operation* def_op = op.operand().getDefiningOp();
-    if (!def_op || !def_op->hasTrait<OpTrait::SameOperandsAndResultShape>()) {
+    if (!def_op ||
+        !def_op->hasTrait<mlir::OpTrait::SameOperandsAndResultShape>()) {
       return failure();
     }
     Operation* input_def_op = def_op->getOperand(0).getDefiningOp();
@@ -2098,7 +2099,7 @@ Operation* ReduceWindowOp::getReductionOp(int result_index) {
   if (arg0_num == result_index && arg1_num == other_arg_index)
     return compute_op;
   if (arg0_num == other_arg_index && arg1_num == result_index &&
-      compute_op->hasTrait<OpTrait::IsCommutative>())
+      compute_op->hasTrait<mlir::OpTrait::IsCommutative>())
     return compute_op;
   return nullptr;
 }
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",broadcast_propagation.cc,"@@ -177,8 +177,8 @@ struct MoveElementwiseOpsIntoAssumingOpPattern : public RewritePattern {
   LogicalResult matchAndRewrite(Operation *op,
                                 PatternRewriter &rewriter) const override {
     // Apply to all elementwise and broadcasting elementwise operations.
-    if (!op->hasTrait<OpTrait::Elementwise>() &&
-        !op->hasTrait<chlo::OpTrait::BroadcastingElementwise>())
+    if (!op->hasTrait<mlir::OpTrait::Elementwise>() &&
+        !op->hasTrait<mhlo::OpTrait::BroadcastingElementwise>())
       return failure();
 
     return MoveIntoAssumingOpMatchAndRewrite(op, rewriter);
@@ -336,8 +336,8 @@ struct EarlyBroadcastInDimOpPattern
                                 PatternRewriter &rewriter) const override {
     Operation *producer_op = bcast_op.operand().getDefiningOp();
     if (!producer_op ||
-        !producer_op->hasTrait<OpTrait::SameOperandsAndResultShape>() ||
-        !producer_op->hasTrait<OpTrait::Elementwise>()) {
+        !producer_op->hasTrait<mlir::OpTrait::SameOperandsAndResultShape>() ||
+        !producer_op->hasTrait<mlir::OpTrait::Elementwise>()) {
       return failure();
     }
 
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",rank_specialization.cc,"@@ -66,9 +66,9 @@ namespace {
 bool IsClusterable(Operation *op) {
   if (!llvm::isa<InferShapedTypeOpInterface>(op)) return false;
   if (op->getNumOperands() == 0) return false;
-  return (op->hasTrait<OpTrait::Elementwise>() &&
-          op->hasTrait<OpTrait::SameOperandsAndResultShape>()) ||
-         (op->hasTrait<chlo::OpTrait::BroadcastingElementwise>() &&
+  return (op->hasTrait<mlir::OpTrait::Elementwise>() &&
+          op->hasTrait<mlir::OpTrait::SameOperandsAndResultShape>()) ||
+         (op->hasTrait<mhlo::OpTrait::BroadcastingElementwise>() &&
           op->hasTrait<chlo::OpTrait::Broadcasting>());
 }
 
@@ -729,7 +729,7 @@ SmallVector<SmallVector<Value, 4>, 4> FindNonScalarShapeEquivalences(
     for (Value v : vs.drop_front()) eqs.unionSets(repr, v);
   };
   for (Operation &nested_op : op.getBody()->without_terminator()) {
-    if (nested_op.hasTrait<OpTrait::SameOperandsAndResultShape>()) {
+    if (nested_op.hasTrait<mlir::OpTrait::SameOperandsAndResultShape>()) {
       union_sets(nested_op.getOperands());
       union_sets(nested_op.getResults());
       if (!nested_op.getOperands().empty() && !nested_op.getResults().empty())
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",sink_constants_to_control_flow.cc,"@@ -65,7 +65,7 @@ class SinkConstantsToControlFlowPass
     visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) {
       Value constant = use->get();
       auto op = constant.getDefiningOp();
-      if (!op || !op->hasTrait<OpTrait::ConstantLike>()) return;
+      if (!op || !op->hasTrait<mlir::OpTrait::ConstantLike>()) return;
       auto map_entry = sunk_constant.try_emplace(constant, nullptr);
       if (!map_entry.second) {
         // This constant has already been cloned into the region, reuse it.
",0,train
ab5d7ab16789e72a523ba3033ce8ecbd0021ed63,tensorflow/tensorflow,"[MLIR][HLO] Annotate `mhlo.clamp` and `mhlo.select` as element-wise broadcasting

The operations allow for a limited form of broadcasting which allows some
operands to be scalars. As such they are neither strictly `Elementwise`, nor
`Broadcasting`. They do fulfill the requirements for `BroadcastingElementwise`
though.

PiperOrigin-RevId: 379719961
Change-Id: I4dce939c95d0e791e2c6a45dea9b8ce19ff0b6aa",legalize_tf.cc,"@@ -3418,9 +3418,9 @@ static void BroadcastBatchMatMulV2Operands(Value lhs, Value rhs, Location loc,
   // The last two dimensions are the matrix row/col dimensions. Don't broadcast
   // them.
   SmallVector<int64_t, 6> result_batch_shape_compile_time_extents;
-  OpTrait::util::getBroadcastedShape(lhs_type.getShape().drop_back(2),
-                                     rhs_type.getShape().drop_back(2),
-                                     result_batch_shape_compile_time_extents);
+  mlir::OpTrait::util::getBroadcastedShape(
+      lhs_type.getShape().drop_back(2), rhs_type.getShape().drop_back(2),
+      result_batch_shape_compile_time_extents);
   auto result_batch_shape = rewriter->create<shape::BroadcastOp>(
       loc, shape_type, lhs_splitted.head(), rhs_splitted.head(),
       /*error=*/nullptr);
",0,train
8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule.

PiperOrigin-RevId: 172946149",compile.cc,"@@ -97,11 +97,11 @@ Status CompileGraph(const GraphDef& graph_def, const tf2xla::Config& config,
   TF_RETURN_IF_ERROR(ConvertGraphDefToXla(graph_def, config, client,
                                           &computation,
                                           &compile_result->has_context_arg));
-  if (!flags.debug_dir.empty()) {
+  if (!flags.out_session_module.empty()) {
     TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::SessionModule> module,
                         computation.Snapshot());
-    string file = io::JoinPath(flags.debug_dir, ""tfcompile_xla_module.pb"");
-    TF_RETURN_IF_ERROR(WriteBinaryProto(Env::Default(), file, *module));
+    TF_RETURN_IF_ERROR(
+        WriteBinaryProto(Env::Default(), flags.out_session_module, *module));
   }
   xla::cpu::CpuAotCompilationOptions aot_opts(
       flags.target_triple, flags.target_cpu, flags.target_features,
",0,test
8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule.

PiperOrigin-RevId: 172946149",flags.cc,"@@ -33,9 +33,6 @@ void AppendMainFlags(std::vector<Flag>* flag_list, MainFlags* flags) {
        ""fetch nodes will be dumped to stdout in a comma-separated list.  ""
        ""Typically used to format arguments for other tools, e.g. ""
        ""freeze_graph.""},
-      {""debug_dir"", &flags->debug_dir,
-       ""Specifies a directory to dump debugging information, including ""
-       ""rewritten graphs and the XLA HLO module.""},
       // Flags controlling the XLA ahead-of-time compilation, that correspond to
       // the fields of xla::cpu::CpuAotCompilationOptions.
       //
@@ -64,6 +61,8 @@ void AppendMainFlags(std::vector<Flag>* flag_list, MainFlags* flags) {
        ""namespaces are given, within the global namespace.""},
       {""out_object"", &flags->out_object, ""Output object file name.""},
       {""out_header"", &flags->out_header, ""Output header file name.""},
+      {""out_session_module"", &flags->out_session_module,
+       ""Output session module proto.""},
       {""gen_name_to_index"", &flags->gen_name_to_index,
        ""Generate name-to-index data for Lookup{Arg,Result}Index methods.""},
       {""gen_program_shape"", &flags->gen_program_shape,
",0,test
8ff33271ea4de89e6ff662fe8e479c1fcf56fe77,tensorflow/tensorflow,"Dump the computation's SessionModule as part of the tf_compile rule.

PiperOrigin-RevId: 172946149",flags.h,"@@ -29,7 +29,6 @@ struct MainFlags {
   string graph;
   string config;
   bool dump_fetch_nodes = false;
-  string debug_dir;
   string target_triple;
   string target_cpu;
   string target_features;
@@ -37,6 +36,7 @@ struct MainFlags {
   string cpp_class;
   string out_object;
   string out_header;
+  string out_session_module;
 
   // C++ codegen options
   bool gen_name_to_index = false;
",0,test
63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included:

1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value.
2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge.
3. Fix a bug in TensorHandle where the resource type and shape cache is enabled even when the lookup is not successful. In the Sonnet case, the resource shape lookup happens before the resource creation, so the cache remembers the NotFound status and causes failures for future lookups.
4. Mark the TPUReplicatedInput node as not differentiable.

PiperOrigin-RevId: 252153260",tensor_handle.cc,"@@ -400,8 +400,14 @@ Status TensorHandle::GetResourceVariableDtypeAndShape(
   mutex_lock l(ctx_mutex_);
   resource_dtype_and_shape_status_ = GetResourceVariableDtypeAndShapeInternal(
       tensor_, resource_device_, &resource_dtype_and_shape_);
-  resource_dtype_and_shape_initialized_ = true;
-  *result = resource_dtype_and_shape_;
+
+  // TODO(endlessroad): the resource variable shape may be partially known at
+  // creation time, and it can be changed later. We may not want the cache in
+  // this case.
+  if (resource_dtype_and_shape_status_.ok()) {
+    resource_dtype_and_shape_initialized_ = true;
+    *result = resource_dtype_and_shape_;
+  }
   return resource_dtype_and_shape_status_;
 }
 
",0,train
63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included:

1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value.
2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge.
3. Fix a bug in TensorHandle where the resource type and shape cache is enabled even when the lookup is not successful. In the Sonnet case, the resource shape lookup happens before the resource creation, so the cache remembers the NotFound status and causes failures for future lookups.
4. Mark the TPUReplicatedInput node as not differentiable.

PiperOrigin-RevId: 252153260",tpu_strategy.py,"@@ -438,6 +438,15 @@ class TPUExtended(distribute_lib.StrategyExtendedV1):
       value_list = []
       for i, d in enumerate(devices):
         with ops.device(d):
+          if i == 0:
+            initial_value = kwargs[""initial_value""]
+            # TODO(b/134779280): Remove initialization scope once the
+            # ""Tensor-typed variable initializers must either be wrapped in an ""
+            # ""init_scope or callable"" error is fixed.
+            with ops.init_scope():
+              initial_value = initial_value() if callable(
+                  initial_value) else initial_value
+
           if i > 0:
             # Give replicas meaningful distinct names:
             var0name = value_list[0].name.split("":"")[0]
@@ -445,22 +454,11 @@ class TPUExtended(distribute_lib.StrategyExtendedV1):
             # ensure that we ignore the name scope and instead use the given
             # name as the absolute name of the variable.
             kwargs[""name""] = ""%s/replica_%d/"" % (var0name, i)
-            # Initialize replicas with the same value:
-            def initial_value_fn():
-              return array_ops.identity(initial_value)
+          kwargs[""initial_value""] = initial_value
 
-            kwargs[""initial_value""] = initial_value_fn
           with context.device_policy(context.DEVICE_PLACEMENT_SILENT):
             v = next_creator(*args, **kwargs)
-          if i == 0:
-            # To avoid incorrectly nested device scopes, we exit out of
-            # existing control flow scopes and function building graphs.
-            # TODO(b/132997073): Remove initialization scope once nested
-            # device scope issue has been fixed.
-            with ops.init_scope():
-              initial_value = (
-                  v.value() if ops.executing_eagerly_outside_functions() else
-                  v.initial_value)
+
           assert not isinstance(v, values.TPUMirroredVariable)
           value_list.append(v)
       return value_list
",0,train
63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included:

1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value.
2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge.
3. Fix a bug in TensorHandle where the resource type and shape cache is enabled even when the lookup is not successful. In the Sonnet case, the resource shape lookup happens before the resource creation, so the cache remembers the NotFound status and causes failures for future lookups.
4. Mark the TPUReplicatedInput node as not differentiable.

PiperOrigin-RevId: 252153260",lift_to_graph.py,"@@ -204,13 +204,20 @@ def _copy_non_source(op, graph, op_map):
                            old_graph_op=original_control_input))
     else:
       copied_control_inputs.append(copied_control_input)
+
+  # Don't copy over nodes with _tpu_replicate attribute. This attributed is used
+  # to signal that the op was built inside a tpu_replicate context; if we're
+  # lifting it to another graph we're similarly lifting it into another context.
   with ops.control_dependencies(copied_control_inputs), ops.device(op.device):
     copied_op = graph.create_op(
         op_type=op.type,
         inputs=copied_inputs,
         dtypes=[x.dtype for x in op.outputs],
-        attrs={key: value for key, value in op.node_def.attr.items()
-               if not key.startswith(""_class"")},  # b/128981532.
+        attrs={
+            key: value for key, value in op.node_def.attr.items()
+            if not key.startswith(""_class"") and
+            not key.startswith(""_tpu_replicate"")
+        },  # b/128981532.
         name=op.name)
   op_map[op] = copied_op
   for i, o in enumerate(op.outputs):
@@ -339,6 +346,10 @@ def lift_to_graph(init_tensors, graph, sources=None,
       marked_ops.add(op)
       ops_to_copy.append(op)
       for inp in _graph_inputs(op):
+        # Don't lift the TPUReplicateMetadata nodes out of the function, because
+        # it has no registered kernels.
+        if inp.name == ""TPUReplicateMetadata"":
+          continue
         unvisited_ops.add(inp)
         if (all(x in marked_ops for x in op_outputs[inp]) and
             inp not in sources):
@@ -403,6 +414,10 @@ def lift_to_graph(init_tensors, graph, sources=None,
         mutation.copied_op._update_input(
             mutation.input_index, op_map[mutation.old_graph_tensor])
       for mutation in control_mutations:
+        # Don't lift the TPUReplicateMetadata nodes out of the function, because
+        # it has no registered kernels.
+        if mutation.old_graph_op.name == ""TPUReplicateMetadata"":
+          continue
         mutation.copied_op._add_control_input(op_map[mutation.old_graph_op])
     # pylint: enable=protected-access
 
",0,train
63b9442ce3cf07fc990f62b307c54694b2baa4bb,tensorflow/tensorflow,"Fix the issue that customized Sonnet module doesn't work on TF2.0 TPU. The following changes are included:

1. In TPUMirroredVariable creator, get the initial value from kwargs[""initial_value""] directly instead of from the first variable value.
2. Don't lift nodes with ""_tpu_replicate"" attributes and ""TPUReplicateMetadata"" node out of the function, which may otherwise trigger failures in TF-XLA bridge.
3. Fix a bug in TensorHandle where the resource type and shape cache is enabled even when the lookup is not successful. In the Sonnet case, the resource shape lookup happens before the resource creation, so the cache remembers the NotFound status and causes failures for future lookups.
4. Mark the TPUReplicatedInput node as not differentiable.

PiperOrigin-RevId: 252153260",tpu.py,"@@ -42,6 +42,7 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
+ops.NotDifferentiable(""TPUReplicatedInput"")
 
 # Operations that indicate some error in the users graph, e.g. a placeholder
 # that's introduced outside of the infeed.
",0,train
a8dbd3a680b06f617baef53dfad47a10e642f0ca,tensorflow/tensorflow,"Do not override `steps_per_epoch` if it is not None.

Currently, if you pass a tf.keras.preprocessing.image.ImageDataGenerator to fit with a manual `steps_per_epoch`, that value is overridden with the one inferred from the data. Fix it to only override the value if `steps_per_epoch` was not manually set.

PiperOrigin-RevId: 229774626",training_generator.py,"@@ -410,7 +410,9 @@ def convert_to_generator_like(data,
       and may be `None` or `[None]`.
     batch_size: Used when creating a generator out of tuples of NumPy arrays or
       EagerTensors.
-    steps_per_epoch: Steps of the generator to run each epoch.
+    steps_per_epoch: Steps of the generator to run each epoch. If `None` the
+      number of steps will be read from the data (for
+      `keras.utils.data_utils.Sequence` types).
     epochs: Total number of epochs to run.
     shuffle: Whether the data should be shuffled.
 
@@ -431,7 +433,8 @@ def convert_to_generator_like(data,
   if data_utils.is_generator_or_sequence(data) or isinstance(
       data, iterator_ops.EagerIterator):
     if isinstance(data, data_utils.Sequence):
-      steps_per_epoch = len(data)
+      if steps_per_epoch is None:
+        steps_per_epoch = len(data)
     return data, steps_per_epoch
   if isinstance(data, dataset_ops.DatasetV2):
     return dataset_ops.make_one_shot_iterator(data), steps_per_epoch
",0,test
ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes

PiperOrigin-RevId: 335657911
Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter.cc,"@@ -52,7 +52,7 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault {
   Status HandleConvolution(HloInstruction* convolution) override;
 
   // Runs the visitor on a computation.
-  static bool Run(HloComputation* computation);
+  static bool Run(int64 limit_on_batch_size, HloComputation* computation);
 
   // Returns whether any convolution ops were rewritten.
   const bool changed() const { return changed_; }
@@ -60,18 +60,23 @@ class ConvolutionVisitor : public DfsHloVisitorWithDefault {
   ~ConvolutionVisitor() override = default;
 
  private:
-  explicit ConvolutionVisitor(HloComputation* computation)
-      : computation_(computation) {}
+  explicit ConvolutionVisitor(int64 limit_on_batch_size,
+                              HloComputation* computation)
+      : computation_(computation), limit_on_batch_size_(limit_on_batch_size) {}
 
   // Current HloComputation instance the ConvolutionVisitor is traversing.
   HloComputation* computation_;
 
   // Whether rewrite has occurred.
   bool changed_ = false;
+
+  // Limit on batch size to apply this technique on.
+  int64 limit_on_batch_size_;
 };
 
-bool ConvolutionVisitor::Run(HloComputation* computation) {
-  ConvolutionVisitor visitor(computation);
+bool ConvolutionVisitor::Run(int64 limit_on_batch_size,
+                             HloComputation* computation) {
+  ConvolutionVisitor visitor(limit_on_batch_size, computation);
   TF_CHECK_OK(computation->Accept(&visitor));
   return visitor.changed_;
 }
@@ -93,11 +98,18 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
   constexpr int64 kLowLimitForSplitCount = 4;
   constexpr int64 kHighLimitForSplitCount = 24;
 
+  // Batch in batch_group_count has different semantics (it isn't true batch).
+  // Consider supporting this case in future if needed.
+  if (convolution->batch_group_count() != 1) {
+    return Status::OK();
+  }
+
   if (convolution->window().dimensions(kChosenSpatialDim).window_dilation() !=
       1) {
     return Status::OK();
   }
 
+  // TODO(b/168316428): Support base dilations.
   if (convolution->window().dimensions(kChosenSpatialDim).base_dilation() !=
       1) {
     return Status::OK();
@@ -108,8 +120,7 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
   const int64 old_batch_size =
       convolution->operand(0)->shape().dimensions(activations_batch_dim);
 
-  // TODO(b/168316428): Only doing this for batch 1 currently. Extend later.
-  if (old_batch_size != 1) {
+  if (old_batch_size > limit_on_batch_size_) {
     return Status::OK();
   }
 
@@ -261,11 +272,20 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
   // -2 low padding and +2 high padding) to create shape B. Then, we select
   // between A and B such that halo regions are placed into A at the right
   // locations.
+
+  // The benefit of the above mentioned scheme is that it allows for batch
+  // growth. Here are some examples of the size increases it causes for a 3x3
+  // kernel.
+  // with batch=1, [1,16] -> [4,4] ->   [4,6] ->   [1,24] growth of 8.
+  // with batch=2, [2,16] -> [8,4] ->   [8,6] ->   [1,48] growth of 16.
+  // with batch=3, [3,16] -> [12,4] -> [12,6] -> [1,72] growth of 24.
+
   std::vector<int64> reshape_dimensions(
       activations->shape().dimensions().begin(),
       activations->shape().dimensions().end());
+
   reshape_dimensions[spatial_dimension_to_split] = spatial_split_size;
-  reshape_dimensions[activations_batch_dim] = num_splits;
+  reshape_dimensions[activations_batch_dim] = num_splits * old_batch_size;
 
   TF_ASSIGN_OR_RETURN(HloInstruction * batch_increased_reshape,
                       MakeReshapeHlo(reshape_dimensions, activations));
@@ -337,11 +357,19 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
   TF_ASSIGN_OR_RETURN(HloInstruction * select,
                       MakeSelectHlo(shape_mask, straightened_activations,
                                     rotated_activations, convolution));
-  VLOG(1) << ""Select generated"";
+  VLOG(1) << ""Select generated"" << select->ToString();
 
   // Increase batch size for one last time.
-  TF_ASSIGN_OR_RETURN(
-      activations, MakeReshapeHlo(pad_applied->shape().dimensions(), select));
+  std::vector<int64> combined_batch_dimensions(
+      pad_applied->shape().dimensions().begin(),
+      pad_applied->shape().dimensions().end());
+
+  combined_batch_dimensions[activations_batch_dim] =
+      old_batch_size * num_splits;
+  TF_ASSIGN_OR_RETURN(activations,
+                      MakeReshapeHlo(combined_batch_dimensions, select));
+
+  VLOG(1) << ""Batch merge done "" << activations->ToString();
 
   // Now, we rewrite the convolution with a larger batch.
   const auto& activations_shape = activations->shape();
@@ -385,28 +413,35 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
 
   VLOG(1) << ""new_conv "" << new_conv->ToString();
 
+  const int64 output_split_spatial_dim =
+      new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim);
+  const int64 output_batch_dim = new_dim_numbers.output_batch_dimension();
+
   Shape new_shape = new_conv->shape();
-  const int64 new_batch_size =
-      new_shape.dimensions(new_dim_numbers.output_batch_dimension());
-  const int64 new_spatial_dim_size = new_shape.dimensions(
-      new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim));
-  new_shape.set_dimensions(
-      new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim),
-      new_batch_size * new_spatial_dim_size);
-  new_shape.set_dimensions(new_dim_numbers.output_batch_dimension(),
-                           old_batch_size);
+  const int64 new_batch_size = new_shape.dimensions(output_batch_dim);
+  const int64 new_spatial_dim_size =
+      new_shape.dimensions(output_split_spatial_dim);
+
+  CHECK_EQ(new_batch_size % old_batch_size, 0);
+
+  const int64 output_split_batch_size = new_batch_size / old_batch_size;
+
+  std::vector<int64> new_dimensions(new_conv->shape().dimensions().begin(),
+                                    new_conv->shape().dimensions().end());
+  new_dimensions[output_split_spatial_dim] =
+      output_split_batch_size * new_spatial_dim_size;
+  new_dimensions[new_dim_numbers.output_batch_dimension()] = old_batch_size;
 
   // Reshape the output of the new conv into the old convolutions shape.
   TF_ASSIGN_OR_RETURN(HloInstruction * reshape,
-                      MakeReshapeHlo(new_shape, new_conv));
+                      MakeReshapeHlo(new_dimensions, new_conv));
   convolution->SetupDerivedInstruction(reshape);
 
   std::vector<int64> start_indices(rank, 0),
-      end_indices(new_shape.dimensions().begin(), new_shape.dimensions().end()),
+      end_indices(new_dimensions.begin(), new_dimensions.end()),
       strides(rank, 1);
-  end_indices[new_dim_numbers.output_spatial_dimensions(kChosenSpatialDim)] =
-      convolution->shape().dimensions(
-          dim_numbers.output_spatial_dimensions(kChosenSpatialDim));
+  end_indices[output_split_spatial_dim] = convolution->shape().dimensions(
+      dim_numbers.output_spatial_dimensions(kChosenSpatialDim));
 
   // This slicing is getting rid of the padding we added to evenly divide space.
   TF_ASSIGN_OR_RETURN(
@@ -431,7 +466,7 @@ StatusOr<bool> ConvolutionSpaceToBatchConverter::Run(HloModule* module) {
                         module->ToString());
   bool changed = false;
   for (auto* comp : module->MakeNonfusionComputations()) {
-    if (ConvolutionVisitor::Run(comp)) {
+    if (ConvolutionVisitor::Run(limit_on_batch_size_, comp)) {
       changed = true;
     }
   }
",0,train
ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes

PiperOrigin-RevId: 335657911
Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter.h,"@@ -26,7 +26,8 @@ namespace xla {
 // batch.
 class ConvolutionSpaceToBatchConverter : public HloModulePass {
  public:
-  ConvolutionSpaceToBatchConverter() = default;
+  explicit ConvolutionSpaceToBatchConverter(int64 limit_on_batch_size = 1)
+      : limit_on_batch_size_(limit_on_batch_size) {}
 
   absl::string_view name() const override {
     return ""convolution-space-to-batch-converter"";
@@ -35,6 +36,8 @@ class ConvolutionSpaceToBatchConverter : public HloModulePass {
   // Run convolution rewriting on the given computation. Returns whether the
   // computation was changed.
   StatusOr<bool> Run(HloModule* module) override;
+
+  int64 limit_on_batch_size_;
 };
 
 }  // namespace xla
",0,train
ede138563636c4db03fa915efed5e4627f099da5,tensorflow/tensorflow,"Extend space to batch to apply to larger batch sizes

PiperOrigin-RevId: 335657911
Change-Id: I22a9f7f978b7d64bf09654771036d044d3d3ef41",space_to_batch_converter_test.cc,"@@ -65,31 +65,42 @@ ENTRY computation {
 
 TEST_F(ConvolutionSpaceToBatchConverterTest, SimpleBatch2) {
   string hlo_string = R""(
-  
   HloModule module
-ENTRY computation {
-  %p0 = bf16[2,258,258,32] parameter(0)
-  %p1 = bf16[3,3,32,32] parameter(1)
-  ROOT %convolution = bf16[2,256,256,32] convolution(%p0, %p1), window={size=3x3}, 
-  dim_labels=b01f_01io->b01f
-}
+  ENTRY computation {
+    %p0 = bf16[2,258,258,32] parameter(0)
+    %p1 = bf16[3,3,32,32] parameter(1)
+    ROOT %convolution = bf16[2,256,256,32] convolution(%p0, %p1), window={size=3x3}, 
+    dim_labels=b01f_01io->b01f
+  }
 
   )"";
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
                           ParseAndReturnVerifiedModule(hlo_string));
 
-  ConvolutionSpaceToBatchConverter converter;
-  ASSERT_FALSE(converter.Run(module.get()).ValueOrDie());
+  ConvolutionSpaceToBatchConverter converter(/*limit_on_batch_size=*/2);
+  ASSERT_TRUE(converter.Run(module.get()).ValueOrDie());
+  auto computation = module->entry_computation();
+  HloInstruction* root = computation->root_instruction();
+  EXPECT_THAT(root, op::Transpose());
+  EXPECT_THAT(root->operand(0), op::Slice());
+  auto reshape = root->operand(0)->operand(0);
+  EXPECT_THAT(reshape, op::Reshape());
+  EXPECT_THAT(reshape->operand(0), op::Convolution());
+  const int64 batch_dim = reshape->operand(0)
+                              ->convolution_dimension_numbers()
+                              .output_batch_dimension();
+  // Verify that the transform has increased the batch size.
+  EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 1);
 }
 
-TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) {
+TEST_F(ConvolutionSpaceToBatchConverterTest, Batch4WithStrideAndPad) {
   string hlo_string = R""(
   HloModule module
   ENTRY computation {
-    %p0 = bf16[1,224,224,3]{3,2,1,0} parameter(0)
+    %p0 = bf16[4,224,224,3]{3,2,1,0} parameter(0)
     %p1 = bf16[7,7,3,64]{3,2,1,0} parameter(1)
   
-    ROOT %convolution.3 = bf16[1,112,112,64]{3,2,1,0} convolution(%p0, %p1), 
+    ROOT %convolution.3 = bf16[4,112,112,64]{3,2,1,0} convolution(%p0, %p1), 
       window={size=7x7 stride=2x2 pad=3_3x3_3}, dim_labels=b01f_01io->b01f
   }
   )"";
@@ -97,7 +108,7 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) {
                           ParseAndReturnVerifiedModule(hlo_string));
 
   auto computation = module->entry_computation();
-  ConvolutionSpaceToBatchConverter converter;
+  ConvolutionSpaceToBatchConverter converter(/*limit_on_batch_size=*/4);
   ASSERT_TRUE(converter.Run(module.get()).ValueOrDie());
   HloInstruction* root = computation->root_instruction();
   EXPECT_THAT(root, op::Transpose());
@@ -109,7 +120,7 @@ TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithStrideAndPad) {
                               ->convolution_dimension_numbers()
                               .output_batch_dimension();
 
-  EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 1);
+  EXPECT_GT(reshape->operand(0)->shape().dimensions(batch_dim), 4);
 }
 
 TEST_F(ConvolutionSpaceToBatchConverterTest, Batch1WithKernelDilation) {
",0,train
4590f82223df65d7c0b1b2642408b0c8d17e1083,tensorflow/tensorflow,"Change callers of tf.image.per_image_whitening() to use tf.image.per_image_standardization(). Once these changes are submitted, per_image_whitening() can be removed.
Change: 137714408",cifar10_input.py,"@@ -179,7 +179,7 @@ def distorted_inputs(data_dir, batch_size):
                                              lower=0.2, upper=1.8)
 
   # Subtract off the mean and divide by the variance of the pixels.
-  float_image = tf.image.per_image_whitening(distorted_image)
+  float_image = tf.image.per_image_standardization(distorted_image)
 
   # Ensure that the random shuffling has good mixing properties.
   min_fraction_of_examples_in_queue = 0.4
@@ -234,7 +234,7 @@ def inputs(eval_data, data_dir, batch_size):
                                                          width, height)
 
   # Subtract off the mean and divide by the variance of the pixels.
-  float_image = tf.image.per_image_whitening(resized_image)
+  float_image = tf.image.per_image_standardization(resized_image)
 
   # Ensure that the random shuffling has good mixing properties.
   min_fraction_of_examples_in_queue = 0.4
",0,train
4590f82223df65d7c0b1b2642408b0c8d17e1083,tensorflow/tensorflow,"Change callers of tf.image.per_image_whitening() to use tf.image.per_image_standardization(). Once these changes are submitted, per_image_whitening() can be removed.
Change: 137714408",image_ops_test.py,"@@ -581,14 +581,14 @@ class PerImageWhiteningTest(test_util.TensorFlowTestCase):
 
     with self.test_session(use_gpu=True):
       x = constant_op.constant(x_np, shape=x_shape)
-      y = image_ops.per_image_whitening(x)
+      y = image_ops.per_image_standardization(x)
       y_tf = y.eval()
       self.assertAllClose(y_tf, y_np, atol=1e-4)
 
   def testUniformImage(self):
     im_np = np.ones([19, 19, 3]).astype(np.float32) * 249
     im = constant_op.constant(im_np)
-    whiten = image_ops.per_image_whitening(im)
+    whiten = image_ops.per_image_standardization(im)
     with self.test_session(use_gpu=True):
       whiten_np = whiten.eval()
       self.assertFalse(np.any(np.isnan(whiten_np)))
",0,train
bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled.

Conditionally disable MWMS health check when coordination service is enabled.

PiperOrigin-RevId: 391344263
Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",collective_all_reduce_strategy.py,"@@ -451,6 +451,11 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
       config_proto = copy.deepcopy(context.context().config)
       config_proto = self._update_config_proto(config_proto)
 
+      # If coordination service is enabled, use its internal heartbeat to detect
+      # peer failures instead of the Python-level health check.
+      if config_proto.experimental.coordination_service:
+        self._enable_check_health = False
+
       if hasattr(cluster_resolver, ""port""):
         port = cluster_resolver.port
       else:
",0,train
bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled.

Conditionally disable MWMS health check when coordination service is enabled.

PiperOrigin-RevId: 391344263
Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",mwms_peer_failure_test.py,"@@ -30,8 +30,11 @@ from tensorflow.python.distribute import collective_all_reduce_strategy as mwms_
 from tensorflow.python.distribute import multi_process_runner
 from tensorflow.python.distribute import multi_worker_test_base
 from tensorflow.python.distribute import test_util
+from tensorflow.python.eager import context
 from tensorflow.python.eager import test
 
+COORDINATION_SERVICE = None
+RPC_PROTOCOL = ""grpc""
 
 # Put it in top level so it executes in the child processes as well.
 mwms_lib.CollectiveAllReduceExtended._enable_check_health = True
@@ -77,6 +80,7 @@ class PeerFailureTest(test.TestCase):
     # the first replica to all replicas.
 
     def worker_fn():
+      context.context().enable_coordination_service(COORDINATION_SERVICE)
       strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
       with strategy.scope():
         tf.Variable(1.)
@@ -87,7 +91,8 @@ class PeerFailureTest(test.TestCase):
         return v.read_value().numpy()
 
     cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
-    mpr = multi_process_runner.MultiProcessRunner(worker_fn, cluster_spec)
+    mpr = multi_process_runner.MultiProcessRunner(
+        worker_fn, cluster_spec, rpc_layer=RPC_PROTOCOL)
     mpr.start()
     # TODO(b/151232436): Always raise UnavailableError when a peer fails.
     with self.assertRaises(
@@ -111,6 +116,7 @@ class PeerFailureTest(test.TestCase):
     # not aware of the failures of the receiving party.
 
     def worker_fn():
+      context.context().enable_coordination_service(COORDINATION_SERVICE)
       strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
       value = tf.identity([1.])
       strategy.reduce(""sum"", value, axis=None)
@@ -120,7 +126,8 @@ class PeerFailureTest(test.TestCase):
       strategy.reduce(""sum"", value, axis=None)
 
     cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
-    mpr = multi_process_runner.MultiProcessRunner(worker_fn, cluster_spec)
+    mpr = multi_process_runner.MultiProcessRunner(
+        worker_fn, cluster_spec, rpc_layer=RPC_PROTOCOL)
     mpr.start()
     # TODO(b/151232436): Always raise UnavailableError when a peer fails.
     with self.assertRaises(
@@ -136,6 +143,7 @@ class PeerFailureRecoverTest(test.TestCase):
     # See PeerFailureTest.test_creating_variable
 
     def worker_fn(attempts):
+      context.context().enable_coordination_service(COORDINATION_SERVICE)
       strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
       task_id, attempt = get_attempt(strategy, attempts)
       with strategy.scope():
@@ -149,7 +157,11 @@ class PeerFailureRecoverTest(test.TestCase):
     cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
     attempts = multi_process_runner.manager().dict()
     mpr = multi_process_runner.MultiProcessRunner(
-        worker_fn, cluster_spec, args=(attempts,), auto_restart=True)
+        worker_fn,
+        cluster_spec,
+        rpc_layer=RPC_PROTOCOL,
+        args=(attempts,),
+        auto_restart=True)
     mpr.start()
     results = mpr.join(timeout=90).return_value
     self.assertEqual(results[0], results[1])
@@ -158,6 +170,7 @@ class PeerFailureRecoverTest(test.TestCase):
     # See PeerFailureTest.test_reduce_small_tensor
 
     def worker_fn(attempts):
+      context.context().enable_coordination_service(COORDINATION_SERVICE)
       strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
       task_id, attempt = get_attempt(strategy, attempts)
       value = tf.identity([1.])
@@ -170,7 +183,11 @@ class PeerFailureRecoverTest(test.TestCase):
     cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
     attempts = multi_process_runner.manager().dict()
     mpr = multi_process_runner.MultiProcessRunner(
-        worker_fn, cluster_spec, args=(attempts,), auto_restart=True)
+        worker_fn,
+        cluster_spec,
+        rpc_layer=RPC_PROTOCOL,
+        args=(attempts,),
+        auto_restart=True)
     mpr.start()
     results = mpr.join(timeout=90).return_value
     self.assertAllEqual(results, [[2.], [2.]])
@@ -189,6 +206,7 @@ class PeerFailureRecoverTest(test.TestCase):
       mwms_lib.CollectiveAllReduceExtended._check_alive_interval = 30
       mwms_lib.CollectiveAllReduceExtended._check_alive_initial_timeout = 30
 
+      context.context().enable_coordination_service(COORDINATION_SERVICE)
       strategy = tf.distribute.experimental.MultiWorkerMirroredStrategy()
       task_id, attempt = get_attempt(strategy, attempts)
 
@@ -209,7 +227,11 @@ class PeerFailureRecoverTest(test.TestCase):
     cluster_spec = multi_worker_test_base.create_cluster_spec(num_workers=2)
     attempts = multi_process_runner.manager().dict()
     mpr = multi_process_runner.MultiProcessRunner(
-        worker_fn, cluster_spec, args=(attempts,), auto_restart=True)
+        worker_fn,
+        cluster_spec,
+        rpc_layer=RPC_PROTOCOL,
+        args=(attempts,),
+        auto_restart=True)
     mpr.start()
     mpr.join(timeout=90)
 
",0,train
bf3e812d313010246c480d81cc815fe0c92e5d70,tensorflow/tensorflow,"Add peer failure and recovery test cases with coordination service enabled.

Conditionally disable MWMS health check when coordination service is enabled.

PiperOrigin-RevId: 391344263
Change-Id: I0a3344eafe8fc02cd407b5d7a01017ec9a9d92fb",context.py,"@@ -506,6 +506,10 @@ class Context(object):
     device_list = pywrap_tfe.TFE_ContextListDevices(self._context_handle)
     try:
       self._num_gpus = 0
+      current_job, current_task = None, None
+      server_def = self._server_def or self._collective_ops_server_def
+      if server_def is not None:
+        current_job, current_task = server_def.job_name, server_def.task_index
       for i in range(pywrap_tfe.TF_DeviceListCount(device_list)):
         dev_name = pywrap_tfe.TF_DeviceListName(device_list, i)
         context_devices.append(pydev.canonical_name(dev_name))
@@ -517,7 +521,8 @@ class Context(object):
         logical_devices.append(
             LogicalDevice(name=spec.to_string(), device_type=spec.device_type))
         dev_type = pywrap_tfe.TF_DeviceListType(device_list, i)
-        if dev_type == ""GPU"":
+        if (dev_type == ""GPU"" and spec.job == current_job and
+            spec.task == current_task):
           self._num_gpus += 1
 
     finally:
",0,train
42318e0e71123b9e776f85fb2c397b3cbda3d596,tensorflow/tensorflow,"Minor fixes to SpaceToDepth and DepthToSpace error strings.
Change: 145747120",depthtospace_op.cc,"@@ -59,7 +59,7 @@ class DepthToSpaceOp : public OpKernel {
     static const int kRequiredDims = 4;
     OP_REQUIRES(context, kRequiredDims == dims,
                 errors::InvalidArgument(""Input rank should be: "", kRequiredDims,
-                                        ""instead of: "", dims));
+                                        "" instead of: "", dims));
 
     const int batch_size = input.dim_size(0);
     const int input_height = input.dim_size(1);
@@ -72,7 +72,7 @@ class DepthToSpaceOp : public OpKernel {
     OP_REQUIRES(
         context, input_depth % block_size_sq == 0,
         errors::InvalidArgument(""Input depth dimension "", input_depth,
-                                ""should be divisible by: "", block_size_sq));
+                                "" should be divisible by: "", block_size_sq));
 
     const int output_depth = input_depth / block_size_sq;
     const int output_width = input_width * block_size_;
",0,train
42318e0e71123b9e776f85fb2c397b3cbda3d596,tensorflow/tensorflow,"Minor fixes to SpaceToDepth and DepthToSpace error strings.
Change: 145747120",spacetodepth_op.cc,"@@ -59,7 +59,7 @@ class SpaceToDepthOp : public OpKernel {
     static const int kRequiredDims = 4;
     OP_REQUIRES(context, kRequiredDims == dims,
                 errors::InvalidArgument(""Input rank should be: "", kRequiredDims,
-                                        ""instead of: "", dims));
+                                        "" instead of: "", dims));
 
     const int batch_size = input.dim_size(0);
     const int height = input.dim_size(1);
@@ -67,11 +67,11 @@ class SpaceToDepthOp : public OpKernel {
     const int input_depth = input.dim_size(3);
 
     // Both width and height must be divisible by block_size.
-    OP_REQUIRES(
-        context, (width % block_size_) == 0 && (height % block_size_) == 0,
-        errors::InvalidArgument(""Image width "", width, "" and height "", height,
-                                ""should be divisible by block_size: "",
-                                block_size_));
+    OP_REQUIRES(context,
+                (width % block_size_) == 0 && (height % block_size_) == 0,
+                errors::InvalidArgument(
+                    ""Image width "", width, "" and height "", height,
+                    "" should be divisible by block_size: "", block_size_));
 
     const int block_size_sq = block_size_ * block_size_;
 
",0,train
498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called.
To properly handle queues that have been populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops.
Change: 154866747",shape_refiner.cc,"@@ -468,6 +468,8 @@ Status ShapeRefiner::RunShapeFn(const Node* node,
   std::vector<ShapeHandle> input_tensors_as_shapes;
 
   // Run the shape inference function, and return if there was an error.
+  c->set_input_tensors(input_tensors);
+  c->set_input_tensors_as_shapes(input_tensors_as_shapes);
   if (op_reg_data->shape_inference_fn) {
     TF_RETURN_IF_ERROR(c->Run(op_reg_data->shape_inference_fn));
   } else {
",0,train
498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called.
To properly handle queues that have been populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops.
Change: 154866747",graph_properties.cc,"@@ -60,8 +60,9 @@ Status GraphProperties::InferStatically() {
       if (!qctx) {
         continue;
       }
-      shape_inference::ShapeHandle data_shp = qctx->output_handle_shape(0);
-      if (qctx->FullyDefined(data_shp)) {
+      DataType queue_type = qctx->output_handle_dtype(0);
+      shape_inference::ShapeHandle queue_shp = qctx->output_handle_shape(0);
+      if (qctx->FullyDefined(queue_shp) && queue_type != DT_INVALID) {
         continue;
       }
 
@@ -73,16 +74,20 @@ Status GraphProperties::InferStatically() {
         if (node->type_string().find(""Enqueue"") != std::string::npos) {
           if (ctx->num_inputs() == 2) {
             const DataType dtype = node->input_type(1);
-            shape_inference::ShapeHandle shp = ctx->input(1);
-            shape_inference::ShapeHandle refined;
-            TF_RETURN_IF_ERROR(qctx->Merge(shp, data_shp, &refined));
-            if (qctx->set_output_handle_shape(0, refined) ||
-                qctx->set_output_handle_dtype(0, dtype)) {
-              new_shapes.push(qnode);
+            if (queue_type == DT_INVALID) {
+              queue_type = dtype;
+            } else {
+              CHECK_EQ(queue_type, dtype);
             }
+            shape_inference::ShapeHandle shp = ctx->input(1);
+            TF_RETURN_IF_ERROR(qctx->Merge(queue_shp, shp, &queue_shp));
           }
         }
       }
+      if (qctx->set_output_handle_dtype(0, queue_type) ||
+          qctx->set_output_handle_shape(0, queue_shp)) {
+        new_shapes.push(qnode);
+      }
     }
     // Propagate the shapes in the transitive fan-out of the queue.
     done = new_shapes.empty();
",0,train
498565a68898b4ce0a696e24fc36a52792141631,tensorflow/tensorflow,"Reset the inputs to ShapeRefiner::RunShapeFn so that it behaves the same every time it's called.
To properly handle queues that have been populated by several enqueue ops, merge the shapes of the inputs to all the enqueue ops before calling InferenceContext::set_output_handle_shape(). This ensures that we detect incorrect queue setups (where the 2 enqueue ops might generate tensors with incompatible shapes), and that we take all the known shape information instead of that of just one of the enqueue ops.
Change: 154866747",graph_properties_test.cc,"@@ -177,6 +177,19 @@ TEST_F(GraphPropertiesTest, Queues) {
   auto dequeue2 =
       ops::QueueDequeue(root.WithOpName(""Dequeue2""), q2, {DataType::DT_FLOAT});
 
+  auto q3 =
+      ops::RandomShuffleQueue(root.WithOpName(""Queue3""), {DataType::DT_FLOAT});
+  auto dequeue3 =
+      ops::QueueDequeue(root.WithOpName(""Dequeue3""), q3, {DataType::DT_FLOAT});
+
+  auto q4 =
+      ops::RandomShuffleQueue(root.WithOpName(""Queue4""), {DataType::DT_FLOAT});
+  auto enqueue4 = ops::QueueEnqueue(root.WithOpName(""Enqueue4""), q4, {square2});
+  auto enqueue4_2 =
+      ops::QueueEnqueue(root.WithOpName(""Enqueue4_2""), q4, {dequeue3[0]});
+  auto dequeue4 =
+      ops::QueueDequeue(root.WithOpName(""Dequeue4""), q4, {DataType::DT_FLOAT});
+
   GrapplerItem item;
   TF_CHECK_OK(root.ToGraphDef(&item.graph));
 
@@ -200,6 +213,18 @@ TEST_F(GraphPropertiesTest, Queues) {
   EXPECT_EQ(2, prop2.shape().dim_size());
   EXPECT_EQ(3, prop2.shape().dim(0).size());
   EXPECT_EQ(7, prop2.shape().dim(1).size());
+
+  // The dequeue3 op shape is unknown. The square2 op shape is known. Verify
+  // that we merge the 2 properly to determine the shape of the data coming out
+  // of the queue.
+  const auto props4 = properties.GetOutputProperties(""Dequeue4"");
+  EXPECT_EQ(1, props4.size());
+  const OpInfo::TensorProperties& prop4 = props4[0];
+  EXPECT_EQ(DT_FLOAT, prop4.dtype());
+  EXPECT_FALSE(prop4.shape().unknown_rank());
+  EXPECT_EQ(2, prop4.shape().dim_size());
+  EXPECT_EQ(3, prop4.shape().dim(0).size());
+  EXPECT_EQ(7, prop4.shape().dim(1).size());
 }
 
 }  // namespace
",0,train
07bba62974b2e9bc39c3161be5fcdcb9b793757f,tensorflow/tensorflow,"Adding checks for control flow nodes when calculating device colocation within the graph. In addition to the prior fix to colocation_graph.cc, this fixes the issue underlying some skipped GPU-enabled tests in placement_test.py; this commit re-enables those skipped placement tests.

PiperOrigin-RevId: 395838212
Change-Id: I47e99b471196be5167a075b5ee17e1bd5504e92c",colocation_graph.cc,"@@ -819,11 +819,12 @@ Status ColocationGraph::AddHostOnlyDataTypesConstraints() {
     };
 
     auto enter = [&](Node* n) -> void {
+      // TODO(b/199443424): Replace this logic with propagated type information.
       if (data::DatasetOpKernel::IsDatasetOp(n->op_def())) {
         // NOTE: Datasets are expected to live on the host. This code should be
         // updated if that changes. Under this assumption, however, we must
         // locate some ops on the host when the input is a dataset variant.
-        if (node->IsRetval() || node->IsIdentity()) {
+        if (node->IsRetval() || node->IsIdentity() || node->IsControlFlow()) {
           is_host_data_type = true;
         }
       } else {
",0,test
07bba62974b2e9bc39c3161be5fcdcb9b793757f,tensorflow/tensorflow,"Adding checks for control flow nodes when calculating device colocation within the graph. In addition to the prior fix to colocation_graph.cc, this fixes the issue underlying some skipped GPU-enabled tests in placement_test.py; this commit re-enables those skipped placement tests.

PiperOrigin-RevId: 395838212
Change-Id: I47e99b471196be5167a075b5ee17e1bd5504e92c",placement_test.py,"@@ -66,7 +66,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @combinations.generate(test_base.eager_only_combinations())
   def testWhile(self):
-    self.skipTest(""b/166625126"")
 
     @def_function.function
     def f():
@@ -121,7 +120,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @combinations.generate(test_base.eager_only_combinations())
   def testCond(self):
-    self.skipTest(""b/166625126"")
     # Ideally, placer should avoid cross-device copies even when the cond op
     # has no placement constraints.
     @def_function.function
@@ -141,7 +139,6 @@ class PlacementTest(test_base.DatasetTestBase, parameterized.TestCase):
 
   @combinations.generate(test_base.eager_only_combinations())
   def testId(self):
-    self.skipTest(""b/166625126"")
     # Ideally, placer should know that Identity(dataset) should be on the same
     # device as the dataset.
     @def_function.function
",0,test
f8a98002491b7cd5f04ec7def6fa7dc30a66215a,tensorflow/tensorflow,"Reenable test.

PiperOrigin-RevId: 155894188",bias_op_test.py,"@@ -184,11 +184,8 @@ class BiasAddTest(test.TestCase):
       if dtype == dtypes.float64:
         threshold = 1e-10
       self.assertAllClose(tensor_jacob_t, tensor_jacob_n, threshold, threshold)
-      # TODO(annarev): Re-add assertion for float16, float32 dtypes and NCHW
-      # once we figure out why this check started failing with cuda mavx.
-      if dtype == dtypes.float64 or data_format != ""NCHW"":
-        self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
-        self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
+      self.assertAllClose(bias_jacob_t, bias_jacob_n, threshold, threshold)
+      self.assertAllClose(grad_jacob_t, grad_jacob_n, threshold, threshold)
 
   def testGradientTensor(self):
     for (data_format, use_gpu) in GetTestConfigs():
",0,test
eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor

It does not have to always be part of an EagerTensor and could instead
be stored in a __dict__.

Note that as a side-effect
* an EagerTensor with a _keras_mask always has a materialized __dict__ and
  consumes ~280 bytes more;
* EagerTensor._keras_mask lookup is slightly less efficient.

PiperOrigin-RevId: 245298840",pywrap_tensor.cc,"@@ -283,9 +283,6 @@ typedef struct EagerTensor {
   // cycles, and hence don't provide GC support for it.
   PyObject* handle_data;
 
-  // This stores `_keras_mask` object and is set by Tensorflow layers.
-  PyObject* keras_mask;
-
   // This stores `_tensor_shape`, a cached `TensorShape` object, and is set the
   // first time that `_EagerTensorBase`'s `shape` property is called.
   PyObject* tensor_shape;
@@ -349,8 +346,6 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) {
   Py_INCREF(Py_None);
   self->handle_data = Py_None;
   Py_INCREF(Py_None);
-  self->keras_mask = Py_None;
-  Py_INCREF(Py_None);
   self->tensor_shape = Py_None;
   self->status = TF_NewStatus();
   self->dict = nullptr;
@@ -498,7 +493,6 @@ void EagerTensor_dealloc(EagerTensor* self) {
 
   TF_DeleteStatus(self->status);
   Py_DECREF(self->handle_data);
-  Py_DECREF(self->keras_mask);
   Py_DECREF(self->tensor_shape);
   // If an attribute dictionary has been created, release it. Note that this
   // is only ever created by CPython's attribute setting methods; we don't
@@ -593,19 +587,6 @@ static int EagerTensor_settensor_handle(EagerTensor* self, PyObject* value,
   return 0;
 }
 
-static PyObject* EagerTensor_keras_mask(EagerTensor* self, void* unused) {
-  Py_INCREF(self->keras_mask);
-  return self->keras_mask;
-}
-
-static int EagerTensor_setkeras_mask(EagerTensor* self, PyObject* value,
-                                     void* unused) {
-  Py_DECREF(self->keras_mask);
-  Py_INCREF(value);
-  self->keras_mask = value;
-  return 0;
-}
-
 static PyObject* EagerTensor_tensor_shape(EagerTensor* self, void* unused) {
   Py_INCREF(self->tensor_shape);
   return self->tensor_shape;
@@ -697,9 +678,6 @@ static PyGetSetDef EagerTensor_getseters[] = {
     {const_cast<char*>(""_handle_data""), (getter)EagerTensor_tensor_handle,
      (setter)EagerTensor_settensor_handle, const_cast<char*>(""_tensor_handle""),
      nullptr},
-    {const_cast<char*>(""_keras_mask""), (getter)EagerTensor_keras_mask,
-     (setter)EagerTensor_setkeras_mask, const_cast<char*>(""_keras_mask""),
-     nullptr},
     {const_cast<char*>(""_tensor_shape""), (getter)EagerTensor_tensor_shape,
      (setter)EagerTensor_settensor_shape, const_cast<char*>(""_tensor_shape""),
      nullptr},
@@ -824,8 +802,6 @@ PyObject* EagerTensorFromHandle(TFE_TensorHandle* handle) {
     Py_INCREF(Py_None);
     t->handle_data = Py_None;
     Py_INCREF(Py_None);
-    t->keras_mask = Py_None;
-    Py_INCREF(Py_None);
     t->tensor_shape = Py_None;
     t->handle = handle;
     t->status = TF_NewStatus();
",0,train
eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor

It does not have to always be part of an EagerTensor and could instead
be stored in a __dict__.

Note that as a side-effect
* an EagerTensor with a _keras_mask always has a materialized __dict__ and
  consumes ~280 bytes more;
* EagerTensor._keras_mask lookup is slightly less efficient.

PiperOrigin-RevId: 245298840",network.py,"@@ -862,8 +862,9 @@ class Network(base_layer.Layer):
           if 'training' in argspec:
             kwargs.setdefault('training', training)
           if 'mask' in argspec:
-            computed_masks = nest.map_structure(lambda t: t._keras_mask,
-                                                computed_tensors)
+            computed_masks = nest.map_structure(
+                lambda t: getattr(t, '_keras_mask', None),
+                computed_tensors)
             kwargs.setdefault('mask', computed_masks)
 
           # Compute outputs.
",0,train
eb03daf8c03cc5c7737afaa1123347976cc1eb35,tensorflow/tensorflow,"Removed _keras_mask from EagerTensor

It does not have to always be part of an EagerTensor and could instead
be stored in a __dict__.

Note that as a side-effect
* an EagerTensor with a _keras_mask always has a materialized __dict__ and
  consumes ~280 bytes more;
* EagerTensor._keras_mask lookup is slightly less efficient.

PiperOrigin-RevId: 245298840",training_eager.py,"@@ -120,8 +120,7 @@ def _model_loss(model,
   outs = model(inputs, **kwargs)
 
   outs = nest.flatten(outs)
-  # `None` by default for `EagerTensors`.
-  masks = [t._keras_mask for t in outs]
+  masks = [getattr(t, '_keras_mask', None) for t in outs]
   targets = nest.flatten(targets)
 
   # Used to keep track of individual output losses.
",0,train
c25692039b954bb5aa891a26ff1744aab137eee7,tensorflow/tensorflow,"[TF:Profiler] Fix use-after-free bug introduced by previous change.

PiperOrigin-RevId: 295191859
Change-Id: I36584af1414c1b4e424f558fd7a4aec46d26cf50",profiler_server.cc,"@@ -21,7 +21,6 @@ limitations under the License.
 #include ""grpcpp/grpcpp.h""
 #include ""absl/strings/str_cat.h""
 #include ""tensorflow/core/platform/env.h""
-#include ""tensorflow/core/profiler/profiler_service.grpc.pb.h""
 #include ""tensorflow/core/profiler/rpc/profiler_service_impl.h""
 #include ""tensorflow/core/util/ptr_util.h""
 
@@ -29,11 +28,10 @@ namespace tensorflow {
 
 void ProfilerServer::StartProfilerServer(int32 port) {
   string server_address = absl::StrCat(""0.0.0.0:"", port);
-  std::unique_ptr<grpc::ProfilerService::Service> service =
-      CreateProfilerService();
+  service_ = CreateProfilerService();
   ::grpc::ServerBuilder builder;
   builder.AddListeningPort(server_address, ::grpc::InsecureServerCredentials());
-  builder.RegisterService(service.get());
+  builder.RegisterService(service_.get());
   server_ = builder.BuildAndStart();
   LOG(INFO) << ""Profiling Server listening on "" << server_address;
 }
",0,train
c25692039b954bb5aa891a26ff1744aab137eee7,tensorflow/tensorflow,"[TF:Profiler] Fix use-after-free bug introduced by previous change.

PiperOrigin-RevId: 295191859
Change-Id: I36584af1414c1b4e424f558fd7a4aec46d26cf50",profiler_server.h,"@@ -19,11 +19,10 @@ limitations under the License.
 
 #include ""grpcpp/grpcpp.h""
 #include ""tensorflow/core/platform/types.h""
+#include ""tensorflow/core/profiler/profiler_service.grpc.pb.h""
 
 namespace tensorflow {
 
-class Thread;
-
 class ProfilerServer {
  public:
   ~ProfilerServer();
@@ -31,6 +30,7 @@ class ProfilerServer {
   void StartProfilerServer(int32 port);
 
  private:
+  std::unique_ptr<grpc::ProfilerService::Service> service_;
   std::unique_ptr<::grpc::Server> server_;
 };
 
",0,train
c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer.

PiperOrigin-RevId: 312209299
Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",connected_traceme.h,"@@ -0,0 +1,122 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_
+#define TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_
+
+#include <string>
+
+#include ""absl/strings/string_view.h""
+#include ""absl/types/optional.h""
+#include ""tensorflow/core/profiler/lib/traceme.h""
+#include ""tensorflow/core/profiler/lib/traceme_encode.h""
+
+namespace tensorflow {
+namespace profiler {
+
+/*
+ * TraceMeProducer and TraceMeConsumer are used to correlate TraceMe events on
+ * different threads. TraceMeProducer generates the context information to be
+ * passed to TraceMeConsumer, which consists of the context id and optionally
+ * the context name. They may be provided by the user. Then, the events of the
+ * same context information can be correlated during the analysis.
+ *
+ * Example Usages:
+ * (1) Using the user-provided context name and id. The user is responsible for
+ *     providing the same context name and id to TraceMeProducer and
+ *     TraceMeConsumer.
+ * [Producer Thread]
+ * // user_context_id is provided by the user.
+ * TraceMeProducer producer(
+ *     [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); },
+ *     ""executor_context"", user_context_id);
+ * [Consumer Thread]
+ * // user_context_id is provided by the user.
+ * TraceMeConsumer consumer(
+ *     [&] { return ""op_execute""; }, user_context_id, ""executor_context"");
+ *
+ * (2) Using the user-provided context name and generic id. The user is
+ *     responsible for passing the TraceMeProducer's context id to
+ *     TraceMeConsumer as well as providing the same context name to
+ *     TraceMeProducer and TraceMeConsumer.
+ * [Producer Thread]
+ * TraceMeProducer producer(
+ *     [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); },
+ *     ""executor_context"");
+ * context_id = producer.GetContextId();
+ * // Pass context_id to the consumer thread.
+ * [Consumer Thread]
+ * // context_id is passed from the producer thread.
+ * TraceMeConsumer consumer(
+ *     [&] { return ""op_execute""; }, context_id, ""executor_context"");
+ *
+ * (3) Using the generic context information. The user is responsible for
+ *     passing the TraceMeProducer's context id to TraceMeConsumer.
+ * [Producer Thread]
+ * TraceMeProducer producer(
+ *     [&] { return TraceMeEncode(""op_dispatch"", {{""op_type"", ""matmul""}}); });
+ * context_id = producer.GetContextId();
+ * // Pass context_id to the consumer thread.
+ * [Consumer Thread]
+ * // context_id is passed from the producer thread.
+ * TraceMeConsumer consumer([&] { return ""op_execute""; }, context_id);
+ */
+class TraceMeProducer {
+ public:
+  template <typename NameT>
+  explicit TraceMeProducer(NameT name, absl::string_view context_name = """",
+                           absl::optional<uint64> context_id = absl::nullopt,
+                           int level = 2)
+      : trace_me_(name, level) {
+    trace_me_.AppendMetadata([&] {
+      context_id_ =
+          context_id.has_value() ? *context_id : TraceMe::NewActivityId();
+      if (context_name.empty()) {
+        return TraceMeEncode({{""$p"", context_id_}});
+      } else {
+        return TraceMeEncode({{""$pn"", context_name}, {""$p"", context_id_}});
+      }
+    });
+  }
+
+  uint64 GetContextId() const { return context_id_; }
+
+ private:
+  TraceMe trace_me_;
+  uint64 context_id_ = 0;
+};
+
+class TraceMeConsumer {
+ public:
+  template <typename NameT>
+  TraceMeConsumer(NameT name, uint64 context_id,
+                  absl::string_view context_name = """", int level = 2)
+      : trace_me_(name, level) {
+    trace_me_.AppendMetadata([&] {
+      if (context_name.empty()) {
+        return TraceMeEncode({{""$c"", context_id}});
+      } else {
+        return TraceMeEncode({{""$cn"", context_name}, {""$c"", context_id}});
+      }
+    });
+  }
+
+ private:
+  TraceMe trace_me_;
+};
+
+}  // namespace profiler
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_PROFILER_LIB_CONNECTED_TRACEME_H_
",0,train
c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer.

PiperOrigin-RevId: 312209299
Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",traceme.h,"@@ -248,6 +248,14 @@ class TraceMe {
 #endif
   }
 
+  static uint64 NewActivityId() {
+#if !defined(IS_MOBILE_PLATFORM)
+    return TraceMeRecorder::NewActivityId();
+#else
+    return 0;
+#endif
+  }
+
  private:
   // Activity ID or start time used when tracing is disabled.
   constexpr static uint64 kUntracedActivity = 0;
",0,train
c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer.

PiperOrigin-RevId: 312209299
Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",xplane_schema.cc,"@@ -147,6 +147,11 @@ const StatTypeMap& GetStatTypeMap() {
       {""region_type"", kRegionType},
       {""data_type"", kDataType},
       {""shape"", kTensorShapes},
+      // Schema related.
+      {""$pn"", kProducerContextName},
+      {""$cn"", kConsumerContextName},
+      {""$p"", kProducerId},
+      {""$c"", kConsumerId},
       // Device trace arguments.
       {""device_id"", kDeviceId},
       {""context_id"", kContextId},
",0,train
c87d12a5e9bc4c568bd310c2266f1f28264e20fb,tensorflow/tensorflow,"Introduce TraceMeProducer and TraceMeConsumer.

PiperOrigin-RevId: 312209299
Change-Id: I304049413d332b17e141e3f85486f9676e2f859a",xplane_schema.h,"@@ -139,6 +139,11 @@ enum StatType {
   kRegionType,
   kDataType,
   kTensorShapes,
+  // Schema related.
+  kProducerContextName,
+  kConsumerContextName,
+  kProducerId,
+  kConsumerId,
   // Device trace arguments.
   kDeviceId,
   kContextId,
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",tf_tfl_flatbuffer_helpers.cc,"@@ -269,11 +269,6 @@ Status PopulateQuantizationSpecs(
       quant_specs->inference_type = tensorflow::DT_QINT8;
       quant_specs->inference_input_type = tensorflow::DT_QINT8;
     }
-  } else {
-    // This flag is incompatible with post_training_quantize() as only
-    // QAT models can provide the desired range.
-    quant_specs->disable_infer_tensor_range =
-        toco_flags.disable_infer_tensor_range();
   }
 
   // Add information about half-precision support if fp16 quantization applies.
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",quantization_utils.h,"@@ -520,8 +520,9 @@ struct ConvertUnsignedToSigned : public OpRewritePattern<Q> {
     int num_bits = qtype.getStorageTypeIntegralWidth();
     if (num_bits == 8) {
       // If storage is 8-bit, trained num bits may be less than 8 so check here.
-      num_bits =
-          static_cast<int>(std::ceil(std::log2(qtype.getStorageTypeMax())));
+      const double range = static_cast<double>(qtype.getStorageTypeMax() -
+                                               qtype.getStorageTypeMin());
+      num_bits = static_cast<int>(std::ceil(std::log2(range)));
     }
     // This is a positive value, and will be applied on zero points and fixed
     // point ranges.
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",tf_tfl_passes.cc,"@@ -277,11 +277,10 @@ void AddPostVariableFreezingTFToTFLConversionPasses(
         pass_manager->nest<mlir::FuncOp>(), layout_optimization_options);
     // Prepare for TFLite dialect, rerun canonicalization, and then legalize to
     // the TFLite dialect.
-    pass_manager->addNestedPass<mlir::FuncOp>(
-        mlir::TFL::CreatePrepareTFPass(pass_config.unfold_batch_matmul,
-                                       /*allow_bf16_and_f16_type_legalization=*/
-                                       !pass_config.runtime_verification,
-                                       toco_flags.use_fake_quant_num_bits()));
+    pass_manager->addNestedPass<mlir::FuncOp>(mlir::TFL::CreatePrepareTFPass(
+        pass_config.unfold_batch_matmul,
+        /*allow_bf16_and_f16_type_legalization=*/!pass_config
+            .runtime_verification));
     pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
     if (pass_config.shape_inference) {
       // Add a shape inference pass to optimize away the unnecessary casts.
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",quantize.cc,"@@ -254,8 +254,7 @@ void QuantizePass::runOnFunction() {
 
   // TODO(b/202451048): separate full and weight-only post-training dynamic
   // range quantization
-  if (quant_specs.weight_quantization || enable_dynamic_range_quantization ||
-      quant_specs.disable_infer_tensor_range) {
+  if (quant_specs.weight_quantization || enable_dynamic_range_quantization) {
     patterns.insert<TFLDynamicRangeQuantization>(ctx, quant_params);
   } else {
     patterns.insert<TFLFullQuantization, TFLFullQuantizationReverse>(
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",convert.py,"@@ -450,8 +450,6 @@ def build_toco_flags(inference_type=dtypes.float32,
                      disable_per_channel_quantization=False,
                      enable_mlir_dynamic_range_quantizer=False,
                      tf_quantization_mode=None,
-                     disable_infer_tensor_range=False,
-                     use_fake_quant_num_bits=False,
                      **_):
   """"""Build the TOCO flags object from params.""""""
   toco = _toco_flags_pb2.TocoFlags()
@@ -500,8 +498,6 @@ def build_toco_flags(inference_type=dtypes.float32,
   toco.enable_mlir_dynamic_range_quantizer = enable_mlir_dynamic_range_quantizer
   if tf_quantization_mode:
     toco.tf_quantization_mode = tf_quantization_mode
-  toco.disable_infer_tensor_range = disable_infer_tensor_range
-  toco.use_fake_quant_num_bits = use_fake_quant_num_bits
   return toco
 
 
@@ -541,9 +537,7 @@ def build_toco_convert_protos(input_tensors,
                               supported_backends=None,
                               disable_per_channel_quantization=False,
                               enable_mlir_dynamic_range_quantizer=False,
-                              tf_quantization_mode=None,
-                              disable_infer_tensor_range=False,
-                              use_fake_quant_num_bits=False):
+                              tf_quantization_mode=None):
   """"""Builds protocol buffers describing a conversion of a model using TOCO.
 
   Typically this is to convert from TensorFlow GraphDef to TFLite, in which
@@ -643,9 +637,7 @@ def build_toco_convert_protos(input_tensors,
       If false, the old TOCO dynamic range quantizer is used.
     tf_quantization_mode: Indicates the mode of TF Quantization when the
       output model is used for TF Quantization.
-    disable_infer_tensor_range: Disable infering tensor ranges.
-    use_fake_quant_num_bits: Allow quantization parameters to be calculated from
-      num_bits attribute.
+
   Returns:
     model_flags, toco_flags, debug_info: three protocol buffers describing the
       conversion process and debug information.
@@ -683,9 +675,7 @@ def build_toco_convert_protos(input_tensors,
       supported_backends=supported_backends,
       disable_per_channel_quantization=disable_per_channel_quantization,
       enable_mlir_dynamic_range_quantizer=enable_mlir_dynamic_range_quantizer,
-      tf_quantization_mode=tf_quantization_mode,
-      disable_infer_tensor_range=disable_infer_tensor_range,
-      use_fake_quant_num_bits=use_fake_quant_num_bits)
+      tf_quantization_mode=tf_quantization_mode)
   model = _model_flags_pb2.ModelFlags()
   model.change_concat_input_ranges = change_concat_input_ranges
   for idx, input_tensor in enumerate(input_tensors):
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite.py,"@@ -236,8 +236,7 @@ class QuantizationMode(object):
                representative_dataset,
                graph_def,
                disable_per_channel=False,
-               experimental_new_dynamic_range_quantizer=False,
-               experimental_low_bit_qat=False):
+               experimental_new_dynamic_range_quantizer=False):
     self._optimizations = optimizations
     for deprecated_optimization in [
         Optimize.OPTIMIZE_FOR_SIZE, Optimize.OPTIMIZE_FOR_LATENCY
@@ -256,9 +255,6 @@ class QuantizationMode(object):
 
     self._enable_new_dynamic_range_quantizer = (
         experimental_new_dynamic_range_quantizer)
-    # Allow training with lower than 8 bit weights to be converted
-    # to constants with trained scale.
-    self._experimental_low_bit_qat = experimental_low_bit_qat
 
   # TODO(b/162537905): Refactor the following quantization functions -
   # re-organize and refactor for better readability.
@@ -289,12 +285,10 @@ class QuantizationMode(object):
 
   def is_integer_quantize(self):
     return (self.is_post_training_integer_quantize() or
-            self.is_training_time_int8_allow_float() or
-            self.is_training_time_low_bit_allow_float())
+            self.is_training_time_int8_allow_float())
 
   def is_training_time_int8_allow_float(self):
-    return (not self.is_training_time_low_bit_allow_float() and
-            self.any_optimization_enabled() and
+    return (self.any_optimization_enabled() and
             self.contains_training_quant_op())
 
   def is_bfloat16_inference_allowed(self):
@@ -333,11 +327,6 @@ class QuantizationMode(object):
                 self.post_training_dynamic_range_int8() or
                 self.post_training_fp16())
 
-  def is_training_time_low_bit_allow_float(self):
-    return (self.any_optimization_enabled() and
-            self.contains_training_quant_op() and
-            self._experimental_low_bit_qat)
-
   def activations_type(self):
     if self.is_integer_quantize():
       if self._is_int16x8_target_required():
@@ -351,15 +340,12 @@ class QuantizationMode(object):
     """"""Flags to the converter.""""""
 
     if self.is_integer_quantize():
-      is_low_bit_qat = self.is_training_time_low_bit_allow_float()
       return {
-          ""inference_type"": (inference_ty if inference_ty is not None else
-                             self.activations_type()),
+          ""inference_type"": (
+              inference_ty if inference_ty else self.activations_type()),
           ""inference_input_type"": _dtypes.float32,
           ""post_training_quantize"": False,  # disable dynamic range quantization
-          ""quantize_to_float16"": False,  # disable float16 quantization
-          ""disable_infer_tensor_range"": is_low_bit_qat,
-          ""use_fake_quant_num_bits"": is_low_bit_qat,
+          ""quantize_to_float16"": False  # disable float16 quantization
       }
     elif self.post_training_dynamic_range_int8():
       return {
@@ -388,8 +374,7 @@ class QuantizationMode(object):
       # Note this might still trigger (uint8) quantization to be compatible with
       # TOCO.
       return {
-          ""inference_type"": (
-              inference_ty if inference_ty is not None else _dtypes.float32),
+          ""inference_type"": inference_ty if inference_ty else _dtypes.float32,
           ""inference_input_type"": inference_input_ty,
           ""post_training_quantize"": False,  # enable dynamic range quantization
           ""quantize_to_float16"": False,  # disable float16 quantization
@@ -504,8 +489,6 @@ class TFLiteConverterBase(object):
     # by default and remove the flag once feature parity with the old quantizer
     # is verified.
     self._experimental_new_dynamic_range_quantizer = False
-    # Experimental flag to enable low-bit QAT in 8 bit.
-    self._experimental_low_bit_qat = False
 
   def _grappler_config(self, optimizers=None):
     """"""Creates a tf.compat.v1.ConfigProto for configuring Grappler.
@@ -685,8 +668,7 @@ class TFLiteConverterBase(object):
     quant_mode = QuantizationMode(
         self.optimizations, self.target_spec, self.representative_dataset,
         graph_def, self._experimental_disable_per_channel,
-        self._experimental_new_dynamic_range_quantizer,
-        self._experimental_low_bit_qat)
+        self._experimental_new_dynamic_range_quantizer)
     converter_kwargs.update({
         ""optimization_default"":
             quant_mode.any_optimization_enabled(),
@@ -698,8 +680,6 @@ class TFLiteConverterBase(object):
             quant_mode.is_post_training_integer_quantize(),
         ""optimization_qat"":
             quant_mode.is_training_time_int8_allow_float(),
-        ""optimization_low_bit_qat"":
-            quant_mode.is_training_time_low_bit_allow_float(),
         ""optimization_sparsify"":
             self._sparsify_model(),
         ""activations_type"":
@@ -885,8 +865,7 @@ class TFLiteConverterBaseV2(TFLiteConverterBase):
     self._quant_mode = QuantizationMode(
         self.optimizations, self.target_spec, self.representative_dataset,
         graph_def, self._experimental_disable_per_channel,
-        self._experimental_new_dynamic_range_quantizer,
-        self._experimental_low_bit_qat)
+        self._experimental_new_dynamic_range_quantizer)
     self._validate_inference_input_output_types(self._quant_mode)
 
     if not self._is_unknown_shapes_allowed():
@@ -1060,8 +1039,7 @@ class TFLiteSavedModelConverterV2(TFLiteConverterBaseV2):
     quant_mode = QuantizationMode(
         self.optimizations, self.target_spec, self.representative_dataset,
         graph_def, self._experimental_disable_per_channel,
-        self._experimental_new_dynamic_range_quantizer,
-        self._experimental_low_bit_qat)
+        self._experimental_new_dynamic_range_quantizer)
     self._validate_inference_input_output_types(quant_mode)
 
     converter_kwargs = {
@@ -1883,8 +1861,7 @@ class TFLiteConverterBaseV1(TFLiteConverterBase):
     quant_mode = QuantizationMode(
         self.optimizations, self.target_spec, self.representative_dataset,
         self._graph_def, self._experimental_disable_per_channel,
-        self._experimental_new_dynamic_range_quantizer,
-        self._experimental_low_bit_qat)
+        self._experimental_new_dynamic_range_quantizer)
 
     optimized_graph = self._optimize_tf_model(self._graph_def,
                                               self._input_tensors,
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite_v2_test.py,"@@ -17,7 +17,6 @@
 
 import ctypes
 import functools
-import itertools
 import os
 import sys
 
@@ -2388,67 +2387,6 @@ class FromKerasModelTest(lite_v2_test_util.ModelTest):
     # quantization.
     self.assertEqual(np.int8, quantized_weight['dtype'])
 
-  @parameterized.named_parameters([
-      ('{}BitWeightOnly={}LowBit={}'.format(num_bits, weight_only, low_bit),
-       num_bits, weight_only, low_bit) for num_bits, weight_only, low_bit
-      in itertools.product((2, 4, 6), (True, False), (True, False))])
-  @test_util.run_v2_only
-  def testQATLowBitKerasModel(self, num_bits, weight_only, low_bit):
-    bit_max = (1 << (num_bits - 1)) - 1
-    bit_min = -bit_max
-    tf_input_shape = (5, 5, 3)
-    tflite_input_shape = (1,) + tf_input_shape
-    model, input_name, output_name = (self._createV2QATLowBitKerasModel(
-        tf_input_shape, weight_only, num_bits, bit_min, bit_max))
-    input_data = np.linspace(
-        0, 6, np.prod(tflite_input_shape)).reshape(tflite_input_shape)
-    tf_result = model(input_data)
-
-    converter = tf.lite.TFLiteConverter.from_keras_model(model)
-    converter.optimizations = [tf.lite.Optimize.DEFAULT]
-    if low_bit:
-      converter._experimental_low_bit_qat = True
-    tflite_model = converter.convert()
-
-    result = self._evaluateTFLiteModelUsingSignatureDef(
-        tflite_model, 'serving_default',
-        {input_name: input_data.astype(np.float32)})[output_name]
-    self.assertAllClose(
-        [np.linalg.norm(result - tf_result.numpy().astype(np.float32))], [0.0])
-    interpreter = tf.lite.Interpreter(model_content=tflite_model)
-    interpreter.allocate_tensors()
-    num_8bit_activations = 0
-    num_8bit_weights = 0
-    kernel_name = ('model/conv_wrapper/Conv2D;model/conv_wrapper/'
-                   'FakeQuantWithMinMaxVarsPerChannel')
-
-    for detail in interpreter.get_tensor_details():
-      if (detail['dtype'] == np.int8 and detail['name'] and
-          detail['name'] == kernel_name):
-        num_8bit_weights += 1
-        weights = interpreter.get_tensor(detail['index'])
-        if low_bit:
-          self.assertFalse((bit_min > weights).any() or
-                           (weights > bit_max).any())
-        else:
-          self.assertTrue((bit_min > weights).any() or
-                          (weights > bit_max).any())
-        self.assertIn('scales', detail['quantization_parameters'])
-        if low_bit and detail['quantization_parameters']['scales']:
-          self.assertAllClose(
-              detail['quantization_parameters']['scales'], [1.0])
-      elif detail['dtype'] == np.int8 and detail['name']:
-        self.assertFalse(weight_only)
-        self.assertIn('scales', detail['quantization_parameters'])
-        if detail['quantization_parameters']['scales']:
-          self.assertAllClose(
-              detail['quantization_parameters']['scales'], [6/255])
-        num_8bit_activations += 1
-
-    self.assertEqual(num_8bit_weights, 0 if weight_only and not low_bit else 1)
-    # 3 activations with full integer: conv_input, conv_output, reshape_output
-    self.assertEqual(num_8bit_activations, 0 if weight_only else 3)
-
 
 class FromJaxModelTest(lite_v2_test_util.ModelTest):
 
",0,train
a9919ccc922e3f06f046b2047a0dcc2b0a618393,tensorflow/tensorflow,"Allow low-bit QAT training parameters to be used for quantization parameters in TFLite.

PiperOrigin-RevId: 414377528
Change-Id: I8cd69bf80a254d5089f9f525c49f9fae3656a977",lite_v2_test_util.py,"@@ -20,7 +20,6 @@ import os
 from absl.testing import parameterized
 import numpy as np
 from six.moves import zip
-import tensorflow as tf
 
 from tensorflow.lite.python.interpreter import Interpreter
 from tensorflow.python.eager import def_function
@@ -207,43 +206,3 @@ class ModelTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     # the name of this test file.
     self.assertIn('lite_v2_test.py', file_names)
     self.assertNotIn('lite_test.py', file_names)
-
-  def _createV2QATLowBitKerasModel(self, shape, weight_only, num_bits, bit_min,
-                                   bit_max):
-    """"""Creates a simple QAT num_bits-Weight Keras Model.""""""
-    input_name = 'input'
-    output_name = 'scores'
-
-    class ConvWrapper(tf.keras.layers.Wrapper):
-      """"""A Wrapper for simulating QAT on Conv2D layers.""""""
-
-      def build(self, input_shape):
-        if not self.layer.built:
-          self.layer.build(input_shape)
-        self.quantized_weights = self.layer.kernel
-
-      def call(self, inputs):
-        self.layer.kernel = (
-            tf.quantization.fake_quant_with_min_max_vars_per_channel(
-                self.quantized_weights, min=[bit_min], max=[bit_max],
-                num_bits=num_bits, narrow_range=True))
-        if not weight_only:
-          quant_inputs = tf.quantization.fake_quant_with_min_max_vars(
-              inputs, min=0, max=6, num_bits=8)
-          outputs = self.layer.call(quant_inputs)
-          return tf.quantization.fake_quant_with_min_max_vars(
-              outputs, min=0, max=6, num_bits=8)
-        return self.layer.call(inputs)
-
-    input_tensor = tf.keras.layers.Input(shape, name=input_name)
-    kernel_shape = (shape[-1], 3, 3, 1)
-    # Ensure constant weights contains the min and max.
-    initial_weights = np.linspace(
-        bit_min, bit_max, np.prod(kernel_shape)).reshape(kernel_shape)
-    test_initializer = tf.constant_initializer(initial_weights)
-    x = ConvWrapper(tf.keras.layers.Conv2D(
-        1, (3, 3), kernel_initializer=test_initializer,
-        activation='relu6'))(input_tensor)
-    scores = tf.keras.layers.Flatten(name=output_name)(x)
-    model = tf.keras.Model(input_tensor, scores)
-    return model, input_name, output_name
",0,train
0c79d3cdee83af0cf3bd7a9c7522ea88981159a6,tensorflow/tensorflow,"Fix apparent typo in an exception message that refers to the input shape instead of the output shape.

PiperOrigin-RevId: 300148776
Change-Id: Idd3d40455e248688cb73d9d32f1ad11ef5fb389c",rebatch_dataset_test.py,"@@ -92,7 +92,8 @@ class RebatchDatasetTest(test_base.DatasetTestBase, parameterized.TestCase):
   def testScalarInputError(self):
     dataset = dataset_ops.Dataset.range(1024)
     distribute._RebatchDataset(dataset.batch(4), num_replicas=4)
-    with self.assertRaisesRegexp(ValueError, ""at least one dimension""):
+    with self.assertRaisesRegexp(ValueError, (""You can fix the issue ""
+                                              ""by adding the `batch`"")):
       distribute._RebatchDataset(dataset, num_replicas=4)
 
   @combinations.generate(
",0,train
0c79d3cdee83af0cf3bd7a9c7522ea88981159a6,tensorflow/tensorflow,"Fix apparent typo in an exception message that refers to the input shape instead of the output shape.

PiperOrigin-RevId: 300148776
Change-Id: Idd3d40455e248688cb73d9d32f1ad11ef5fb389c",distribute.py,"@@ -92,8 +92,10 @@ class _RebatchDataset(dataset_ops.UnaryDataset):
         return None
 
       if len(output_shape) < 1:
-        raise ValueError(""Input shape should have at least one dimension. ""
-                         ""Perhaps your input dataset is not batched?"")
+        raise ValueError(""Expected a dataset whose elements have rank >= 1 ""
+                         ""but found a dataset whose elements are scalars. ""
+                         ""You can fix the issue by adding the `batch` ""
+                         ""transformation to the dataset."")
       output_dims = [d.value for d in output_shape.dims]
 
       if output_dims[0] is not None and output_dims[0] % num_replicas == 0:
",0,train
55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register.

PiperOrigin-RevId: 341415820
Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_conv_op.cc,"@@ -0,0 +1,50 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/framework/common_shape_fns.h""
+#include ""tensorflow/core/framework/op.h""
+#include ""tensorflow/core/framework/op_kernel.h""
+#include ""tensorflow/core/framework/register_types.h""
+#include ""tensorflow/core/framework/shape_inference.h""
+
+namespace tensorflow {
+namespace risc {
+namespace experimental {
+
+typedef Eigen::ThreadPoolDevice CPUDevice;
+
+template <typename Device, typename T>
+class RiscConvOp : public OpKernel {
+ public:
+  explicit RiscConvOp(OpKernelConstruction* context) : OpKernel(context) {
+    // TODO(b/171294012): Implement RiscConv op.
+  }
+
+  void Compute(OpKernelContext* context) override {
+    // TODO(b/171294012): Implement RiscConv op.
+  }
+};
+
+#define REGISTER_CPU(T)                                           \
+  REGISTER_KERNEL_BUILDER(                                        \
+      Name(""RiscConv"").Device(DEVICE_CPU).TypeConstraint<T>(""T""), \
+      RiscConvOp<CPUDevice, T>);
+
+REGISTER_CPU(float);
+REGISTER_CPU(double);
+
+}  // namespace experimental
+}  // namespace risc
+}  // namespace tensorflow
",0,test
55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register.

PiperOrigin-RevId: 341415820
Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_ops.cc,"@@ -30,4 +30,15 @@ REGISTER_OP(""RiscAdd"")
     .SetIsAggregate()
     .SetIsCommutative();
 
+// TODO(b/171294012): change shape function.
+REGISTER_OP(""RiscConv"")
+    .Input(""input: T"")
+    .Input(""filter: T"")
+    .Output(""output: T"")
+    .Attr(""T: {float, double}"")
+    .Attr(""strides: list(int)"")
+    .Attr(GetConvnetDataFormatAttrString())
+    .SetShapeFn(shape_inference::UnknownShape)
+    .Attr(""dilations: list(int) = [1, 1, 1, 1]"");
+
 }  // namespace tensorflow
",0,test
55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register.

PiperOrigin-RevId: 341415820
Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_grad.py,"@@ -28,3 +28,10 @@ def _RiscAddGrad(_, grad):
   # pylint: disable=unused-argument
   # TODO(b/171294012): Implement gradient of RISC with RISC ops.
   return None, None
+
+
+@ops.RegisterGradient(""RiscConv"")
+def _RiscConvGrad(_, grad):
+  # pylint: disable=unused-argument
+  # TODO(b/171294012): Implement gradient of RISC with RISC ops.
+  return None, None
",0,test
55a311cb735689a431c6aa9a6c765c5c5c034ede,tensorflow/tensorflow,"Add RISC Conv Op register.

PiperOrigin-RevId: 341415820
Change-Id: Ibd5f4c939e22be2af61e434e6927898b74e523a5",risc_ops.py,"@@ -30,5 +30,20 @@ from tensorflow.python.ops.risc_ops_gen import *
 def risc_add(
     input_lhs,
     input_rhs,
-    name=""RISC_ADD""):
+    name='RISC_ADD'):
   return gen_risc_ops.risc_add(input_lhs, input_rhs, name=name)
+
+
+def risc_conv(x,
+              kernel,
+              strides,
+              data_format='NHWC',
+              dilations=None,
+              name='RISC_CONV'):
+  return gen_risc_ops.risc_conv(
+      x,
+      kernel,
+      strides,
+      data_format=data_format,
+      dilations=dilations,
+      name=name)
",0,test
76e45e9c05c794fd35966bbabbc9d0c7a900f6cc,tensorflow/tensorflow,"sort host plane by index of xline (which is already sorted by name)

PiperOrigin-RevId: 316135421
Change-Id: Ie8d3999724c129326346a2b902d4b2d5308372b2",trace_events_to_json.cc,"@@ -50,11 +50,13 @@ void AddResourceMetadata(uint32 device_id,
       AppendEscapedName(json, resource.name());
       absl::StrAppend(json, ""}},"");
     }
+    uint32 sort_index =
+        resource.sort_index() ? resource.sort_index() : resource_id;
     absl::StrAppendFormat(
         json,
         R""({""ph"":""M"",""pid"":%u,""tid"":%u,)""
         R""(""name"":""thread_sort_index"",""args"":{""sort_index"":%u}},)"",
-        device_id, resource_id, resource_id);
+        device_id, resource_id, sort_index);
   }
 }
 
",0,train
76e45e9c05c794fd35966bbabbc9d0c7a900f6cc,tensorflow/tensorflow,"sort host plane by index of xline (which is already sorted by name)

PiperOrigin-RevId: 316135421
Change-Id: Ie8d3999724c129326346a2b902d4b2d5308372b2",xplane_to_trace_events.cc,"@@ -40,10 +40,18 @@ Device BuildDeviceAndResource(const XPlaneVisitor& plane) {
   Device device;
   device.set_name(std::string(plane.Name()));
   device.set_device_id(plane.Id());
+
+  bool sort_by_ordinal = plane.Name() == kHostThreads;
+  int ordinal = 0;
   plane.ForEachLine([&](const XLineVisitor& line) {
     Resource resource;
     resource.set_resource_id(line.Id());
-    resource.set_name(std::string(line.Name()));
+    resource.set_name(std::string(line.DisplayName()));
+    if (sort_by_ordinal) {
+      // When sort_index is absent (i.e. 0), resource id will be used.
+      // Therefore sort_index starts with 1.
+      resource.set_sort_index(++ordinal);
+    }
     (*device.mutable_resources())[line.Id()] = resource;
   });
   return device;
",0,train
91c0e9a4f6efc2068c886ed2b7eaa5dbf888a3ba,tensorflow/tensorflow,"Add common activation functions to keras activation globals so they can be deserialized by `tf.keras.activation.deserialize`.

PiperOrigin-RevId: 368509629
Change-Id: Ie85b50a03f2334cbf943de589297bd7d47b01422",activations.py,"@@ -507,14 +507,6 @@ def serialize(activation):
   return serialize_keras_object(activation)
 
 
-# Add additional globals so that deserialize can find these common activation
-# functions
-leaky_relu = nn.leaky_relu
-log_softmax = nn.log_softmax
-relu6 = nn.relu6
-silu = nn.swish
-
-
 @keras_export('keras.activations.deserialize')
 @dispatch.add_dispatch_support
 def deserialize(name, custom_objects=None):
",0,train
91c0e9a4f6efc2068c886ed2b7eaa5dbf888a3ba,tensorflow/tensorflow,"Add common activation functions to keras activation globals so they can be deserialized by `tf.keras.activation.deserialize`.

PiperOrigin-RevId: 368509629
Change-Id: Ie85b50a03f2334cbf943de589297bd7d47b01422",activations_test.py,"@@ -39,7 +39,7 @@ class KerasActivationsTest(test.TestCase, parameterized.TestCase):
   def test_serialization(self):
     all_activations = [
         'softmax', 'relu', 'elu', 'tanh', 'sigmoid', 'hard_sigmoid', 'linear',
-        'softplus', 'softsign', 'selu', 'gelu', 'relu6'
+        'softplus', 'softsign', 'selu', 'gelu'
     ]
     for name in all_activations:
       fn = activations.get(name)
",0,train
aae44380256166127ece6f5010d4656556f5c60d,tensorflow/tensorflow,Remove duplicate import,py_func_test.py,"@@ -27,7 +27,6 @@ import tensorflow as tf
 
 from tensorflow.python.framework import errors
 from tensorflow.python.ops import script_ops
-from six.moves import xrange
 
 
 class PyOpTest(tf.test.TestCase):
",0,train
1051c377051b2ee24a495318737358d9ccf7280f,tensorflow/tensorflow,Copy ben's changes,convert_nodes.cc,"@@ -62,14 +62,14 @@ limitations under the License.
 
 #define TFTRT_RETURN_ERROR_IF_FALSE(status, node) \
   do {                                            \
-    if (status == false) {                        \
+    if ((status) == false) {                        \
       TFTRT_INTERNAL_ERROR_AT_NODE(node);         \
     }                                             \
   } while (0)
 
 #define TFTRT_RETURN_ERROR_IF_NULLPTR(ptr, node) \
   do {                                           \
-    if (ptr == nullptr) {                        \
+    if ((ptr) == nullptr) {                        \
       TFTRT_INTERNAL_ERROR_AT_NODE(node);        \
     }                                            \
   } while (0)
@@ -1577,12 +1577,14 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
   const nvinfer1::ITensor* tensor = inputs.at(0).tensor();
   TFAttrs attrs(node_def);
 
+  int c_index = 1;
   int h_index = 2;
   int w_index = 3;
   auto data_format = attrs.get<string>(""data_format"");
   if (data_format == ""NHWC"") {
     TF_RETURN_IF_ERROR(params->converter->TransposeTensor(
         const_cast<nvinfer1::ITensor*>(tensor), {0, 3, 1, 2}, &tensor));
+    c_index = 3;
     h_index = 1;
     w_index = 2;
     // TODO(jie): transpose it
@@ -1618,14 +1620,30 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
           << tf_stride[3];
   const nvinfer1::DimsHW stride(tf_stride[h_index], tf_stride[w_index]);
 
+  auto tf_dilations = attrs.get<std::vector<int>>(""dilations"");
+  if ((int)tf_dilations.size() != 4) {
+    return tensorflow::errors::InvalidArgument(
+        ""Convolution dilations field must specify 4 dimensions "" +
+        node_def.name());
+  }
+  if (tf_dilations[0] != 1 || tf_dilations[c_index] != 1) {
+    return tensorflow::errors::Unimplemented(
+        ""Dilation rate must be 1 for batch and channel dimensions, at "",
+        node_def.name());
+  }
+  nvinfer1::DimsHW dilation(tf_dilations[h_index], tf_dilations[w_index]);
+
   std::vector<std::pair<int, int>> padding;
   // TODO(jie): padding.
   if (attrs.get<string>(""padding"") == ""SAME"") {
     // This is NCHW tensor with no batch dimension.
     //  1 -> h
     //  2 -> w
+    nvinfer1::DimsHW effective_kernel_size = kernel_size;
+    effective_kernel_size.h() += (kernel_size.h() - 1) * (dilation.h() - 1);
+    effective_kernel_size.w() += (kernel_size.w() - 1) * (dilation.w() - 1);
     padding = CreateSamePadding(
-        stride, kernel_size,
+        stride, effective_kernel_size,
         {static_cast<int>(tensor_dim.d[1]), static_cast<int>(tensor_dim.d[2])});
   } else {
     padding = {{0, 0}, {0, 0}};
@@ -1659,6 +1677,7 @@ tensorflow::Status ConvertConv2DHelper(OpConverterParams* params, int group) {
   layer->setPadding({padding[0].first, padding[1].first});
   layer->setName(node_def.name().c_str());
   layer->setNbGroups(num_groups);
+  layer->setDilation(dilation);
   const nvinfer1::ITensor* output_tensor = layer->getOutput(0);
   VLOG(2) << ""TENSOR out: "" << DebugString(output_tensor->getDimensions());
   VLOG(2) << ""data_format: "" << data_format;
",0,train
1051c377051b2ee24a495318737358d9ccf7280f,tensorflow/tensorflow,Copy ben's changes,conv2d_test.py,"@@ -0,0 +1,172 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Model script to test TF-TensorRT integration.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test
+from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
+from tensorflow.python.ops import gen_nn_ops
+from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.platform import test
+
+
+def conv2d_layer(inputs, filters, kernel_size, strides=(1, 1), padding='valid',
+                 data_format='channels_last', dilation_rate=(1, 1), name=None):
+  dtype = inputs.dtype
+  c_axis = -1 if data_format == 'channels_last' else 1
+  nchan = inputs.shape[c_axis]
+  weights_shape = (kernel_size[0], kernel_size[1], nchan, filters)
+  weights = constant_op.constant(np.random.randn(*weights_shape), dtype=dtype)
+  padding = padding.upper()
+  if data_format == 'channels_last':
+    strides = [1] + list(strides) + [1]
+    dilations = [1] + list(dilation_rate) + [1]
+    data_format = 'NHWC'
+  else:
+    strides = [1, 1] + list(strides)
+    dilations = [1, 1] + list(dilation_rate)
+    data_format = 'NCHW'
+  return gen_nn_ops.conv2d(inputs, weights, strides=strides, padding=padding,
+                           dilations=dilations, data_format=data_format)
+
+def div_round_up(n, d):
+  return (n - 1) // d + 1
+
+class Conv2DNCHWTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """"""Testing conversion of Conv2D (data_format=NCHW) in TF-TRT conversion.""""""
+    np.random.seed(1234)
+    dtype = dtypes.float32
+    input_name = ""input""
+    n, c, h, w = 13, 3, 7, 11
+    num_filters = 5
+    input_dims = [n, c, h, w]
+    output_name = ""output""
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device(""/GPU:0""):
+        results = []
+        for kernel_size in [(3, 3), (3, 2)]:
+          for dilation_rate in [(1, 1), (2, 3)]:
+            result = conv2d_layer(inp, num_filters, kernel_size,
+                                  dilation_rate=dilation_rate, padding='same',
+                                  data_format='channels_first')
+            results.append(result)
+        output = sum(results)
+        output = array_ops.identity(output, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(n, num_filters, h, w)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """"""Return the expected engines to build.""""""
+    return [""my_trt_op_0""]
+
+
+class Conv2DStridedNCHWTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """"""Testing conversion of strided Conv2D (data_format=NCHW) in TF-TRT
+    conversion.""""""
+    np.random.seed(1234)
+    dtype = dtypes.float32
+    input_name = ""input""
+    n, c, h, w = 13, 3, 7, 11
+    num_filters = 5
+    input_dims = [n, c, h, w]
+    output_name = ""output""
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device(""/GPU:0""):
+        output = inp
+        output = conv2d_layer(output, num_filters, (3, 2), strides=(2, 2),
+                              padding='same', data_format='channels_first')
+        h = div_round_up(h, 2)
+        w = div_round_up(w, 2)
+        output = conv2d_layer(output, num_filters, (3, 3), strides=(2, 2),
+                              dilation_rate=(2, 3), padding='same',
+                              data_format='channels_first')
+        h = div_round_up(h, 2)
+        w = div_round_up(w, 2)
+        output = array_ops.identity(output, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(n, num_filters, h, w)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """"""Return the expected engines to build.""""""
+    return [""my_trt_op_0""]
+
+
+class Conv2DNHWCTest(trt_test.TfTrtIntegrationTestBase):
+
+  def GetParams(self):
+    """"""Testing conversion of Conv2D (data_format=NHWC) in TF-TRT conversion.""""""
+    np.random.seed(1234)
+    dtype = dtypes.float32
+    input_name = ""input""
+    n, h, w, c = 13, 7, 11, 3
+    num_filters = 5
+    input_dims = [n, h, w, c]
+    output_name = ""output""
+    g = ops.Graph()
+    with g.as_default():
+      inp = array_ops.placeholder(
+          dtype=dtype, shape=[None] + input_dims[1:], name=input_name)
+      with g.device(""/GPU:0""):
+        results = []
+        for kernel_size in [(3, 3), (3, 2)]:
+          for dilation_rate in [(1, 1), (2, 3)]:
+            result = conv2d_layer(inp, num_filters, kernel_size,
+                                  dilation_rate=dilation_rate, padding='same',
+                                  data_format='channels_last')
+            results.append(result)
+        output = sum(results)
+        output = array_ops.identity(output, name=output_name)
+    return trt_test.TfTrtIntegrationTestParams(
+        gdef=g.as_graph_def(),
+        input_names=[input_name],
+        input_dims=[input_dims],
+        output_names=[output_name],
+        expected_output_dims=[(n, h, w, num_filters)])
+
+  def ExpectedEnginesToBuild(self, run_params):
+    """"""Return the expected engines to build.""""""
+    return [""my_trt_op_0""]
+
+
+if __name__ == ""__main__"":
+  test.main()
",0,train
23900d46dadaad68bafdaae5c5acb1c9fd093f9a,tensorflow/tensorflow,"[XLA] Emit less IR for tuple-select

For the number of tuple elements n, this used to emit n selects, 2n loads and n
stores. Instead emit one select on the address and a memcpy.

PiperOrigin-RevId: 240358129",tuple_ops.cc,"@@ -50,23 +50,12 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred,
   VLOG(2) << ""  pred_value: "" << DumpToString(*pred_value);
   VLOG(2) << ""  pred_cond: "" << DumpToString(*pred_cond);
 
-  for (int i = 0; i < ShapeUtil::TupleElementCount(select.GetShape()); ++i) {
-    llvm::Value* const element_index[] = {b->getInt64(0), b->getInt64(i)};
-    llvm::Value* on_true_element_address =
-        b->CreateInBoundsGEP(on_true, element_index);
-    llvm::Value* on_true_element = b->CreateLoad(
-        on_true_element_address, ""on_true_element_"" + llvm::Twine(i));
-    llvm::Value* on_false_element_address =
-        b->CreateInBoundsGEP(on_false, element_index);
-    llvm::Value* on_false_element = b->CreateLoad(
-        on_false_element_address, ""on_false_element_"" + llvm::Twine(i));
-
-    llvm::Value* output_element_address =
-        b->CreateInBoundsGEP(select.GetBasePointer(), element_index);
-    b->CreateStore(b->CreateSelect(pred_cond, on_true_element, on_false_element,
-                                   ""select_output_element_"" + llvm::Twine(i)),
-                   output_element_address);
-  }
+  llvm::Value* src = b->CreateSelect(pred_cond, on_true, on_false);
+  llvm::Value* dst = select.GetBasePointer();
+  int64 table_size = ShapeUtil::ByteSizeOfTupleIndexTable(
+      select.GetShape(), module->getDataLayout().getPointerSize());
+  b->CreateMemCpy(dst, /*DstAlign=*/1, src, /*SrcAlign=*/1,
+                  b->getInt64(table_size));
 }
 
 void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands,
",0,train
932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary.

This was hardwired to --compile-only, which is wrong.

PiperOrigin-RevId: 362285857
Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",gpu_kernel_to_blob_pass.cc,"@@ -222,8 +222,7 @@ class GpuKernelToBlobPass
 
     // TODO(b/169870789): Revisit the use of fatbins.
     // Bundle cubin and PTX images into a single fatbin.
-    return tensorflow::se::BundleGpuAsm(images,
-                                        gpu_asm_opts.preferred_cuda_dir);
+    return tensorflow::se::BundleGpuAsm(images, gpu_asm_opts);
 #endif
 
     return InternalError(
",0,train
932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary.

This was hardwired to --compile-only, which is wrong.

PiperOrigin-RevId: 362285857
Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",asm_compiler.cc,"@@ -195,6 +195,15 @@ static void LogPtxasTooOld(const std::string& ptxas_path, int cc_major,
   }
 }
 
+static void AppendArgsFromOptions(GpuAsmOpts options,
+                                  std::vector<std::string>& args) {
+  if (options.disable_gpuasm_optimizations) {
+    args.push_back(""-O0"");
+  }
+  args.insert(args.end(), options.extra_flags.begin(),
+              options.extra_flags.end());
+}
+
 port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
                                                  const char* ptx_contents,
                                                  GpuAsmOpts options) {
@@ -234,11 +243,7 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
   if (VLOG_IS_ON(2)) {
     ptxas_args.push_back(""-v"");
   }
-  if (options.disable_gpuasm_optimizations) {
-    ptxas_args.push_back(""-O0"");
-  }
-  ptxas_args.insert(ptxas_args.end(), options.extra_flags.begin(),
-                    options.extra_flags.end());
+  AppendArgsFromOptions(options, ptxas_args);
   if (VLOG_IS_ON(3)) {
     VLOG(3) << absl::StrJoin(ptxas_args, "" "");
   }
@@ -283,9 +288,9 @@ port::StatusOr<std::vector<uint8>> CompileGpuAsm(int cc_major, int cc_minor,
 }
 
 port::StatusOr<std::vector<uint8>> BundleGpuAsm(
-    std::vector<CubinOrPTXImage> images, const std::string preferred_cuda_dir) {
+    std::vector<CubinOrPTXImage> images, GpuAsmOpts options) {
   std::string fatbinary_path =
-      findCudaExecutable(""fatbinary"", preferred_cuda_dir);
+      findCudaExecutable(""fatbinary"", options.preferred_cuda_dir);
 
   // Write images to temporary files.
   std::vector<std::string> image_paths;
@@ -319,11 +324,19 @@ port::StatusOr<std::vector<uint8>> BundleGpuAsm(
     tensorflow::Env::Default()->DeleteFile(result_path).IgnoreError();
   });
 
+  // Compute the ptxas options that were used to produce the cubins.
+  std::vector<std::string> ptxas_options;
+  AppendArgsFromOptions(options, ptxas_options);
+
   // Invoke fatbinary and collect its output.
   tensorflow::SubProcess fatbinary;
   std::vector<std::string> fatbinary_args = {
-      fatbinary_path, ""--64"",           ""--cmdline=--compile-only"",
-      ""--link"",       ""--compress-all"", absl::StrCat(""--create="", result_path)};
+      fatbinary_path, ""--64"", ""--link"", ""--compress-all"",
+      absl::StrCat(""--create="", result_path)};
+  if (!ptxas_options.empty()) {
+    auto command_line = absl::StrJoin(ptxas_options, "" "");
+    fatbinary_args.push_back(absl::StrFormat(""--cmdline=%s"", command_line));
+  }
   assert(images.size() == image_paths.size());
   for (int i = 0; i < images.size(); i++) {
     fatbinary_args.push_back(absl::StrFormat(
",0,train
932d1242b419290ff19189c44f9dc40c7f799b20,tensorflow/tensorflow,"Package the ptxas arguments that were used for compilation with the ptx of a fat binary.

This was hardwired to --compile-only, which is wrong.

PiperOrigin-RevId: 362285857
Change-Id: I185ce7154842b9e0959084d53c9aeacd56a0050a",asm_compiler.h,"@@ -63,7 +63,7 @@ struct CubinOrPTXImage {
 // Bundles the GPU machine code (cubins) and PTX if requested and returns the
 // resulting binary (i.e. a fatbin) as a byte array.
 port::StatusOr<std::vector<uint8>> BundleGpuAsm(
-    std::vector<CubinOrPTXImage> images, const std::string preferred_cuda_dir);
+    std::vector<CubinOrPTXImage> images, GpuAsmOpts options);
 
 struct HsacoImage {
   std::string gfx_arch;
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",ir_emitter.cc,"@@ -2102,19 +2102,6 @@ Status IrEmitter::HandleDynamicSlice(HloInstruction* dynamic_slice,
 
 namespace {
 
-// Returns the first non-GetTupleElement ancestor instruction of 'hlo'.
-// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the
-// (possibly nested) tuple indices used on the path from ancestor to 'hlo'.
-const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo,
-                                                   ShapeIndex* index) {
-  if (hlo->opcode() == HloOpcode::kGetTupleElement) {
-    const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index);
-    index->push_back(hlo->tuple_index());
-    return operand;
-  }
-  return hlo;
-}
-
 // Checks if we can emit code for DynamicUpdateSlice to update data in-place.
 // Returns true if operand 0 of DynamicUpdateSlice and its output buffer
 // share the same buffer allocation.
@@ -2126,9 +2113,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment,
 
   // Walk DynamicUpdateSlice operand(0) to parameter and get its
   // associated operand. See if it shares an allocation with this operand.
+  HloInstruction* operand;
   ShapeIndex index;
-  auto* operand =
-      LatestNonGteAncestorAndIndex(dynamic_update_slice->operand(0), &index);
+  std::tie(operand, index) =
+      dynamic_update_slice->mutable_operand(0)->LatestNonGteAncestorAndIndex();
   if (operand->opcode() != HloOpcode::kParameter) {
     return false;
   }
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",hlo_to_ir_bindings.cc,"@@ -67,7 +67,7 @@ void HloToIrBindings::EmitBasePointersForHlos(
         // Lookup allocation GetTupleElement operand.
         const BufferAllocation::Slice slice =
             buffer_assignment_
-                ->GetUniqueTopLevelSlice(LatestNonGteAncestor(non_io_hlo))
+                ->GetUniqueTopLevelSlice(non_io_hlo->LatestNonGteAncestor())
                 .ConsumeValueOrDie();
         // We are not in a nested context, so check non-thread-local allocation.
         CHECK(!slice.allocation()->is_thread_local());
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",ir_emission_utils.cc,"@@ -214,12 +214,5 @@ llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset,
       value->getType());
 }
 
-const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo) {
-  while (hlo->opcode() == HloOpcode::kGetTupleElement) {
-    hlo = hlo->operand(0);
-  }
-  return hlo;
-}
-
 }  // namespace gpu
 }  // namespace xla
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",ir_emission_utils.h,"@@ -53,10 +53,6 @@ llvm::Value* EmitPrintf(tensorflow::StringPiece fmt,
 llvm::Value* EmitShuffleDown(llvm::Value* value, llvm::Value* offset,
                              llvm::IRBuilder<>* builder);
 
-// Resolves GetTupleElement instruction operands starting with 'hlo'.
-// Returns the first ancestor instruction which is not a GetTupleElement.
-const HloInstruction* LatestNonGteAncestor(const HloInstruction* hlo);
-
 }  // namespace gpu
 }  // namespace xla
 
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",ir_emitter_unnested.cc,"@@ -254,27 +254,11 @@ Status IrEmitterUnnested::HandleConvolution(HloInstruction* convolution,
                                       rhs_instruction, window);
 }
 
-namespace {
-
-// Returns the first non-GetTupleElement ancestor instruction of 'hlo'.
-// If the first non-GTE ancestor is tuple-shaped, populates 'index' with the
-// (possibly nested) tuple indices used on the path from ancestor to 'hlo'.
-const HloInstruction* LatestNonGteAncestorAndIndex(const HloInstruction* hlo,
-                                                   ShapeIndex* index) {
-  if (hlo->opcode() == HloOpcode::kGetTupleElement) {
-    const auto* operand = LatestNonGteAncestorAndIndex(hlo->operand(0), index);
-    index->push_back(hlo->tuple_index());
-    return operand;
-  }
-  return hlo;
-}
-
 // Checks if we can emit code for DynamicUpdateSlice to update data in-place.
 // Returns true if operand 0 of DynamicUpdateSlice and its output buffer
 // share the same buffer allocation.
-// Returns false otherwise.
-bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment,
-                                  HloInstruction* fusion) {
+static bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment,
+                                         HloInstruction* fusion) {
   CHECK_EQ(HloOpcode::kFusion, fusion->opcode());
   HloInstruction* fused_root = fusion->fused_expression_root();
   if (fused_root->opcode() != HloOpcode::kDynamicUpdateSlice) {
@@ -282,9 +266,10 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment,
   }
   // Walk DynamicUpdateSlice operand(0) to fused parameter and get its
   // associated operand. See if it shares an allocation with this operand.
+  HloInstruction* fusion_operand;
   ShapeIndex index;
-  auto* fusion_operand =
-      LatestNonGteAncestorAndIndex(fused_root->operand(0), &index);
+  std::tie(fusion_operand, index) =
+      fused_root->mutable_operand(0)->LatestNonGteAncestorAndIndex();
   if (fusion_operand->opcode() != HloOpcode::kParameter) {
     return false;
   }
@@ -292,8 +277,6 @@ bool CanUpdateDynamicSliceInPlace(const BufferAssignment& assignment,
   return assignment.SharesSliceAtIndex(fusion, {}, operand, index);
 }
 
-}  // namespace
-
 Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
   HloInstruction* root = fusion->fused_expression_root();
   // HandleFusion specializes reduction from a multi-dimensional array to a 1D
@@ -386,7 +369,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
     TF_RETURN_IF_ERROR(root->Accept(&fused_emitter));
 
     // Recursively lookup 'fusion_operand' for DynamicUpdateSlice operand 0.
-    auto* fusion_operand = LatestNonGteAncestor(root->operand(0));
+    auto* fusion_operand = root->operand(0)->LatestNonGteAncestor();
     CHECK_EQ(HloOpcode::kParameter, fusion_operand->opcode());
 
     // Operand(0) the input array which shares an allocation with the output.
@@ -1625,7 +1608,7 @@ llvm::Function* IrEmitterUnnested::EmitBasePointersForHloAndItsOperands(
   // with their operand buffer in 'io_hlos' and 'non_io_hlos' below.
   std::vector<const HloInstruction*> non_io_hlos;
   for (const HloInstruction* operand : hlo.operands()) {
-    const HloInstruction* to_lookup = LatestNonGteAncestor(operand);
+    const HloInstruction* to_lookup = operand->LatestNonGteAncestor();
     if (buffer_assignment.HasTopLevelAllocation(to_lookup) &&
         buffer_assignment.GetUniqueTopLevelSlice(to_lookup)
             .ConsumeValueOrDie()
@@ -1665,7 +1648,7 @@ std::unique_ptr<Thunk> IrEmitterUnnested::BuildKernelThunk(
   std::vector<BufferAllocation::Slice> io_buffers;
   io_buffers.reserve(io_hlos.size());
   for (const HloInstruction* io_hlo : io_hlos) {
-    io_buffers.push_back(GetAllocationSlice(*LatestNonGteAncestor(io_hlo)));
+    io_buffers.push_back(GetAllocationSlice(*io_hlo->LatestNonGteAncestor()));
   }
 
   // Create a KernelThunk that launches the kernel that implements ""inst"".
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",hlo_instruction.cc,"@@ -1131,6 +1131,29 @@ std::unique_ptr<HloInstruction> HloInstruction::CloneFusionWithNewOperands(
   return new_instruction;
 }
 
+std::pair<const HloInstruction*, ShapeIndex>
+HloInstruction::LatestNonGteAncestorAndIndex() const {
+  const HloInstruction* hlo = this;
+  ShapeIndex index;
+  while (hlo->opcode() == HloOpcode::kGetTupleElement) {
+    index.push_back(hlo->tuple_index());
+    hlo = hlo->operand(0);
+  }
+
+  // We built up index in the reverse order from what we want.
+  std::reverse(index.begin(), index.end());
+
+  return {hlo, index};
+}
+
+const HloInstruction* HloInstruction::LatestNonGteAncestor() const {
+  const HloInstruction* hlo = this;
+  while (hlo->opcode() == HloOpcode::kGetTupleElement) {
+    hlo = hlo->operand(0);
+  }
+  return hlo;
+}
+
 const Literal& HloInstruction::literal() const {
   CHECK_EQ(HloOpcode::kConstant, opcode_);
   return *literal_;
",0,train
403e51018b3c47cd5989d6b50776e235221fade4,tensorflow/tensorflow,"[XLA] Factor out repeated LatestNonGteAncestorAndIndex helper.

PiperOrigin-RevId: 171620470",hlo_instruction.h,"@@ -508,6 +508,26 @@ class HloInstruction {
   // Precondition: opcode() == HloOpcode::kGetTupleElement
   int64 tuple_index() const;
 
+  // Returns the first non-GetTupleElement ancestor instruction of 'hlo'.
+  // If the first non-GTE ancestor is tuple-shaped, populates 'index' with the
+  // (possibly nested) tuple indices used on the path from ancestor to 'hlo'.
+  std::pair<const HloInstruction*, ShapeIndex> LatestNonGteAncestorAndIndex()
+      const;
+
+  std::pair<HloInstruction*, ShapeIndex> LatestNonGteAncestorAndIndex() {
+    auto rv =
+        const_cast<const HloInstruction*>(this)->LatestNonGteAncestorAndIndex();
+    return {const_cast<HloInstruction*>(rv.first), rv.second};
+  }
+
+  // Same as LatestNonGteAncestorAndIndex, but just returns the HloInstruction.
+  const HloInstruction* LatestNonGteAncestor() const;
+
+  HloInstruction* LatestNonGteAncestor() {
+    return const_cast<HloInstruction*>(
+        const_cast<const HloInstruction*>(this)->LatestNonGteAncestor());
+  }
+
   // Gets/sets the to_apply HloComputation for Call, Map, Reduce, etc.
   // The setter should only be called by HloModule or HloComputation methods.
   //
",0,train
5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list equality rather than requiring that the shapes are equal.

PiperOrigin-RevId: 232554706",def_function_test.py,"@@ -268,7 +268,8 @@ class DefFunctionTest(test.TestCase):
     self.assertAllClose(4., concrete(constant_op.constant(2.)))
     signature_args, _ = concrete.structured_input_signature
     self.assertEqual(signature_args,
-                     (tensor_spec.TensorSpec(None, dtypes.float32),))
+                     (tensor_spec.TensorSpec(
+                         None, dtypes.float32, name='x'),))
 
   def test_serialization_signature_cache(self):
 
@@ -288,10 +289,10 @@ class DefFunctionTest(test.TestCase):
 
     self.assertEqual(
         signatures_args,
-        set(((tensor_spec.TensorSpec([1, 2], dtypes.float32),
-              tensor_spec.TensorSpec([1], dtypes.float32)),
-             (tensor_spec.TensorSpec([1, 3], dtypes.int32),
-              tensor_spec.TensorSpec([1], dtypes.int32)))))
+        set(((tensor_spec.TensorSpec([1, 2], dtypes.float32, name='x'),
+              tensor_spec.TensorSpec([1], dtypes.float32, name='y')),
+             (tensor_spec.TensorSpec([1, 3], dtypes.int32, name='x'),
+              tensor_spec.TensorSpec([1], dtypes.int32, name='y')))))
 
   @test_util.assert_no_garbage_created
   def testFunctionReferenceCycles(self):
@@ -373,6 +374,18 @@ class DefFunctionTest(test.TestCase):
 
     self.assertAllEqual(add(v, v), 2.0)
 
+  def testShapeCache(self):
+    @def_function.function
+    def func(x):
+      return 2 * x
+
+    func_a = func.get_concrete_function(
+        tensor_spec.TensorSpec([None], dtypes.int32))
+    func_b = func.get_concrete_function(
+        tensor_spec.TensorSpec([None], dtypes.int32))
+
+    self.assertIs(func_a, func_b)
+
   def testInitializationInNestedCall(self):
     v_holder = []
 
",0,train
5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list equality and doesn't require that the shapes are equal.

PiperOrigin-RevId: 232554706",tensor_spec.py,"@@ -108,7 +108,9 @@ class TensorSpec(object):
     return hash((self._shape_tuple, self.dtype))
 
   def __eq__(self, other):
-    return self.shape == other.shape and self.dtype == other.dtype
+    return (self._shape_tuple == other._shape_tuple  # pylint: disable=protected-access
+            and self.dtype == other.dtype
+            and self._name == other._name)  # pylint: disable=protected-access
 
   def __ne__(self, other):
     return not self == other
",0,train
5e0a1a375a795ceb0b8fac65b0a04bfdb124986e,tensorflow/tensorflow,"TensorSpec equality is more useful if it's shape-list equality and doesn't require that the shapes are equal.

PiperOrigin-RevId: 232554706",tensor_array_ops.py,"@@ -825,7 +825,7 @@ class _EagerTensorArray(object):
     if self._infer_shape:
       if self._element_shape is None:
         self._element_shape = value.shape
-      elif self._element_shape != value.shape:
+      elif not self._element_shape.is_compatible_with(value.shape):
         raise ValueError(""Incompatible shape for value (%s), expected (%s)"" %
                          (value.shape.as_list(), self._element_shape.as_list()))
 
",0,train
8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change.

PiperOrigin-RevId: 211519679",__init__.py,"@@ -21,6 +21,14 @@ from __future__ import print_function
 
 import os
 
+from tensorflow.python.tools import component_api_helper
+component_api_helper.package_hook(
+    parent_package_str=(
+        ""tensorflow.contrib""),
+    child_package_str=(
+        ""tensorflow_estimator.contrib.estimator""))
+del component_api_helper
+
 # Add projects here, they will show up under tf.contrib.
 from tensorflow.contrib import autograph
 from tensorflow.contrib import batching
",0,train
8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change.

PiperOrigin-RevId: 211519679",__init__.py,"@@ -48,6 +48,13 @@ import numpy as np
 
 from tensorflow.python import pywrap_tensorflow
 
+from tensorflow.python.tools import component_api_helper
+component_api_helper.package_hook(
+    parent_package_str='tensorflow.python',
+    child_package_str=(
+        'tensorflow_estimator.python.estimator'))
+del component_api_helper
+
 # Protocol buffers
 from tensorflow.core.framework.graph_pb2 import *
 from tensorflow.core.framework.node_def_pb2 import *
",0,train
8cf8afefdb4c240f74a05e24246c8cd2dcce9d54,tensorflow/tensorflow,"Internal Change.

PiperOrigin-RevId: 211519679",component_api_helper.py,"@@ -67,7 +67,7 @@ def package_hook(parent_package_str, child_package_str, error_msg=None):
     """"""
     child_pkg_path = [os.path.join(os.path.dirname(child_pkg.__file__), "".."")]
     try:
-      parent_pkg.__path__ += child_pkg_path
+      parent_pkg.__path__ = child_pkg_path + parent_pkg.__path__
     except AttributeError:
       parent_pkg.__path__ = child_pkg_path
 
",0,train
e8e1631f8e46bc2588f08f89fe2892d3cb2f8035,tensorflow/tensorflow,"Make sure trackables are initialized when _trackable_children is called

PiperOrigin-RevId: 425543411
Change-Id: I53fa60d7a3ecffd73af7ba6245fad901b67fb88a",base.py,"@@ -1461,6 +1461,7 @@ class Trackable(object):
     Returns:
       Dictionary mapping names to child trackables.
     """"""
+    self._maybe_initialize_trackable()
     # TODO(kathywu): Migrate `_checkpoint_dependencies` overrides to
     # `_trackable_children`.
     if save_type == SaveType.CHECKPOINT:
",0,train
f568deff5697891d6c6ca0d09490359cf96fe7a1,tensorflow/tensorflow,Fix an error in 'Adding a New Op' example code (#5846),zero_out_2_test.py,"@@ -31,6 +31,11 @@ class ZeroOut2Test(tf.test.TestCase):
       result = zero_out_op_2.zero_out([5, 4, 3, 2, 1])
       self.assertAllEqual(result.eval(), [5, 0, 0, 0, 0])
 
+  def test_2d(self):
+    with self.test_session():
+      result = zero_out_op_2.zero_out([[6, 5, 4], [3, 2, 1]])
+      self.assertAllEqual(result.eval(), [[6, 0, 0], [0, 0, 0]])
+
   def test_grad(self):
     with self.test_session():
       shape = (5,)
@@ -39,6 +44,14 @@ class ZeroOut2Test(tf.test.TestCase):
       err = tf.test.compute_gradient_error(x, shape, y, shape)
       self.assertLess(err, 1e-4)
 
+  def test_grad_2d(self):
+    with self.test_session():
+      shape = (2, 3)
+      x = tf.constant([[6, 5, 4], [3, 2, 1]], dtype=tf.float32)
+      y = zero_out_op_2.zero_out(x)
+      err = tf.test.compute_gradient_error(x, shape, y, shape)
+      self.assertLess(err, 1e-4)
+
 
 if __name__ == '__main__':
   tf.test.main()
",0,train
f568deff5697891d6c6ca0d09490359cf96fe7a1,tensorflow/tensorflow,Fix an error in 'Adding a New Op' example code (#5846),zero_out_grad_2.py,"@@ -40,5 +40,5 @@ def _zero_out_grad(op, grad):
   shape = array_ops.shape(to_zero)
   index = array_ops.zeros_like(shape)
   first_grad = array_ops.reshape(grad, [-1])[0]
-  to_zero_grad = sparse_ops.sparse_to_dense(index, shape, first_grad, 0)
+  to_zero_grad = sparse_ops.sparse_to_dense([index], shape, first_grad, 0)
   return [to_zero_grad]  # List of one Tensor, since we have one input
",0,train
4b161717ae7766111a7625b9110b730c4176ec03,tensorflow/tensorflow,Use deserialize-helper for deserializing dict activations,activations.py,"@@ -458,11 +458,10 @@ def get(identifier):
   if isinstance(identifier, six.string_types):
     identifier = str(identifier)
     return deserialize(identifier)
+  elif isinstance(identifier, dict):
+    return deserialize(identifier)
   elif callable(identifier):
     return identifier
-  elif isinstance(identifier, dict):
-    return deserialize_keras_object(
-        identifier, printable_module_name='activation')
   else:
     raise TypeError(
         'Could not interpret activation function identifier: {}'.format(
",0,train
f74ae6121e7dcaeca8a57af23f195d9de3e524da,tensorflow/tensorflow,"Add tests for collective ops + scoped allocator in while loop.

PiperOrigin-RevId: 263032410",collective_ops_test.py,"@@ -25,9 +25,11 @@ from tensorflow.python.framework import errors
 from tensorflow.python.framework import kernels
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import collective_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
@@ -110,6 +112,63 @@ class CollectiveOpTest(test.TestCase):
         set_graph_key=False,
         communication_hint='nccl')
 
+  def _testWhile(self, num_vars, num_iterations, key_base):
+    group_size = 2
+    group_key = 1
+    instances = [(key_base + i) for i in range(num_vars)]
+    devices = ['CPU:{}'.format(i) for i in range(group_size)]
+
+    config = config_pb2.ConfigProto(device_count={'CPU': group_size})
+    rewrite_options = config.graph_options.rewrite_options
+    rewrite_options.scoped_allocator_optimization = (
+        rewriter_config_pb2.RewriterConfig.ON)
+    del rewrite_options.scoped_allocator_opts.enable_op[:]
+    rewrite_options.scoped_allocator_opts.enable_op.append('CollectiveReduce')
+
+    with self.session(config=config) as sess:
+      loop_vars = []
+      for device in devices:
+        with ops.device(device):
+          loop_vars.append(
+              [variables.VariableV1((1 << i) * 1.) for i in range(num_vars)])
+      # This variable controls number of iterations.
+      loop_vars.append(variables.VariableV1(0.))
+      def loop_body(dev0_tensors, dev1_tensors, loop_tensor):
+        return_ops = []
+        for i in range(len(devices)):
+          device = devices[i]
+          device_tensors = dev0_tensors if i == 0 else dev1_tensors
+          with ops.device(device):
+            device_collectives = []
+            for j in range(num_vars):
+              # TODO(ayushd): figure out why identity is necessary to get the
+              # right device on the input here with TF2_BEHAVIOR=1.
+              input_tensor = array_ops.identity(device_tensors[j])
+              collective_op = collective_ops.all_reduce(
+                  input_tensor, group_size, group_key, instances[j],
+                  'Add', 'Id')
+              device_collectives.append(collective_op)
+            return_ops.append(device_collectives)
+        return_ops.append(math_ops.add(loop_tensor, 1.))
+        return return_ops
+      # Run until last variable exceeds number of iterations.
+      loop_cond = lambda d0, d1, i: math_ops.less(i, num_iterations)
+      sess.run(variables.global_variables_initializer())
+      results = sess.run(control_flow_ops.while_loop(loop_cond, loop_body,
+                                                     loop_vars))
+      self.assertEqual(results[:-1], [
+          [((1 << (num_iterations + v)) * 1.) for v in range(num_vars)]
+          for _ in range(group_size)])
+
+  @test_util.run_deprecated_v1
+  def testSimpleWhile(self):
+    self._testWhile(num_vars=1, num_iterations=4, key_base=20)
+
+  @test_util.run_deprecated_v1
+  def testWhileMultipleAllReduce(self):
+    self.skipTest('Temporarily disabled')  # TODO(b/135686041): re-enable
+    self._testWhile(num_vars=2, num_iterations=4, key_base=20)
+
   @test_util.run_deprecated_v1
   def testWhileWithScopedAllocator(self):
     group_size = 2
",0,train
e45d52e03932b6aca5ce8aac136b1b688fe2a47a,tensorflow/tensorflow,"Change MaybeFuseActivation to only support a single output - that is its only use-case.

PiperOrigin-RevId: 322419198
Change-Id: I4307683446795de77be4b0d3dc06396cfa3347c4",model_builder.cc,"@@ -129,16 +129,15 @@ absl::Status IsActivationSupported(TfLiteFusedActivation fused_activation) {
 // that will have identical output as the given node. New operation node will
 // depend on the given node output.
 absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation,
-                                 const std::vector<uint32_t>& output_indices,
                                  GraphFloat32* graph, Node* node) {
-  if (fused_activation == kTfLiteActNone) {
-    return absl::OkStatus();
-  }
   const auto outputs = graph->FindOutputs(node->id);
-  if (outputs.empty()) {
-    return absl::InternalError(""Empty outputs in fused node"");
+  if (outputs.size() != 1) {
+    return absl::InternalError(""Number of outputs != 1"");
   }
   switch (fused_activation) {
+    case kTfLiteActNone:
+      // Nothing to do here
+      return absl::OkStatus();
     case kTfLiteActRelu:
     case kTfLiteActReluN1To1:
     case kTfLiteActRelu6: {
@@ -146,36 +145,24 @@ absl::Status MaybeFuseActivation(TfLiteFusedActivation fused_activation,
       attr.clip = fused_activation == kTfLiteActRelu
                       ? 0.0f
                       : (fused_activation == kTfLiteActReluN1To1 ? 1.0f : 6.0f);
-      for (auto index : output_indices) {
-        Node* activation_node;
-        RETURN_IF_ERROR(
-            NewPassthroughNode(graph, node, outputs[index], &activation_node));
-        activation_node->operation.type = ToString(OperationType::RELU);
-        activation_node->operation.attributes = attr;
-      }
-      break;
+      Node* activation_node;
+      RETURN_IF_ERROR(
+          NewPassthroughNode(graph, node, outputs[0], &activation_node));
+      activation_node->operation.type = ToString(OperationType::RELU);
+      activation_node->operation.attributes = attr;
+      return absl::OkStatus();
+    }
+    case kTfLiteActTanh: {
+      Node* activation_node;
+      RETURN_IF_ERROR(
+          NewPassthroughNode(graph, node, outputs[0], &activation_node));
+      activation_node->operation.type = ToString(OperationType::TANH);
+      return absl::OkStatus();
     }
-    case kTfLiteActTanh:
-      for (auto index : output_indices) {
-        Node* activation_node;
-        RETURN_IF_ERROR(
-            NewPassthroughNode(graph, node, outputs[index], &activation_node));
-        activation_node->operation.type = ToString(OperationType::TANH);
-      }
-      break;
     default:
       return absl::NotFoundError(
           absl::StrCat(""Unsupported fused activation: "", fused_activation));
   }
-  return absl::OkStatus();
-}
-
-absl::Status MaybeFuseActivationToTheSingleOutput(
-    TfLiteFusedActivation fused_activation, GraphFloat32* graph, Node* node) {
-  if (graph->FindOutputs(node->id).size() != 1) {
-    return absl::InternalError(""Number of outputs exceeds 1"");
-  }
-  return MaybeFuseActivation(fused_activation, {0}, graph, node);
 }
 
 HW ToHW(int32_t h, int32_t w) { return HW(h > 0 ? h : 1, w > 0 ? w : 1); }
@@ -389,8 +376,7 @@ class AddOperationParser : public TFLiteOperationParser {
     node->operation.attributes = std::move(attr);
     const TfLiteAddParams* tf_options;
     RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
-    return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph,
-                                                node);
+    return MaybeFuseActivation(tf_options->activation, graph, node);
   }
 };
 
@@ -463,8 +449,7 @@ class ConcatenationOperationParser : public TFLiteOperationParser {
     }
     const TfLiteConcatenationParams* tf_options;
     RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
-    RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation,
-                                                         graph, node));
+    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node));
     node->operation.attributes = attr;
     return absl::OkStatus();
   }
@@ -566,8 +551,7 @@ class Conv2DOperationParser : public TFLiteOperationParser {
                         tf_options->dilation_width_factor);
     UpdatePadding(tf_options->padding,
                   graph->FindInputs(node->id)[0]->tensor.shape, &attr);
-    RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation,
-                                                         graph, node));
+    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node));
     node->operation.attributes = std::move(attr);
     return absl::OkStatus();
   }
@@ -684,8 +668,7 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser {
                         std::max(1, tf_options->dilation_width_factor));
     UpdatePadding(tf_options->padding,
                   graph->FindInputs(node->id)[0]->tensor.shape, &attr);
-    RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation,
-                                                         graph, node));
+    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node));
     const int depth_multiplier = tf_options->depth_multiplier;
     if (depth_multiplier != 1) {
       const TfLiteTensor* input = reader->GetInputTensor(0);
@@ -850,8 +833,7 @@ class ElementwiseOperationParser : public TFLiteOperationParser {
       }
 
       if (activation) {
-        RETURN_IF_ERROR(
-            MaybeFuseActivationToTheSingleOutput(activation, graph, node));
+        RETURN_IF_ERROR(MaybeFuseActivation(activation, graph, node));
       }
     } else if (IsTwoArgumentOperationWithConst()) {
       RETURN_IF_ERROR(reader->VerifyInputsConstsOutputs(tflite_node,
@@ -997,8 +979,7 @@ class FullyConnectedOperationParser : public TFLiteOperationParser {
     conv->operation.type = ToString(OperationType::FULLY_CONNECTED);
     conv->operation.attributes = std::move(attr);
     absl::Status result = reader->AddOutputs(conv);
-    RETURN_IF_ERROR(MaybeFuseActivationToTheSingleOutput(tf_options->activation,
-                                                         graph, conv));
+    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, conv));
 
     return result;
   }
@@ -1252,8 +1233,7 @@ class MulOperationParser : public TFLiteOperationParser {
 
     const TfLiteMulParams* tf_options;
     RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
-    return MaybeFuseActivationToTheSingleOutput(tf_options->activation, graph,
-                                                node);
+    return MaybeFuseActivation(tf_options->activation, graph, node);
   }
 
  private:
@@ -1454,9 +1434,7 @@ class Pooling2DOperationParser : public TFLiteOperationParser {
       RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
     }
 
-    std::vector<uint32_t> max_tensor_id{0};
-    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, max_tensor_id,
-                                        graph, node));
+    RETURN_IF_ERROR(MaybeFuseActivation(tf_options->activation, graph, node));
     // Second output is optional. It is not required, it but must be added after
     // MaybeAddFusedActivation function is called
     reader->AddOutput(node, 1).IgnoreError();
",0,train
44697e33251ac74fcd3b136dfbfa1daad4fe4bfb,tensorflow/tensorflow,"Add checks for cases where no tensors are being profiled in the graph.

PiperOrigin-RevId: 424651575
Change-Id: Ibef450ba692d8e5678105006e14be40e3d5ee281",tensor_tracer.py,"@@ -1408,6 +1408,11 @@ class TensorTracer(object):
       else:
         return tensor
 
+    # Check if there are graph operations being profiled.
+    if not tensor_trace_order.traced_tensors:
+      logging.warn('Inspect mode has no tensors in the cache to check.')
+      return control_flow_ops.no_op
+
     # Check if the cache includes any nan or inf
     if self._parameters.trace_mode == tensor_tracer_flags.TRACE_MODE_NAN_INF:
       # Cache has 1s or 0s if the mode is NaN_INF
@@ -1571,6 +1576,9 @@ class TensorTracer(object):
     """"""
     # Add a dependency to op and tensor fetches to make sure that all tracing
     # ops are executed before flushing trace results.
+    if not tensor_trace_order.traced_tensors:
+      logging.warn('No tensor values being traced. No flush cache op added.')
+      return tensor_fetches
     with ops.control_dependencies(op_fetches +
                                   [tensor.op for tensor in tensor_fetches]):
       flush_cache_op = self._generate_flush_cache_op(
",0,train
e10dcf4fe7c480dcdfdff744e35f68683741cc59,tensorflow/tensorflow,"Add _placeholder_value for DistributedVariableTraceType

PiperOrigin-RevId: 436575035",values.py,"@@ -511,25 +511,28 @@ class DistributedVarOp(object):
     return hash((self.name, self.graph, tuple(self.traceback), self.type))
 
 
+# TODO(b/209081027): Remove this once Variable is a CompositeTensor.
 class DistributedVariableTraceType(trace.TraceType):
-  """"""Class outlining the Tracing Protocol for DistributedVariable.""""""
+  """"""TraceType of DistributedVariable objects.""""""
 
-  def __init__(self, shape, dtype):
-    self.components = (tuple(shape.as_list()), dtype)
+  def __init__(self, distributed_variable):
+    self.distributed_variable = distributed_variable
+    self.components = (tuple(distributed_variable.shape.as_list()),
+                       distributed_variable.dtype)
 
   def is_subtype_of(self, other):
     return self == other
 
   def most_specific_common_supertype(self, others):
-    return None
+    return self if all(self == other for other in others) else None
+
+  def _placeholder_value(self):
+    return self.distributed_variable
 
   def __hash__(self) -> int:
     return hash(self.components)
 
   def __eq__(self, other) -> bool:
-    if not isinstance(other, trace.TraceType):
-      return NotImplemented
-
     if not isinstance(other, DistributedVariableTraceType):
       return False
 
@@ -929,7 +932,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.Variable,
         self, sparse_delta, use_locking=use_locking, name=name)
 
   def __tf_tracing_type__(self, _):
-    return DistributedVariableTraceType(self.shape, self.dtype)
+    return DistributedVariableTraceType(self)
 
   def _gather_saveables_for_checkpoint(self):
     """"""Overrides Trackable method.
",0,train
a72ee2f74061cdd72f1197eed4c90a8216d39d74,tensorflow/tensorflow,"Fast-path to VarHandleOp

PiperOrigin-RevId: 195744374",resource_mgr.h,"@@ -338,6 +338,9 @@ class ResourceHandleOp : public OpKernel {
  private:
   string container_;
   string name_;
+  mutex mutex_;
+  Tensor resource_ GUARDED_BY(mutex_);
+  std::atomic<bool> initialized_{false};
 };
 
 // Registers a kernel for an op which produces a handle to a resource of the
@@ -511,10 +514,17 @@ ResourceHandleOp<T>::ResourceHandleOp(OpKernelConstruction* context)
 
 template <typename T>
 void ResourceHandleOp<T>::Compute(OpKernelContext* ctx) {
-  Tensor* output = nullptr;
-  OP_REQUIRES_OK(ctx, ctx->allocate_output(0, TensorShape({}), &output));
-  output->scalar<ResourceHandle>()() =
-      MakeResourceHandle<T>(ctx, container_, name_);
+  if (!initialized_.load()) {
+    mutex_lock ml(mutex_);
+    AllocatorAttributes attr;
+    attr.set_on_host(true);
+    OP_REQUIRES_OK(ctx, ctx->allocate_temp(DT_RESOURCE, TensorShape({}),
+                                           &resource_, attr));
+    resource_.scalar<ResourceHandle>()() =
+        MakeResourceHandle<T>(ctx, container_, name_);
+    initialized_.store(true);
+  }
+  ctx->set_output(0, resource_);
 }
 
 }  //  end namespace tensorflow
",0,test
05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method.

PiperOrigin-RevId: 242878081",depthwiseconv_quantized_test.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #include <sys/types.h>
+
 #include <algorithm>
 #include <cmath>
 #include <cstdint>
@@ -137,14 +138,28 @@ inline void DispatchDepthwiseConv(
           << "" output_height = "" << output_shape.Dims(1);
 
       // Call kernel optimized for depthwise convolutions using 3x3 filters.
-      optimized_ops::depthwise_conv::DepthwiseConv3x3Filter(
-          params, input_shape, input_data, filter_shape, filter_data,
-          bias_shape, bias_data, output_shape, output_data, /*thread_start=*/0,
-          /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1);
-      return;
-#else
-      break;
+      switch (test_param.output_rounding) {
+        case DepthwiseConvOutputRounding::kAwayFromZero:
+          optimized_ops::depthwise_conv::DepthwiseConv3x3Filter<
+              DepthwiseConvOutputRounding::kAwayFromZero>(
+              params, input_shape, input_data, filter_shape, filter_data,
+              bias_shape, bias_data, output_shape, output_data,
+              /*thread_start=*/0,
+              /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1);
+          return;
+        case DepthwiseConvOutputRounding::kUpward:
+          optimized_ops::depthwise_conv::DepthwiseConv3x3Filter<
+              DepthwiseConvOutputRounding::kAwayFromZero>(
+              params, input_shape, input_data, filter_shape, filter_data,
+              bias_shape, bias_data, output_shape, output_data,
+              /*thread_start=*/0,
+              /*thread_end=*/output_shape.Dims(1), /*thread_dim=*/1);
+          return;
+        default:
+          break;
+      }
 #endif
+      break;
     }
     case DepthwiseConvImplementation::kUseNeon3x3DotProduct: {
 #if defined(__ARM_FEATURE_DOTPROD) && !defined(GOOGLE_L4T)
",0,train
05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method.

PiperOrigin-RevId: 242878081",depthwiseconv_uint8.h,"@@ -2016,7 +2016,8 @@ inline void DepthwiseConvWithRounding(
           dilation_width_factor, dilation_height_factor, pad_width, pad_height,
           depth_multiplier, output_shape, output_shift)) {
     gemmlowp::ScopedProfilingLabel specialized_label(""DepthwiseConv/8bit/3x3"");
-    depthwise_conv::DepthwiseConv3x3Filter(
+    depthwise_conv::DepthwiseConv3x3Filter<
+        DepthwiseConvOutputRounding::kAwayFromZero>(
         params, input_shape, input_data, filter_shape, filter_data, bias_shape,
         bias_data, output_shape, output_data, thread_start, thread_end,
         thread_dim);
",0,train
05dfc24e863c6fb7e7bd3552443a819f66b12dff,tensorflow/tensorflow,"DepthwiseConv, NEON 3x3 kernel, implement templated rounding method.

PiperOrigin-RevId: 242878081",depthwiseconv_uint8_3x3_filter.h,"@@ -574,11 +574,13 @@ static_assert(offsetof(DepthwiseConvDotProdParams, four_over_stride) ==
 #endif  // __ARM_FEATURE_DOTPROD && !GOOGLE_L4T
 
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
-template <int32 kDepth, int32 kStrideWidth, int32 kStrideHeight>
+template <DepthwiseConvOutputRounding output_rounding, int32 kDepth,
+          int32 kStrideWidth, int32 kStrideHeight>
 struct DepthwiseConvWindow {};
 
 template <>
-struct DepthwiseConvWindow<8, 1, 1> {
+struct DepthwiseConvWindow<DepthwiseConvOutputRounding::kAwayFromZero, 8, 1,
+                           1> {
  public:
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
@@ -1512,7 +1514,8 @@ struct DepthwiseConvWindow<8, 1, 1> {
 };
 
 template <>
-struct DepthwiseConvWindow<8, 2, 2> {
+struct DepthwiseConvWindow<DepthwiseConvOutputRounding::kAwayFromZero, 8, 2,
+                           2> {
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
                          int64_t input_depth, int64_t input_row_size,
@@ -2546,11 +2549,13 @@ struct DepthwiseConvWindow<8, 2, 2> {
 
 enum class EdgeType { kCorner, kHorizontal, kVertical, kCenter };
 
-template <EdgeType kEdgeType, int kPadWidth, int kPadHeight>
+template <DepthwiseConvOutputRounding output_rounding, EdgeType kEdgeType,
+          int kPadWidth, int kPadHeight>
 struct DepthwiseConvPartial {};
 
 template <>
-struct DepthwiseConvPartial<EdgeType::kCenter, 1, 1> {
+struct DepthwiseConvPartial<DepthwiseConvOutputRounding::kAwayFromZero,
+                            EdgeType::kCenter, 1, 1> {
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
                          const DepthwiseConvParams* params_ptr) {
@@ -2663,7 +2668,8 @@ struct DepthwiseConvPartial<EdgeType::kCenter, 1, 1> {
 };
 
 template <>
-struct DepthwiseConvPartial<EdgeType::kCorner, 1, 1> {
+struct DepthwiseConvPartial<DepthwiseConvOutputRounding::kAwayFromZero,
+                            EdgeType::kCorner, 1, 1> {
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
                          const DepthwiseConvParams* params_ptr) {
@@ -2828,7 +2834,8 @@ struct DepthwiseConvPartial<EdgeType::kCorner, 1, 1> {
 };
 
 template <>
-struct DepthwiseConvPartial<EdgeType::kHorizontal, 1, 1> {
+struct DepthwiseConvPartial<DepthwiseConvOutputRounding::kAwayFromZero,
+                            EdgeType::kHorizontal, 1, 1> {
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
                          const DepthwiseConvParams* params_ptr) {
@@ -3027,7 +3034,8 @@ struct DepthwiseConvPartial<EdgeType::kHorizontal, 1, 1> {
 };
 
 template <>
-struct DepthwiseConvPartial<EdgeType::kVertical, 1, 1> {
+struct DepthwiseConvPartial<DepthwiseConvOutputRounding::kAwayFromZero,
+                            EdgeType::kVertical, 1, 1> {
   static inline void Run(const uint8* input_ptr, const uint8* filter_ptr,
                          const int32* bias_ptr, uint8* output_ptr,
                          const DepthwiseConvParams* params_ptr) {
@@ -3287,7 +3295,8 @@ struct ShuffleParams {
         input_height(get_shuffle_input_size(stride_height, output_height)) {}
 };
 
-template <int32 kStrideWidth, int32 kStrideHeight>
+template <DepthwiseConvOutputRounding output_rounding, int32 kStrideWidth,
+          int32 kStrideHeight>
 struct DepthwiseConvThroughDepth {
   // Runs the DepthwiseConvWindow kernels through the depth dimension from
   // |start_depth| to |end_depth|. Keep this not inlined to maintain a small
@@ -3299,7 +3308,7 @@ struct DepthwiseConvThroughDepth {
       int64_t input_depth, int64_t input_row_size, int32 output_window_height,
       int32 output_window_width, const DepthwiseConvParams& params) {
     for (; start_depth <= end_depth - 8; start_depth += 8) {
-      DepthwiseConvWindow<8, kStrideWidth, kStrideHeight>::Run(
+      DepthwiseConvWindow<output_rounding, 8, kStrideWidth, kStrideHeight>::Run(
           input_ptr, filter_ptr, bias_ptr, output_ptr, input_depth,
           input_row_size, output_window_height, output_window_width, &params);
       input_ptr += 8;
@@ -3310,9 +3319,11 @@ struct DepthwiseConvThroughDepth {
   }
 };
 
-template <int32 kStrideWidth, int32 kStrideHeight>
+template <DepthwiseConvOutputRounding output_rounding, int32 kStrideWidth,
+          int32 kStrideHeight>
 struct DepthwiseConvMultiRow {
-  using ConvKernel = DepthwiseConvThroughDepth<kStrideWidth, kStrideHeight>;
+  using ConvKernel =
+      DepthwiseConvThroughDepth<output_rounding, kStrideWidth, kStrideHeight>;
 
   static inline void Run(const uint8* input_data, int32 start_x, int32 end_x,
                          const uint8* filter_data, const int32* bias_data,
@@ -3411,6 +3422,7 @@ struct DepthwiseConvMultiRow {
 //   * Corner edges.
 //   * Horizontal edges.
 //   * Vertical edges.
+template <DepthwiseConvOutputRounding output_rounding>
 inline void DepthwiseConvHandlePadding(const uint8* input_data,
                                        const uint8* filter_data,
                                        const int32* bias_data,
@@ -3419,7 +3431,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
   if (params.input_width == 1 && params.input_height == 1) {
     const uint8* filter_ptr =
         filter_data + params.filter_row_size + params.output_depth;
-    DepthwiseConvPartial<EdgeType::kCenter, 1, 1>::Run(
+    DepthwiseConvPartial<output_rounding, EdgeType::kCenter, 1, 1>::Run(
         input_data, filter_ptr, bias_data, output_data, &params);
     return;
   }
@@ -3435,7 +3447,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
       filter_data + params.filter_row_size + params.output_depth;
   uint8* output_ptr = output_data;
 
-  DepthwiseConvPartial<EdgeType::kCorner, 1, 1>::Run(
+  DepthwiseConvPartial<output_rounding, EdgeType::kCorner, 1, 1>::Run(
       input_ptr, filter_ptr, bias_data, output_ptr, &params);
 
   input_ptr += (params.stride_width - 1) * params.input_depth;
@@ -3444,13 +3456,13 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
   for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner;
        out_x++) {
-    DepthwiseConvPartial<EdgeType::kHorizontal, 1, 1>::Run(
+    DepthwiseConvPartial<output_rounding, EdgeType::kHorizontal, 1, 1>::Run(
         input_ptr, filter_ptr, bias_data, output_ptr, &params);
     input_ptr += params.stride_width * params.input_depth;
     output_ptr += params.output_depth;
   }
 
-  DepthwiseConvPartial<EdgeType::kCorner, 1, 1>::Run(
+  DepthwiseConvPartial<output_rounding, EdgeType::kCorner, 1, 1>::Run(
       input_ptr, filter_ptr, bias_data, output_ptr, &params);
 
   // Handle left side.
@@ -3460,7 +3472,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
   for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner;
        out_y++) {
-    DepthwiseConvPartial<EdgeType::kVertical, 1, 1>::Run(
+    DepthwiseConvPartial<output_rounding, EdgeType::kVertical, 1, 1>::Run(
         input_ptr, filter_ptr, bias_data, output_ptr, &params);
     input_ptr += params.stride_width * params.input_row_size;
     output_ptr += params.output_row_size;
@@ -3475,7 +3487,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
   for (int32 out_y = out_y_start_corner + 1; out_y < out_y_end_corner;
        out_y++) {
-    DepthwiseConvPartial<EdgeType::kVertical, 1, 1>::Run(
+    DepthwiseConvPartial<output_rounding, EdgeType::kVertical, 1, 1>::Run(
         input_ptr, filter_ptr, bias_data, output_ptr, &params);
     input_ptr += params.stride_width * params.input_row_size;
     output_ptr += params.output_row_size;
@@ -3487,7 +3499,7 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
   output_ptr =
       output_data + (params.output_height - 1) * params.output_row_size;
 
-  DepthwiseConvPartial<EdgeType::kCorner, 1, 1>::Run(
+  DepthwiseConvPartial<output_rounding, EdgeType::kCorner, 1, 1>::Run(
       input_ptr, filter_ptr, bias_data, output_ptr, &params);
 
   input_ptr += (params.stride_width == 1) ? 0 : params.input_depth;
@@ -3496,13 +3508,13 @@ inline void DepthwiseConvHandlePadding(const uint8* input_data,
 
   for (int32 out_x = out_x_start_corner + 1; out_x < out_x_end_corner;
        out_x++) {
-    DepthwiseConvPartial<EdgeType::kHorizontal, 1, 1>::Run(
+    DepthwiseConvPartial<output_rounding, EdgeType::kHorizontal, 1, 1>::Run(
         input_ptr, filter_ptr, bias_data, output_ptr, &params);
     input_ptr += params.stride_width * params.input_depth;
     output_ptr += params.output_depth;
   }
 
-  DepthwiseConvPartial<EdgeType::kCorner, 1, 1>::Run(
+  DepthwiseConvPartial<output_rounding, EdgeType::kCorner, 1, 1>::Run(
       input_ptr, filter_ptr, bias_data, output_ptr, &params);
 }
 
@@ -3568,6 +3580,7 @@ inline bool Fast3x3FilterKernelSupported(
   return supported;
 }
 
+template <DepthwiseConvOutputRounding output_rounding>
 inline void DepthwiseConv3x3Filter(
     const DepthwiseParams& rt_params, const RuntimeShape& input_shape,
     const uint8* input_data, const RuntimeShape& filter_shape,
@@ -3645,10 +3658,12 @@ inline void DepthwiseConv3x3Filter(
     eight_row_shuffle_params = ShuffleParams(2, 8, 2, 2);
   }
 
-  using conv_multirow_func_t = decltype(&DepthwiseConvMultiRow<1, 1>::Run);
-  conv_multirow_func_t conv_multirow_func = DepthwiseConvMultiRow<1, 1>::Run;
+  using conv_multirow_func_t =
+      decltype(&DepthwiseConvMultiRow<output_rounding, 1, 1>::Run);
+  conv_multirow_func_t conv_multirow_func =
+      DepthwiseConvMultiRow<output_rounding, 1, 1>::Run;
   if (stride_width == 2) {
-    conv_multirow_func = DepthwiseConvMultiRow<2, 2>::Run;
+    conv_multirow_func = DepthwiseConvMultiRow<output_rounding, 2, 2>::Run;
   }
 
   // Allocate maximum memory needed for shuffled input.
@@ -3689,8 +3704,8 @@ inline void DepthwiseConv3x3Filter(
     int32 end_y = row_end;
 
     if (pad_width == 1 && pad_height == 1) {
-      DepthwiseConvHandlePadding(input_ptr, filter_data, bias_data, output_ptr,
-                                 params);
+      DepthwiseConvHandlePadding<output_rounding>(
+          input_ptr, filter_data, bias_data, output_ptr, params);
 
       // Update extents now that the edges have been handled.
       out_x = 1;
",0,train
e5dcaf921cf9feefd42b2ab176590c696b3b0285,tensorflow/tensorflow,"Fix #15900 (#16154)

- Added `save_checkpoint_steps` attribute to `MonitoredTrainingSession`.
If both `save_checkpoint_steps` and `save_checkpoint_secs` are `None`, then the default saver is disabled. Default is `save_checkpoint_secs=600`
- Added `test_save_checkpoint_steps`
- Updated golden file",monitored_session.py,"@@ -281,13 +281,14 @@ def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
                              scaffold=None,
                              hooks=None,
                              chief_only_hooks=None,
-                             save_checkpoint_secs=600,
+                             save_checkpoint_secs=USE_DEFAULT,
                              save_summaries_steps=USE_DEFAULT,
                              save_summaries_secs=USE_DEFAULT,
                              config=None,
                              stop_grace_period_secs=120,
                              log_step_count_steps=100,
-                             max_wait_secs=7200):
+                             max_wait_secs=7200,
+                             save_checkpoint_steps=USE_DEFAULT):
   """"""Creates a `MonitoredSession` for training.
 
   For a chief, this utility sets proper session initializer/restorer. It also
@@ -310,8 +311,10 @@ def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
     chief_only_hooks: list of `SessionRunHook` objects. Activate these hooks if
       `is_chief==True`, ignore otherwise.
     save_checkpoint_secs: The frequency, in seconds, that a checkpoint is saved
-      using a default checkpoint saver. If `save_checkpoint_secs` is set to
-      `None`, then the default checkpoint saver isn't used.
+      using a default checkpoint saver. If both `save_checkpoint_steps` and
+      `save_checkpoint_secs` are set to `None`, then the default checkpoint
+      saver isn't used. If both are provided, then only `save_checkpoint_secs`
+      is used. Default 600.
     save_summaries_steps: The frequency, in number of global steps, that the
       summaries are written to disk using a default summary saver. If both
       `save_summaries_steps` and `save_summaries_secs` are set to `None`, then
@@ -330,6 +333,11 @@ def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
       become available. This should be kept relatively short to help detect
       incorrect code, but sometimes may need to be increased if the chief takes
       a while to start up.
+    save_checkpoint_steps: The frequency, in number of global steps, that a
+      checkpoint is saved using a default checkpoint saver. If both
+      `save_checkpoint_steps` and `save_checkpoint_secs` are set to `None`, then
+      the default checkpoint saver isn't used. If both are provided, then only
+      `save_checkpoint_secs` is used. Default not enabled.
 
   Returns:
     A `MonitoredSession` object.
@@ -342,6 +350,15 @@ def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
   elif save_summaries_steps == USE_DEFAULT:
     save_summaries_steps = None
 
+  if save_checkpoint_steps == USE_DEFAULT and \
+    save_checkpoint_secs == USE_DEFAULT:
+    save_checkpoint_steps = None
+    save_checkpoint_secs = 600
+  elif save_checkpoint_secs == USE_DEFAULT:
+    save_checkpoint_secs = None
+  elif save_checkpoint_steps == USE_DEFAULT:
+    save_checkpoint_steps = None
+
   scaffold = scaffold or Scaffold()
   if not is_chief:
     session_creator = WorkerSessionCreator(
@@ -374,9 +391,13 @@ def MonitoredTrainingSession(master='',  # pylint: disable=invalid-name
           save_steps=save_summaries_steps,
           save_secs=save_summaries_secs,
           output_dir=checkpoint_dir))
-    if save_checkpoint_secs and save_checkpoint_secs > 0:
+    if (save_checkpoint_secs and save_checkpoint_secs > 0) or (
+        save_checkpoint_steps and save_checkpoint_steps > 0):
       all_hooks.append(basic_session_run_hooks.CheckpointSaverHook(
-          checkpoint_dir, save_secs=save_checkpoint_secs, scaffold=scaffold))
+          checkpoint_dir,
+          save_steps=save_checkpoint_steps,
+          save_secs=save_checkpoint_secs,
+          scaffold=scaffold))
 
   if hooks:
     all_hooks.extend(hooks)
",0,train
e5dcaf921cf9feefd42b2ab176590c696b3b0285,tensorflow/tensorflow,"Fix #15900 (#16154)

- Added `save_checkpoint_steps` attribute to `MonitoredTrainingSession`.
If both `save_checkpoint_steps` and `save_checkpoint_secs` are `None`, then the default saver is disabled. Default is `save_checkpoint_secs=600`
- Added `test_save_checkpoint_steps`
- Updated golden file",monitored_session_test.py,"@@ -282,6 +282,42 @@ class MonitoredTrainingSessionTest(test.TestCase):
           is_chief=True, checkpoint_dir=logdir) as session:
         self.assertEqual(2, session.run(gstep))
 
+  def test_save_checkpoint_steps(self):
+    logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_steps')
+    with ops.Graph().as_default():
+      gstep = variables_lib.get_or_create_global_step()
+      new_gstep = state_ops.assign_add(gstep, 1)
+      with monitored_session.MonitoredTrainingSession(
+          is_chief=True,
+          checkpoint_dir=logdir,
+          save_checkpoint_steps=100,
+          log_step_count_steps=10) as session:
+        for _ in range(100):
+          session.run(new_gstep)
+      # A restart will find the checkpoint and recover automatically.
+      with monitored_session.MonitoredTrainingSession(
+          is_chief=True, checkpoint_dir=logdir) as session:
+        self.assertEqual(100, session.run(gstep))
+
+  def test_save_checkpoint_secs(self):
+    logdir = _test_dir(self.get_temp_dir(), 'test_save_checkpoint_secs')
+    with ops.Graph().as_default():
+      gstep = variables_lib.get_or_create_global_step()
+      new_gstep = state_ops.assign_add(gstep, 1)
+      with monitored_session.MonitoredTrainingSession(
+          is_chief=True,
+          checkpoint_dir=logdir,
+          save_checkpoint_secs=0.1,
+          log_step_count_steps=10) as session:
+        session.run(new_gstep)
+        time.sleep(0.2)
+        for _ in range(10):
+          session.run(new_gstep)
+      # A restart will find the checkpoint and recover automatically.
+      with monitored_session.MonitoredTrainingSession(
+          is_chief=True, checkpoint_dir=logdir) as session:
+        self.assertEqual(11, session.run(gstep))
+
   def test_summaries_steps(self):
     logdir = _test_dir(self.get_temp_dir(), 'test_summaries_steps')
     with ops.Graph().as_default():
",0,train
46f86abb7bd15989b88f69e6027d867718675789,tensorflow/tensorflow,"[XLA] Fix a bug in SplitF64ToF32

Overflows in SplitF64ToF32 could result in non-finite lower components which,
when reconstructed, would result in NaN.

PiperOrigin-RevId: 343889763
Change-Id: Ie4dffd64738a22c4bc2377a40e4760cfe776e95a",util.cc,"@@ -367,15 +367,15 @@ string SanitizeFileName(string file_name) {
 //     precision, Numerische Mathematik, vol. 18, pp. 224–242, 1971.
 std::pair<float, float> SplitF64ToF32(double x) {
   const float x_f32 = static_cast<float>(x);
-  // Early return if x is an infinity or NaN.
-  if (!std::isfinite(x)) {
-    return std::make_pair(x_f32, 0.0f);
-  }
 
-  // Only values within the range of F32 are supported, unless it is infinity.
-  // Small values with large negative exponents would be rounded to zero.
+  // Early return if x is an infinity or NaN.
   if (!std::isfinite(x_f32)) {
-    LOG(WARNING) << ""Out of range F64 constant detected: "" << x;
+    // Only values within the range of F32 are supported, unless it is infinity.
+    // Small values with large negative exponents would be rounded to zero.
+    if (std::isfinite(x)) {
+      LOG(WARNING) << ""Out of range F64 constant detected: "" << x;
+    }
+    return std::make_pair(x_f32, 0.0f);
   }
 
   // The high float is simply the double rounded to the nearest float. Because
",0,train
46f86abb7bd15989b88f69e6027d867718675789,tensorflow/tensorflow,"[XLA] Fix a bug in SplitF64ToF32

Overflows in SplitF64ToF32 could result in non-finite lower components which,
when reconstructed, would result in NaN.

PiperOrigin-RevId: 343889763
Change-Id: Ie4dffd64738a22c4bc2377a40e4760cfe776e95a",util_test.cc,"@@ -126,5 +126,13 @@ TEST(UtilTest, RoundTripFpToString) {
             ""-nan"");
 }
 
+TEST(UtilTest, SplitF64ToF32) {
+  // Overflowing the F32 exponent in SplitF64ToF32 should result in a pair of
+  // [∞,0].
+  EXPECT_EQ(SplitF64ToF32(std::numeric_limits<double>::max()).first,
+            std::numeric_limits<float>::infinity());
+  EXPECT_EQ(SplitF64ToF32(std::numeric_limits<double>::max()).second, 0.0f);
+}
+
 }  // namespace
 }  // namespace xla
",0,train
b5358aa31cf6f7e80def9fbaea0513d04b2891b8,tensorflow/tensorflow,"Do not accumulate loop invariants in while_v2.

PiperOrigin-RevId: 258199946",while_v2_test.py,"@@ -225,15 +225,6 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
     train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
     train_op.append(outputs[0])
 
-    def GetOptimizedGraph():
-      mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
-      config = config_pb2.ConfigProto()
-      config.graph_options.rewrite_options.CopyFrom(
-          rewriter_config_pb2.RewriterConfig(
-              constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
-              memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL))
-      return tf_optimizer.OptimizeGraph(config, mg)
-
     g = GetOptimizedGraph()
     # TODO(b/136034023): while_v2 adds an extra loop_counter which is not pruned
     # away, causing an extra Enter node.
@@ -267,6 +258,30 @@ class WhileV2Test(test.TestCase, parameterized.TestCase):
   def testPruningV2(self):
     self._testPruning()
 
+  @parameterized.named_parameters(
+      (""V1"", control_flow_ops.while_loop, ""StackPushV2""),
+      (""V2"", while_loop_v2, ""TensorListPushBack""),
+  )
+  @test_util.run_deprecated_v1
+  def testDoNotAccumulateInvariants(self, while_loop_fn, push_op):
+    # Tests that loop invariants, i.e., tensors that are ""captured"" by the
+    # while loop and not passed as loop variables are not accumulated in
+    # gradient computation.
+    v = constant_op.constant(5.0, name=""v"")
+
+    r = while_loop_fn(
+        lambda _: True, lambda x: v * x, [1.0], maximum_iterations=5)
+
+    output = gradients_impl.gradients(r, v)[0]
+    train_op = ops.get_collection_ref(ops.GraphKeys.TRAIN_OP)
+    train_op.append(output)
+
+    g = GetOptimizedGraph()
+    # The gradient for v * x requires the value of both v and x. Since v is a
+    # loop invariant it is not accumulated so we have just one accumulator for
+    # x.
+    self.assertLen([n for n in g.node if n.op == push_op], 1)
+
   @test_util.run_deprecated_v1
   def testCaptureExternalTensorInCond(self):
     x = constant_op.constant(2.)
@@ -522,5 +537,15 @@ def ScalarShape():
   return ops.convert_to_tensor([], dtype=dtypes.int32)
 
 
+def GetOptimizedGraph():
+  mg = meta_graph.create_meta_graph_def(graph=ops.get_default_graph())
+  config = config_pb2.ConfigProto()
+  config.graph_options.rewrite_options.CopyFrom(
+      rewriter_config_pb2.RewriterConfig(
+          constant_folding=rewriter_config_pb2.RewriterConfig.OFF,
+          memory_optimization=rewriter_config_pb2.RewriterConfig.MANUAL))
+  return tf_optimizer.OptimizeGraph(config, mg)
+
+
 if __name__ == ""__main__"":
   test.main()
",0,train
b5358aa31cf6f7e80def9fbaea0513d04b2891b8,tensorflow/tensorflow,"Do not accumulate loop invariants in while_v2.

PiperOrigin-RevId: 258199946",while_v2.py,"@@ -470,6 +470,16 @@ def _create_grad_func(ys, xs, grads, cond_graph, body_graph, name, while_op,
   counter = constant_op.constant(
       0, dtype=total_iters.dtype, name=""grad_counter"")
 
+  # Build frozen sets so that we do not have linear time lookups in
+  # `_is_loop_invariant`. Note: `body_graph.inputs` and `body_graph.outputs`
+  # may get updated during gradient computation because we add accumulators to
+  # the forward op. However, those are not loop invariants so wouldn't affect
+  # the output of `_is_loop_invariant`. Also we would never attempt to capture
+  # those accumulators so `_is_loop_invariant` should never receive those new
+  # tensors as args.
+  body_graph_inputs = frozenset(body_graph.inputs)
+  body_graph_outputs = frozenset(body_graph.outputs)
+
   args = [counter, maximum_iterations, total_iters] + list(grads)
   # Note: The returned function does not have `args` in the list of
   # `external_captures`.
@@ -478,18 +488,28 @@ def _create_grad_func(ys, xs, grads, cond_graph, body_graph, name, while_op,
       lambda *args: _grad_fn(ys, xs, args, body_graph),
       args, {},
       func_graph=_WhileBodyGradFuncGraph(name, cond_graph, body_graph,
-                                         maximum_iterations, while_op))
-
-  # Add the popped accumulators to the list of outputs.
-  for internal_capture in grad_func_graph.internal_captures:
+                                         maximum_iterations, while_op,
+                                         body_graph_inputs, body_graph_outputs))
+
+  # Update the list of outputs with tensors corresponding to the captured
+  # tensors. We capture 3 types of tensors when building the grad fn:
+  # 1. Accumulators for forward graph intermediates which are not loop
+  #    invariants. The outputs corresponding to these are populated in
+  #    `popped_tensor_lists` by `_WhileBodyGradFuncGraph`.
+  # 2. Resources, which are output as is.
+  # 3. Forward graph loop invariants, which are output as is.
+  for external_capture, internal_capture in grad_func_graph.captures.items():
     if internal_capture in grad_func_graph.popped_tensor_lists:
       new_output = grad_func_graph.popped_tensor_lists[internal_capture]
-    elif internal_capture.dtype == dtypes.resource:
+    elif (internal_capture.dtype == dtypes.resource or _is_loop_invariant(
+        external_capture, body_graph_inputs, body_graph_outputs)):
       new_output = internal_capture
     else:
-      raise ValueError(""Tensor %s is in list of internal_captures but is""
-                       "" neither a resource nor is in popped_tensor_lists."" %
-                       str(internal_capture))
+      raise ValueError(""Tensor %s which captures %s is in list of ""
+                       ""internal_captures but is not a resource, is not in ""
+                       ""popped_tensor_lists and does not capture a loop ""
+                       ""invariant."" %
+                       (str(internal_capture), str(external_capture)))
     grad_func_graph.outputs.append(new_output)
     grad_func_graph.structured_outputs.append(new_output)
 
@@ -562,7 +582,7 @@ def _resolve_grad_captures(body_graph, body_grad_graph, while_op):
     # graph or a captured resource variable (note that input gradients are
     # regular non-captured inputs).
     if t.graph == body_graph:
-      # Captured accumulator
+      # Captured accumulator or loop invariant.
       t = while_op.outputs[t.graph.outputs.index(t)]
       # Note: We rely on the capturing logic of the gradient While op graph to
       # correctly capture the tensors in `body_graph.outer_graph`. Both cond_v2
@@ -715,7 +735,8 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
   """"""
 
   def __init__(self, name, forward_cond_graph, forward_body_graph,
-               maximum_iterations, forward_while_op):
+               maximum_iterations, forward_while_op, body_graph_inputs,
+               body_graph_outputs):
     super(_WhileBodyGradFuncGraph, self).__init__(name)
     self.empty_tensor_lists = []
     self.popped_tensor_lists = {}
@@ -725,6 +746,11 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
     self._forward_cond_graph = forward_cond_graph
     self._maximum_iterations = maximum_iterations
     self._forward_while_op = forward_while_op
+    # Only for use in `_is_loop_invariant`. These are not updated when
+    # additional tensors are added to `forward_body_graph.inputs` and
+    # `forward_body_graph.outputs` in `_capture_helper`.
+    self._forward_graph_inputs = body_graph_inputs
+    self._forward_graph_outputs = body_graph_outputs
     # Dict from forward intermediate tensor to its indirectly captured tensor
     # in this graph. Indirect capturing happens in two ways:
     # 1. For non-resource tensors we capture their accumulators from the forward
@@ -781,6 +807,15 @@ class _WhileBodyGradFuncGraph(util.WhileBodyFuncGraph):
     if tensor.dtype == dtypes.resource:
       return self._resource_capture_helper(tensor)
 
+    # No need to accumulate loop invariants. Capture them directly.
+    # The captured tensor gets resolved to the corresponding while output in
+    # `_resolve_grad_captures`.
+    if _is_loop_invariant(tensor, self._forward_graph_inputs,
+                          self._forward_graph_outputs):
+      captured_tensor = super(_WhileBodyGradFuncGraph,
+                              self)._capture_helper(tensor, name)
+      return captured_tensor
+
     # Create or find an existing accumulator output for `tensor` in the forward
     # graph, and fetch from this accumulator in the gradient graph to get the
     # raw intermediate value.
@@ -956,4 +991,8 @@ def _build_accumulator_name(tensor):
   # Tensor name may be of the form ""pow/y:0"". Name scope does not allow "":"".
   return ""{}/accumulator"".format(tensor.name).replace("":"", ""_"")
 
+
+def _is_loop_invariant(tensor, inputs, outputs):
+  return tensor in inputs and tensor in outputs
+
 # pylint: enable=protected-access
",0,train
604988b5d4e8cec6564db6502e6e40eefac8fc67,tensorflow/tensorflow,"Add operator overloads to AutoCastVariable.

The code was copied from DistributionStrategy at https://github.com/tensorflow/tensorflow/blob/81acfa851ecf413df02c6bdf4795630524f2f859/tensorflow/python/distribute/values.py#L401 with slight modifications.

PiperOrigin-RevId: 256469842",autocast_variable.py,"@@ -148,8 +148,63 @@ class AutoCastVariable(trackable.Trackable):
     """"""Pass resource_variable_ops.is_resource_variable check.""""""
     pass
 
-  # TODO(reedwm): Define operator overloads.
-
+  # Operator overloads:
+  # Note we only overload operators that support floating-point types, as
+  # non-float variables cannot be wrapped with an AutoCastVariable.
+
+  # pylint: disable=multiple-statements
+  def __add__(self, o): return self.value() + o
+  def __radd__(self, o): return o + self.value()
+  def __sub__(self, o): return self.value() - o
+  def __rsub__(self, o): return o - self.value()
+  def __mul__(self, o): return self.value() * o
+  def __rmul__(self, o): return o * self.value()
+  def __truediv__(self, o): return self.value() / o
+  def __rtruediv__(self, o): return o / self.value()
+  def __floordiv__(self, o): return self.value() // o
+
+  def __rfloordiv__(self, o): return o // self.value()
+  def __mod__(self, o): return self.value() % o
+  def __rmod__(self, o): return o % self.value()
+  def __lt__(self, o): return self.value() < o
+  def __le__(self, o): return self.value() <= o
+  def __gt__(self, o): return self.value() > o
+  def __ge__(self, o): return self.value() >= o
+  def __getitem__(self, o): return self.value()[o]
+  def __pow__(self, o, modulo=None): return pow(self.value(), o, modulo)
+  def __rpow__(self, o): return pow(o, self.value())
+  def __neg__(self): return -self.value()
+  def __abs__(self): return abs(self.value())
+
+  def __div__(self, o):
+    try:
+      return self.value().__div__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __rdiv__(self, o):
+    try:
+      return self.value().__rdiv__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __matmul__(self, o):
+    try:
+      return self.value().__matmul__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  def __rmatmul__(self, o):
+    try:
+      return self.value().__rmatmul__(o)
+    except AttributeError:
+      # See https://docs.python.org/3/library/constants.html#NotImplemented
+      return NotImplemented
+
+  # pylint: enable=multiple-statements
 
 ops.register_tensor_conversion_function(
     AutoCastVariable, AutoCastVariable._dense_var_to_tensor)  # pylint:disable=protected-access
",0,train
604988b5d4e8cec6564db6502e6e40eefac8fc67,tensorflow/tensorflow,"Add operator overloads to AutoCastVariable.

The code was copied from DistributionStrategy at https://github.com/tensorflow/tensorflow/blob/81acfa851ecf413df02c6bdf4795630524f2f859/tensorflow/python/distribute/values.py#L401 with slight modifications.

PiperOrigin-RevId: 256469842",autocast_variable_test.py,"@@ -97,30 +97,46 @@ class AutoCastVariableTest(test.TestCase, parameterized.TestCase):
   @parameterized.named_parameters(*TESTCASES)
   def test_operator_overloads(self, distribute):
     with get_distribute_scope(distribute):
-      x = get_var(1., dtypes.float32)
-      x = get_autocast_var(x, distribute)
-      self.evaluate(x.initializer)
-
-    v1 = constant_op.constant(2., dtype=dtypes.float32)
-    v2 = constant_op.constant(2., dtype=dtypes.float16)
-
-    # Because autocast variables do not yet define operator overloads, the
-    # operator is defined by the non-variable tensor
-
-    # Test variable as the LHS. Currently, this is not supported with
-    # distributed autocast variables
-    if not distribute:
-      self.assertEqual(self.evaluate(x + v1), 3.)
-
-      x._read_dtype = dtypes.float16
-      self.assertEqual(self.evaluate(x + v2), 3.)
-
-    # Test variable as the RHS
-    x._read_dtype = dtypes.float32
-    self.assertEqual(self.evaluate(v1 + x), 3.)
-
-    x._read_dtype = dtypes.float16
-    self.assertEqual(self.evaluate(v2 + x), 3.)
+      for read_dtype in (dtypes.float32, dtypes.float16):
+        x = get_var(7., dtypes.float32)
+        x = get_autocast_var(x, distribute)
+        x._read_dtype = read_dtype
+        self.evaluate(x.initializer)
+        self.assertAlmostEqual(8, self.evaluate(x + 1))
+        self.assertAlmostEqual(10, self.evaluate(3 + x))
+        self.assertAlmostEqual(14, self.evaluate(x + x))
+        self.assertAlmostEqual(5, self.evaluate(x - 2))
+        self.assertAlmostEqual(6, self.evaluate(13 - x))
+        self.assertAlmostEqual(0, self.evaluate(x - x))
+        self.assertAlmostEqual(14, self.evaluate(x * 2))
+        self.assertAlmostEqual(21, self.evaluate(3 * x))
+        self.assertAlmostEqual(49, self.evaluate(x * x))
+        self.assertAlmostEqual(3.5, self.evaluate(x / 2))
+        self.assertAlmostEqual(1.5, self.evaluate(10.5 / x))
+        self.assertAlmostEqual(3, self.evaluate(x // 2))
+        self.assertAlmostEqual(2, self.evaluate(15 // x))
+        if read_dtype == dtypes.float32:
+          # The ""mod"" operator does not support float16
+          self.assertAlmostEqual(1, self.evaluate(x % 2))
+          self.assertAlmostEqual(2, self.evaluate(16 % x))
+        self.assertTrue(self.evaluate(x < 12))
+        self.assertTrue(self.evaluate(x <= 12))
+        self.assertFalse(self.evaluate(x > 12))
+        self.assertFalse(self.evaluate(x >= 12))
+        self.assertFalse(self.evaluate(12 < x))
+        self.assertFalse(self.evaluate(12 <= x))
+        self.assertTrue(self.evaluate(12 > x))
+        self.assertTrue(self.evaluate(12 >= x))
+        self.assertAlmostEqual(343, self.evaluate(pow(x, 3)), places=4)
+        self.assertAlmostEqual(128, self.evaluate(pow(2, x)), places=4)
+        self.assertAlmostEqual(-7, self.evaluate(-x))
+        self.assertAlmostEqual(7, self.evaluate(abs(x)))
+
+        x = get_var([7, 8, 9], dtypes.float32)
+        x = get_autocast_var(x, distribute)
+        x._read_dtype = read_dtype
+        self.evaluate(x.initializer)
+        self.assertEqual(self.evaluate(x[1]), 8)
 
   @parameterized.named_parameters(*TESTCASES)
   def test_assign(self, distribute):
",0,train
0ebd45086c7a9e412c4102f42004b6c02578fc49,tensorflow/tensorflow,fix mistype,generic_utils_test.py,"@@ -313,8 +313,8 @@ class SerializeKerasObjectTest(test.TestCase):
   def test_serialize_type_object_initializer(self):
      layer = keras.layers.Dense(
          1,
-         kernel_initializer=keras.initializer.ones,
-         bias_initializer=keras.initializer.zeros)
+         kernel_initializer=keras.initializers.ones,
+         bias_initializer=keras.initializers.zeros)
      config = keras.layers.serialize(layer)
      self.assertEqual(
          config['config']['bias_initializer'],
",0,train
952c2ab177ece6e1d2ddeb3d59f0f5d617532dae,tensorflow/tensorflow,"MultiWorkerTutorialTest: Add the model saving and loading parts. The tutorial will use the same pieces of code to demonstrate how to save and load (with MWMS) in a multi-worker environment.

PiperOrigin-RevId: 319902843
Change-Id: If3e00764c0b5f545da0e003f9c14a88be48a1ff6",multi_worker_tutorial_test.py,"@@ -33,6 +33,7 @@ from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import test_util
 from tensorflow.python.keras.datasets import mnist
 from tensorflow.python.keras.optimizer_v2 import gradient_descent
+from tensorflow.python.lib.io import file_io
 from tensorflow.python.platform import test
 from tensorflow.python.util import nest
 
@@ -104,7 +105,7 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase):
 
     num_workers = 4
 
-    def proc_func():
+    def proc_func(model_path):
       global_batch_size = per_worker_batch_size * num_workers
       strategy = collective_all_reduce_strategy.CollectiveAllReduceStrategy()
       with strategy.scope():
@@ -127,10 +128,47 @@ class MultiWorkerTutorialTest(parameterized.TestCase, test.TestCase):
           steps_per_epoch=20,
           callbacks=callbacks)
 
+      def _is_chief(task_type, task_id):
+        return task_type == 'chief' or (task_type == 'worker' and task_id == 0)
+
+      def _get_temp_dir(dirpath, task_id):
+        base_dirpath = 'workertemp_' + str(task_id)
+        temp_dir = os.path.join(dirpath, base_dirpath)
+        file_io.recursive_create_dir_v2(temp_dir)
+        return temp_dir
+
+      def write_filepath(filepath, task_type, task_id):
+        dirpath = os.path.dirname(filepath)
+        base = os.path.basename(filepath)
+        if not _is_chief(task_type, task_id):
+          dirpath = _get_temp_dir(dirpath, task_id)
+        return os.path.join(dirpath, base)
+
+      task_type, task_id = (strategy.cluster_resolver.task_type,
+                            strategy.cluster_resolver.task_id)
+      write_model_path = write_filepath(model_path, task_type, task_id)
+
+      multi_worker_model.save(write_model_path)
+      if not _is_chief(task_type, task_id):
+        file_io.delete_recursively_v2(os.path.dirname(write_model_path))
+
+      # Make sure chief finishes saving before non-chief's assertions.
+      multi_process_runner.barrier().wait()
+
+      if not file_io.file_exists(model_path):
+        raise RuntimeError()
+      if file_io.file_exists(write_model_path) != _is_chief(task_type, task_id):
+        raise RuntimeError()
+
+      loaded_model = keras.saving.save.load_model(model_path)
+      loaded_model.fit(multi_worker_dataset, epochs=2, steps_per_epoch=20)
+
+    model_path = os.path.join(self.get_temp_dir(), 'ckpt.tf')
     with test_util.skip_if_error(self, errors_impl.UnavailableError):
       mpr_result = multi_process_runner.run(
           proc_func,
           multi_worker_test_base.create_cluster_spec(num_workers=num_workers),
+          args=(model_path,),
           list_stdout=True)
 
     def extract_accuracy(worker_id, input_string):
",0,train
b949f47bb463ea38ddaa6f6fc0a3f94e05e8b646,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@d65c32fb41b0

Updates LLVM usage to match
[d65c32fb41b0](https://github.com/llvm/llvm-project/commit/d65c32fb41b0)

PiperOrigin-RevId: 375537865
Change-Id: I91152346f59e770926a2f8ace1c3241094ce1f90",dot_op_emitter.cc,"@@ -24,8 +24,7 @@ limitations under the License.
 #include ""llvm/IR/Module.h""
 #include ""llvm/IR/Value.h""
 #include ""mlir/Dialect/Linalg/Transforms/CodegenStrategy.h""  // from @llvm-project
-#include ""mlir/Dialect/StandardOps/EDSC/Intrinsics.h""  // from @llvm-project
-#include ""mlir/EDSC/Builders.h""  // from @llvm-project
+#include ""mlir/Dialect/StandardOps/Utils/Utils.h""  // from @llvm-project
 #include ""mlir/IR/Builders.h""  // from @llvm-project
 #include ""mlir/IR/BuiltinOps.h""  // from @llvm-project
 #include ""mlir/IR/MLIRContext.h""  // from @llvm-project
@@ -305,26 +304,6 @@ Status DotOpEmitter::EmitLinalgMatmul() {
         llvm::SmallVector<llvm::StringRef, 4> iteratorTypes(
             parallel_exprs.size(), toString(mlir::IteratorType::Parallel));
         iteratorTypes.push_back(toString(mlir::IteratorType::Reduction));
-        /// Helper struct to build simple arithmetic quantities with minimal
-        /// type inference support.
-        /// TODO: reuse the core abstraction once it is in a reusable location.
-        struct ArithBuilder {
-          ArithBuilder(mlir::OpBuilder& b, mlir::Location loc)
-              : b(b), loc(loc) {}
-          mlir::Value add(mlir::Value lhs, mlir::Value rhs) {
-            if (lhs.getType().isa<mlir::IntegerType>())
-              return b.create<mlir::AddIOp>(loc, lhs, rhs);
-            return b.create<mlir::AddFOp>(loc, lhs, rhs);
-          }
-          mlir::Value mul(mlir::Value lhs, mlir::Value rhs) {
-            if (lhs.getType().isa<mlir::IntegerType>())
-              return b.create<mlir::MulIOp>(loc, lhs, rhs);
-            return b.create<mlir::MulFOp>(loc, lhs, rhs);
-          }
-
-          mlir::OpBuilder& b;
-          mlir::Location loc;
-        };
         builder->create<mlir::linalg::GenericOp>(
             function.getLoc(),
             /*inputs=*/mlir::ValueRange{b, c},
@@ -334,7 +313,7 @@ Status DotOpEmitter::EmitLinalgMatmul() {
                 {b_exprs, c_exprs, parallel_exprs}),
             /*iteratorTypes=*/iteratorTypes,
             [](mlir::OpBuilder& b, mlir::Location loc, mlir::ValueRange args) {
-              ArithBuilder ab(b, loc);
+              mlir::ArithBuilder ab(b, loc);
               mlir::Value mul = ab.mul(args[0], args[1]);
               mlir::Value add = ab.add(mul, args[2]);
               b.create<mlir::linalg::YieldOp>(loc, add);
",0,train
eb03410ac0614ad25a7b0b487b7554affaa12ceb,tensorflow/tensorflow,added double braces to initialization,expected_output_data.h,"@@ -16,6 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_
 #define TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_
 
-static unsigned char expected_output_data[1][4] = {6, 8, 14, 16};
+static unsigned char expected_output_data[1][4] = {{6, 8, 14, 16}};
 
 #endif  // TENSORFLOW_LITE_MICRO_EXAMPLES_NETWORK_TESTER_EXPECTED_OUTPUT_DATA_H_
",0,train
3bfd77c175160ca0d4edded4d7861c1f8abfa929,tensorflow/tensorflow,"[TF:XLA] Re-enable ResourceApplyAddSign of 16 bit floats

This was blocked on an LLVM bug that was resolved in LLVM r336227. I had to increase the
fp tolerance to make the test pass.

 x: array([ 0.925781,  1.927734], dtype=float16)
 y: array([ 0.927734,  1.927734], dtype=float16)

PiperOrigin-RevId: 203808682",addsign_test.py,"@@ -64,9 +64,6 @@ class AddSignTest(xla_test.XLATestCase):
                  alpha=1.0,
                  beta=0.9):
     for dtype in self.float_types:
-      # TODO(b/111123982): remove once the bug is fixed.
-      if dtype == dtypes.float16:
-        continue
       with self.test_session(), self.test_scope():
         # Initialize variables for numpy implementation.
         m0, m1 = 0.0, 0.0
@@ -128,7 +125,8 @@ class AddSignTest(xla_test.XLATestCase):
           )
 
           # Validate updated params
-          self.assertAllCloseAccordingToType(var0_np, var0.eval())
+          self.assertAllCloseAccordingToType(
+              var0_np, var0.eval(), half_rtol=1e-2)
           self.assertAllCloseAccordingToType(var1_np, var1.eval())
 
   def testDense(self):
",0,train
3bfd77c175160ca0d4edded4d7861c1f8abfa929,tensorflow/tensorflow,"[TF:XLA] Re-enable ResourceApplyAddSign of 16 bit floats

This was blocked on an LLVM bug that was resolved in LLVM r336227. I had to increase the
fp tolerance to make the test pass.

 x: array([ 0.925781,  1.927734], dtype=float16)
 y: array([ 0.927734,  1.927734], dtype=float16)

PiperOrigin-RevId: 203808682",training_ops.cc,"@@ -719,9 +719,7 @@ class ResourceApplyAddSign : public ResourceApplySignBase {
     return alpha + decay;
   }
 };
-// TODO(b/111123982): Use kFloatTypes once the bug is fixed.
-REGISTER_XLA_OP(Name(""ResourceApplyAddSign"")
-                    .TypeConstraint(""T"", {DT_FLOAT, DT_DOUBLE, DT_BFLOAT16}),
+REGISTER_XLA_OP(Name(""ResourceApplyAddSign"").TypeConstraint(""T"", kFloatTypes),
                 ResourceApplyAddSign);
 
 class ResourceApplyPowerSign : public ResourceApplySignBase {
",0,train
e436475e805d259b5359f64aebba89a5b83e4aee,tensorflow/tensorflow,"[FLR] Use the correct `Options` when populating `Executor::Args`.

My recent change modified how `Executor::Args` was populated and missed the fact that we
rewrite the `Options` to add in a created `Rendezvous` object in some cases. This change correctly uses the rewritten `Options` in both cases.

PiperOrigin-RevId: 220218606",function.cc,"@@ -1028,9 +1028,9 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
   }
 
   Executor::Args exec_args;
-  ExecutorArgsFromOptions(opts, frame, &exec_args);
+  ExecutorArgsFromOptions(run_opts, frame, &exec_args);
 
-  bool allow_dead_tensors = opts.allow_dead_tensors;
+  bool allow_dead_tensors = run_opts.allow_dead_tensors;
   item->exec->RunAsync(
       // Executor args
       exec_args,
@@ -1085,7 +1085,7 @@ void FunctionLibraryRuntimeImpl::Run(const Options& opts, Handle handle,
   DCHECK(run_opts.runner != nullptr);
 
   Executor::Args exec_args;
-  ExecutorArgsFromOptions(opts, frame, &exec_args);
+  ExecutorArgsFromOptions(run_opts, frame, &exec_args);
   item->exec->RunAsync(exec_args, std::move(done));
 }
 
",0,train
80347abfbff60817c86b6de62709f6d3d3da00ce,tensorflow/tensorflow,"Add 'tf_saved_model.under_construction' attribute in tf_saved_model dialect

This change will introduce a new module attribute,
`tf_saved_model.under_construction`, which indicates that the given IR hasn't
completed the transformation.

1) This attribute will be added before promoting variables to tf.resource_name
   arguments.
2) Skip the test for allowing saved_model attributes only in the exported
   function if there is a `tf_saved_model.under_construction`.
3) This attribute will be removed when lifting variables.

PiperOrigin-RevId: 319455102
Change-Id: Id1950898a0abc73c3b0ad0a5f25405ffb50958d9",tf_saved_model.cc,"@@ -298,6 +298,7 @@ static LogicalResult VerifySavedModelModule(
 
 LogicalResult VerifyExportedFunc(FuncOp func) {
   bool reached_bound_inputs = false;
+  auto module = func.getParentOfType<ModuleOp>();
   for (int i = 0, e = func.getNumArguments(); i < e; i++) {
     if (func.getArgAttr(i, ""tf_saved_model.bound_input"")) {
       reached_bound_inputs = true;
@@ -312,7 +313,9 @@ LogicalResult VerifyExportedFunc(FuncOp func) {
       continue;
     }
     if (func.getArgAttr(i, ""tf.resource_name"")) {
-      continue;
+      if (module.getAttr(""tf_saved_model.under_construction"")) continue;
+      return func.emitError() << ""'tf.resource_name' attribute is not allowed ""
+                                 ""unless it is being under construction"";
     }
     return func.emitError()
            << ""all arguments should have 'tf_saved_model.index_path', ""
@@ -371,6 +374,9 @@ LogicalResult TensorFlowSavedModelDialect::verifyOperationAttribute(
     }
     return VerifySavedModelModule(module, this);
   }
+  if (named_attr.first == ""tf_saved_model.under_construction"") {
+    return success();
+  }
 
   return op->emitError() << ""unknown tf_saved_model dialect attribute '""
                          << named_attr.first << ""'"";
",0,train
80347abfbff60817c86b6de62709f6d3d3da00ce,tensorflow/tensorflow,"Add 'tf_saved_model.under_construction' attribute in tf_saved_model dialect

This change will introduce a new module attribute,
`tf_saved_model.under_construction`, which indicates that the given IR hasn't
completed the transformation.

1) This attribute will be added before promoting variables to tf.resource_name
   arguments.
2) Skip the test for allowing saved_model attributes only in the exported
   function if there is a `tf_saved_model.under_construction`.
3) This attribute will be removed when lifting variables.

PiperOrigin-RevId: 319455102
Change-Id: Id1950898a0abc73c3b0ad0a5f25405ffb50958d9",import_model.cc,"@@ -3399,9 +3399,11 @@ SavedModelSignatureDefImporter::ConvertSignatures() {
   mlir::OpBuilder builder(module_->getBodyRegion());
   module_->setAttr(""tf_saved_model.semantics"", builder.getUnitAttr());
 
+  module_->setAttr(""tf_saved_model.under_construction"", builder.getUnitAttr());
   TF_RETURN_IF_ERROR(ExecutorDialectToFunctional());
   TF_RETURN_IF_ERROR(RemoveVariablesInSessionInitializer());
   TF_RETURN_IF_ERROR(LiftVariables());
+  module_->removeAttr(""tf_saved_model.under_construction"");
 
   SortSavedModelModule(*module_);
   MarkSavedModelFunctionVisibility(*module_);
",0,train
a286fad85e6ee8b8692a7db7ac5a5e5968f9e740,tensorflow/tensorflow,"Remove incorrect comment

PiperOrigin-RevId: 245500211",mark_for_compilation_pass.cc,"@@ -1062,7 +1062,6 @@ StatusOr<bool> MarkForCompilationPassImpl::TryToContractEdge(Cluster* from,
 StatusOr<bool> MarkForCompilationPassImpl::TryToContractEdgesFrom(
     Cluster* cluster_from) {
   bool changed = false;
-  // Needs to be RPO because of shape consumer opt
   for (int to :
        cycles_graph_.Successors(cluster_from->cycles_graph_node_id())) {
     iteration_count_++;
",0,train
e23aafa44fb804814d8d513a048ba5429e360e8c,tensorflow/tensorflow,"Use deterministic, default-seeded random inputs for convolution autotuning.

PiperOrigin-RevId: 251338008",cudnn_conv_algorithm_picker.cc,"@@ -266,8 +266,8 @@ void InitializeTypedBuffer(se::Stream* stream, se::DeviceMemory<T> buffer,
   static std::vector<T>* host_buffer = [] {
     // Use a large prime number to fragment the accesses.
     auto* ret = new std::vector<T>(10069);
-    std::random_device rd;
-    std::mt19937 gen(rd());
+    // Default-seeded random numbers.
+    std::mt19937 gen;
     for (auto& element : *ret) {
       using RandomType =
           typename std::conditional<std::is_same<T, Eigen::half>::value, float,
",0,train
ca3addea1a508bdc6bc1ab2fc2f574fd69734877,tensorflow/tensorflow,"Fixes asan errors introduced due to cl/259085857

PiperOrigin-RevId: 259233558",object_detection_average_precision_stage.cc,"@@ -57,26 +57,26 @@ TfLiteStatus ObjectDetectionAveragePrecisionStage::Init() {
 }
 
 TfLiteStatus ObjectDetectionAveragePrecisionStage::Run() {
-  for (int i = 0; i < ground_truth_objects_->objects_size(); ++i) {
-    const int class_id = ground_truth_objects_->objects(i).class_id();
+  for (int i = 0; i < ground_truth_objects_.objects_size(); ++i) {
+    const int class_id = ground_truth_objects_.objects(i).class_id();
     if (class_id >= num_classes_) {
       LOG(ERROR) << ""Encountered invalid class ID: "" << class_id;
       return kTfLiteError;
     }
 
     ground_truth_object_vectors_[class_id].push_back(ConvertProtoToDetection(
-        ground_truth_objects_->objects(i), current_image_index_));
+        ground_truth_objects_.objects(i), current_image_index_));
   }
 
-  for (int i = 0; i < predicted_objects_->objects_size(); ++i) {
-    const int class_id = predicted_objects_->objects(i).class_id();
+  for (int i = 0; i < predicted_objects_.objects_size(); ++i) {
+    const int class_id = predicted_objects_.objects(i).class_id();
     if (class_id >= num_classes_) {
       LOG(ERROR) << ""Encountered invalid class ID: "" << class_id;
       return kTfLiteError;
     }
 
     predicted_object_vectors_[class_id].push_back(ConvertProtoToDetection(
-        predicted_objects_->objects(i), current_image_index_));
+        predicted_objects_.objects(i), current_image_index_));
   }
 
   current_image_index_++;
",0,train
ca3addea1a508bdc6bc1ab2fc2f574fd69734877,tensorflow/tensorflow,"Fixes asan errors introduced due to cl/259085857

PiperOrigin-RevId: 259233558",object_detection_average_precision_stage.h,"@@ -42,17 +42,16 @@ class ObjectDetectionAveragePrecisionStage : public EvaluationStage {
   EvaluationStageMetrics LatestMetrics() override;
 
   // Call before Run().
-  // Both protos must outlive the call to Run().
   void SetEvalInputs(const ObjectDetectionResult& predicted_objects,
                      const ObjectDetectionResult& ground_truth_objects) {
-    predicted_objects_ = &predicted_objects;
-    ground_truth_objects_ = &ground_truth_objects;
+    predicted_objects_ = predicted_objects;
+    ground_truth_objects_ = ground_truth_objects;
   }
 
  private:
   int num_classes_ = -1;
-  const ObjectDetectionResult* predicted_objects_;
-  const ObjectDetectionResult* ground_truth_objects_;
+  ObjectDetectionResult predicted_objects_;
+  ObjectDetectionResult ground_truth_objects_;
   int current_image_index_ = 0;
 
   // One inner vector per class for ground truth objects.
",0,train
b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op.

DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs on.  In the case of XLA, we are not executing on any device, so we return the length of the list.


PiperOrigin-RevId: 317740778
Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",mark_for_compilation_pass.cc,"@@ -1837,7 +1837,7 @@ absl::flat_hash_map<string, std::vector<string>>* GetWhitelistTable() {
       ""ConcatOffset"", ""Const"", ""MirrorPad"", ""Pack"", ""Pad"", ""PadV2"", ""Reverse"",
       ""ReverseV2"", ""ReverseSequence"", ""Slice"", ""Split"", ""SplitV"",
       ""StridedSlice"", ""StridedSliceGrad"", ""ResourceStridedSliceAssign"",
-      ""Tile"", ""Transpose"", ""InvertPermutation"", ""Unpack""}}};
+      ""Tile"", ""Transpose"", ""InvertPermutation"", ""Unpack"", ""DeviceIndex""}}};
   // clang-format on
   return result;
 }
",0,train
b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op.

DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs on.  In the case of XLA, we are not executing on any device, so we return the length of the list.

PiperOrigin-RevId: 317740778
Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",device_index_op.cc,"@@ -0,0 +1,51 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""absl/container/flat_hash_map.h""
+#include ""absl/strings/string_view.h""
+#include ""tensorflow/compiler/tf2xla/xla_helpers.h""
+#include ""tensorflow/compiler/tf2xla/xla_op_kernel.h""
+#include ""tensorflow/compiler/tf2xla/xla_op_registry.h""
+#include ""tensorflow/compiler/xla/client/client_library.h""
+#include ""tensorflow/compiler/xla/client/lib/arithmetic.h""
+#include ""tensorflow/compiler/xla/client/lib/constants.h""
+#include ""tensorflow/compiler/xla/client/lib/math.h""
+#include ""tensorflow/compiler/xla/client/xla_builder.h""
+#include ""tensorflow/core/framework/kernel_def_builder.h""
+
+namespace tensorflow {
+namespace {
+
+class DeviceIndexOp : public XlaOpKernel {
+ public:
+  explicit DeviceIndexOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
+    OP_REQUIRES_OK(ctx, ctx->GetAttr(""device_names"", &device_names_));
+  }
+
+  void Compile(XlaOpKernelContext* ctx) override {
+    // When compiling we are not executing on any physical device, so we return
+    // a sentinel value (size of the list of devices).
+    ctx->SetOutput(
+        0, xla::ConstantR0<int32>(ctx->builder(), device_names_.size()));
+  }
+
+ private:
+  std::vector<string> device_names_;
+};
+
+REGISTER_XLA_OP(Name(""DeviceIndex""), DeviceIndexOp);
+
+}  // namespace
+}  // namespace tensorflow
",0,train
b2f092894012e9c2612cb46c332140f28d91ced2,tensorflow/tensorflow,"Add DeviceIndex xla op.

DeviceIndex op: given a list of device names, this operation returns the index of the device this op runs on.  In the case of XLA, we are not executing on any device, so we return the length of the list.

PiperOrigin-RevId: 317740778
Change-Id: I0679aa0adc5508b83502eee0d2044584577ed5b4",control_flow_ops_test.py,"@@ -1274,6 +1274,26 @@ class ExecuteFnForDeviceTest(test_util.TensorFlowTestCase):
       self.assertEqual(6., self.evaluate(result))
       self.assertEqual([2.], self.evaluate(grad))
 
+  def testCompile(self):
+    if not test_util.is_gpu_available():
+      return
+
+    def cpu_fn(x):
+      return x + x
+
+    def gpu_fn(x):
+      return x * x
+
+    @def_function.function(experimental_compile=True)
+    def flexible_defun(a):
+      branches = {""CPU"": lambda: cpu_fn(a), ""GPU"": lambda: gpu_fn(a)}
+      return control_flow_ops.execute_fn_for_device(branches, lambda: cpu_fn(a))
+
+    # Always execute the default branch in xla compilation case.
+    a = array_ops.constant(3.)
+    r = flexible_defun(a)
+    self.assertEqual(6., self.evaluate(r))
+
   def testFallBack(self):
 
     def default_fn(x):
",0,train
65336d57f39903865f4f48ca5c5f791a87918f3d,tensorflow/tensorflow,"PFor: Support TensorLists in the while_loop converter when the condition is pfor-loop-variant

Since they use internal stacking, they need to be accumulated differently.

PiperOrigin-RevId: 382784481
Change-Id: I1628178d61e0f7a9158b0ee57d37244e45d93297",control_flow_ops_test.py,"@@ -1370,7 +1370,6 @@ class TensorListTest(PForTestCase):
 
     self._test_loop_fn(loop_fn, 2)
 
-  @test_util.enable_control_flow_v2
   def test_tensor_list_reserve_while_loop(self):
     # Here a loop invariant TensorList is captured by a while_loop, which then
     # performs loop dependent operations on it, resulting in a loop variant
@@ -1378,6 +1377,8 @@ class TensorListTest(PForTestCase):
     # while_loop.
     # We handle this particular case by forcing vectorization of
     # TensorListReserve operation.
+    v2_enabled = control_flow_v2_toggles.control_flow_v2_enabled()
+    control_flow_v2_toggles.enable_control_flow_v2()
 
     def loop_fn(i):
       handle = list_ops.tensor_list_reserve([], 2, dtypes.int32)
@@ -1387,32 +1388,8 @@ class TensorListTest(PForTestCase):
       return list_ops.tensor_list_stack(out_handle, dtypes.int32)
 
     self._test_loop_fn(loop_fn, 2)
-
-  @test_util.enable_control_flow_v2
-  def test_tensor_list_while_loop_stacked_cond_stacked_list(self):
-
-    def loop_fn(i):
-      handle = list_ops.tensor_list_from_tensor([20, 21, 22, 23, i], [])
-      _, out_handle = control_flow_ops.while_loop(
-          lambda j, _: j < i,
-          lambda j, h: (j + 1, list_ops.tensor_list_set_item(h, j, i)),
-          (0, handle))
-      return list_ops.tensor_list_stack(out_handle, dtypes.int32)
-
-    self._test_loop_fn(loop_fn, 5)
-
-  @test_util.enable_control_flow_v2
-  def test_tensor_list_while_loop_stacked_cond_unstacked_list(self):
-
-    def loop_fn(i):
-      handle = list_ops.tensor_list_from_tensor([20, 21, 22, 23, 24], [])
-      _, out_handle = control_flow_ops.while_loop(
-          lambda j, _: j < i,
-          lambda j, h: (j + 1, list_ops.tensor_list_set_item(h, j, i)),
-          (0, handle))
-      return list_ops.tensor_list_stack(out_handle, dtypes.int32)
-
-    self._test_loop_fn(loop_fn, 5)
+    if not v2_enabled:
+      control_flow_v2_toggles.disable_control_flow_v2()
 
   def test_tensor_list_addn_already_stacked(self):
 
",0,test
65336d57f39903865f4f48ca5c5f791a87918f3d,tensorflow/tensorflow,"PFor: Support TensorLists in the while_loop converter when the condition is pfor-loop-variant

Since they use internal stacking, they need to be accumulated differently.

PiperOrigin-RevId: 382784481
Change-Id: I1628178d61e0f7a9158b0ee57d37244e45d93297",pfor.py,"@@ -88,23 +88,6 @@ def _variant_handle_data(t):
   return handle_data.shape_and_type
 
 
-def _variant_type_id(t):
-  """"""Returns the full_type_pb2 type of `t`, or None if it is not available.""""""
-  if t.dtype != dtypes.variant:
-    return None
-  shapes_and_types = _variant_handle_data(t)
-  if shapes_and_types is None or not shapes_and_types:
-    # TODO(b/169968286): Identify all variant tensors (e.g. maps) and we can
-    # make this an error instead of assuming TensorLists have handle data.
-    return None  # Presumed not a TensorList/Optional
-  return shapes_and_types[0].type.type_id
-
-
-_INTERNAL_STACKING_TYPE_IDS = (
-    full_type_pb2.TFT_ARRAY,
-    full_type_pb2.TFT_OPTIONAL)
-
-
 def _is_variant_with_internal_stacking(t):
   """"""Identifies variant tensors which pfor always maintains as scalars.
 
@@ -116,8 +99,15 @@ def _is_variant_with_internal_stacking(t):
   Returns:
     True if `t` is a TensorList/Optional, False not, None if unknown.
   """"""
-  type_id = _variant_type_id(t)
-  return type_id in _INTERNAL_STACKING_TYPE_IDS
+  if t.dtype != dtypes.variant:
+    return False
+  shapes_and_types = _variant_handle_data(t)
+  if shapes_and_types is None or not shapes_and_types:
+    # TODO(b/169968286): Identify all variant tensors (e.g. maps) and we can
+    # make this an error instead of assuming TensorLists have handle data.
+    return None  # Presumed not a TensorList/Optional
+  type_id = shapes_and_types[0].type.type_id
+  return type_id in (full_type_pb2.TFT_ARRAY, full_type_pb2.TFT_OPTIONAL)
 
 
 def _parse_variant_shapes_and_types(t):
@@ -4536,60 +4526,11 @@ class WhileV2(object):
     with ops.name_scope(""while_init""):
       for inp in self._pfor_input.inputs:
         inputs.append(inp.t)
-        variant_type_id = _variant_type_id(inp.t)
-        if variant_type_id in _INTERNAL_STACKING_TYPE_IDS:
-          if variant_type_id != full_type_pb2.TFT_ARRAY:
-            raise NotImplementedError(
-                (""While loop conversion is only supported for TensorLists. Got ""
-                 ""another variant {}, probably an optional. Please file a bug."")
-                .format(inp.t))
-          # For TensorLists, the input format is:
-          #
-          #   List[user_list_len, Tensor[loop_len, ...]]
-          #
-          # rather than the usual
-          #
-          #   Tensor[loop_len, ...]
-          #
-          # The body of the loop will take and return lists in this ""internal
-          # vectorization"" format, so we want to keep it that way as much as
-          # possible. We'll accumulate finished iterations (only relevant for
-          # pfor-loop-variant while_loop conditions) in an accumulator with
-          # type:
-          #
-          #   List[user_list_len, List[loop_len, Tensor[...]]]
-          #
-          # This means that each while_loop iteration, we'll iterate over the
-          # length of the TensorList, dividing done/remaining pfor loop indices
-          # and scattering the done indices into the inner nested list of the
-          # accumulator.
-          element_shape = list_ops.tensor_list_element_shape(
-              inp.t, dtypes.int32)[1:]
-          dtype = _parse_variant_shapes_and_types(inp.t)[0].dtype
-
-          def _init_loop_body(index, output_ta):
-            output_ta = output_ta.write(
-                index,
-                list_ops.tensor_list_reserve(element_shape, loop_len, dtype))
-            return index + 1, output_ta
-
-          length = list_ops.tensor_list_length(inp.t)
-          output_ta = tensor_array_ops.TensorArray(
-            inp.t.dtype,  # Variant; this is a nested TensorList
-            size=length,
-            dynamic_size=True,
-            infer_shape=False)
-          _, output_ta = control_flow_ops.while_loop(
-              lambda index, _: index < length,
-              _init_loop_body,
-              [0, output_ta])
-        else:
-          output_ta = tensor_array_ops.TensorArray(
+        output_tas.append(tensor_array_ops.TensorArray(
             inp.t.dtype,
             size=loop_len,
             dynamic_size=False,
-            infer_shape=True)
-        output_tas.append(output_ta)
+            infer_shape=True))
     # See documentation for __call__ for the structure of init_values.
     indices = (
         math_ops.range(self._pfor.loop_len_vector[0])
@@ -4617,51 +4558,21 @@ class WhileV2(object):
     new_output_tas = []
     for i, (inp, stacked) in enumerate(zip(inputs, inputs_stacked)):
       pass_through = i in self._body_pass_through_indices
-      if not pass_through and  _variant_type_id(inp) == full_type_pb2.TFT_ARRAY:
-        shape_and_type = _parse_variant_shapes_and_types(inp)[0]
-        element_shape = list_ops.tensor_list_element_shape(inp, dtypes.int32)
-        user_list_len = list_ops.tensor_list_length(inp)
-
-        def _split_vectorized_ta_element(index, new_inp, new_out_ta):
-          elem = list_ops.tensor_list_get_item(inp, index, shape_and_type.dtype,
-                                               element_shape)
-          if stacked:
-            done_elem, new_elem = data_flow_ops.dynamic_partition(
-                elem, conditions_int, 2)
-            new_inp = list_ops.tensor_list_set_item(new_inp, index, new_elem)
-          else:
-            done_elem = _stack(elem, [array_ops.size(done_indices)]).t
-          done_accum = new_out_ta.read(index)
-          done_accum = list_ops.tensor_list_scatter(
-              tensor=done_elem, indices=done_indices, input_handle=done_accum)
-          new_out_ta = new_out_ta.write(index, done_accum)
-          return index + 1, new_inp, new_out_ta
-
-        length = list_ops.tensor_list_length(inp)
-        new_inp = list_ops.tensor_list_reserve(
-            tensor_shape.TensorShape([None])
-            + tensor_shape.TensorShape(shape_and_type.shape)[1:],
-            user_list_len, shape_and_type.dtype)
-        _, new_inp, out_ta = control_flow_ops.while_loop(
-            lambda index, unused_new_inp, unused_new_out_ta: index < length,
-            _split_vectorized_ta_element,
-            [0, new_inp, output_tas[i]])
+      # Partition the inputs.
+      if stacked:
+        done_inp, new_inp = data_flow_ops.dynamic_partition(
+            inp, conditions_int, 2)
       else:
-        # Partition the inputs.
-        if stacked:
-          done_inp, new_inp = data_flow_ops.dynamic_partition(
-              inp, conditions_int, 2)
-        else:
-          if not pass_through:
-            done_inp = _stack(inp, [array_ops.size(done_indices)]).t
-          new_inp = inp
-
-        out_ta = output_tas[i]
         if not pass_through:
-          # Note that done_indices can be empty. done_inp should also be empty
-          # in that case.
-          out_ta = out_ta.scatter(done_indices, done_inp)
+          done_inp = _stack(inp, [array_ops.size(done_indices)]).t
+        new_inp = inp
+
       new_inputs.append(new_inp)
+      out_ta = output_tas[i]
+      if not pass_through:
+        # Note that done_indices can be empty. done_inp should also be empty
+        # in that case.
+        out_ta = out_ta.scatter(done_indices, done_inp)
       new_output_tas.append(out_ta)
 
     assert len(new_output_tas) == len(output_tas)
@@ -4862,37 +4773,7 @@ class WhileV2(object):
               outputs.append(init_values[i + 2])
             else:
               ta = output_tas[i]
-              if _variant_type_id(inp) == full_type_pb2.TFT_ARRAY:
-                shape_and_type = _parse_variant_shapes_and_types(inp)[0]
-                length = list_ops.tensor_list_length(inp)
-
-                # We have been accumulating values in a:
-                #
-                #   List[user_list_len, List[loop_len, Tensor[...]]]
-                #
-                # We want to return an output in the same format as the input:
-                #
-                #   List[user_list_len, Tensor[loop_len, ...]]
-                #
-                # So we need to loop over the list and stack its contents.
-                def _stack_loop_body(index, output_list):
-                  current_value = ta.read(index)
-                  output_list = list_ops.tensor_list_set_item(
-                      output_list, index,
-                      list_ops.tensor_list_stack(
-                          current_value, shape_and_type.dtype))
-                  return index + 1, output_list
-
-                output_list = list_ops.tensor_list_reserve(
-                    tensor_shape.TensorShape(shape_and_type.shape), length,
-                    shape_and_type.dtype)
-                _, output_list = control_flow_ops.while_loop(
-                    lambda index, _: index < length,
-                    _stack_loop_body,
-                    [0, output_list])
-                outputs.append(output_list)
-              else:
-                outputs.append(ta.stack())
+              outputs.append(ta.stack())
           else:
             outputs.append(inp)
         return outputs
",0,test
17b7d69ad4bfe3e51c4cee2a10fa24bd9048ec27,tensorflow/tensorflow,Removed Deprecated API from the file.,vector_diffeomixture.py,"@@ -1060,5 +1060,5 @@ def softmax(x, axis, name=None):
     if axis_ is not None:
       axis = np.int(ndims + axis_ if axis_ < 0 else axis_)
     else:
-      axis = array_ops.where(axis < 0, ndims + axis, axis)
+      axis = array_ops.where_v2(axis < 0, ndims + axis, axis)
   return nn_ops.softmax(x, axis=axis)
",0,test
a9429e942a261948f146f9b4a9fbaeab8598dadc,tensorflow/tensorflow,"Fix resize_bilinear type propagation

This operator supports more than just float32 outputs.

PiperOrigin-RevId: 259411764",propagate_array_data_types.cc,"@@ -55,7 +55,6 @@ void SetDataTypeForAllOutputs(Model* model, Operator* op,
   // Do the actual output data types propagation.
   switch (op->type) {
     case OperatorType::kDequantize:
-    case OperatorType::kResizeBilinear:
       // These operators unconditionally produce float outputs
       SetDataTypeForAllOutputs(model, op, ArrayDataType::kFloat);
       break;
",0,train
bfaaefa9ecbbbc797f5af60f3d87f6a3c3ac7a09,tensorflow/tensorflow,"Update APIs for TPU Cluster Resolver to remove the custom API definition and instead use a standard definition file stored in GCS.

PiperOrigin-RevId: 170960877",tpu_cluster_resolver.py,"@@ -39,7 +39,6 @@ class TPUClusterResolver(ClusterResolver):
   """"""
 
   def __init__(self,
-               api_definition,
                project,
                zone,
                tpu_names,
@@ -52,8 +51,6 @@ class TPUClusterResolver(ClusterResolver):
     for the IP addresses and ports of each Cloud TPU listed.
 
     Args:
-      api_definition: (Alpha only) A copy of the JSON API definitions for
-        Cloud TPUs. This will be removed once Cloud TPU enters beta.
       project: Name of the GCP project containing Cloud TPUs
       zone: Zone where the TPUs are located
       tpu_names: A list of names of the target Cloud TPUs.
@@ -83,11 +80,13 @@ class TPUClusterResolver(ClusterResolver):
         raise ImportError('googleapiclient must be installed before using the '
                           'TPU cluster resolver')
 
-      # TODO(frankchn): Remove once Cloud TPU API Definitions are public and
-      # replace with discovery.build('tpu', 'v1')
-      self._service = discovery.build_from_document(
-          api_definition,
-          credentials=self._credentials)
+      # TODO(b/67375680): Remove custom URL once TPU APIs are finalized
+      self._service = discovery.build(
+          'tpu',
+          'v1',
+          credentials=self._credentials,
+          discoveryServiceUrl='https://storage.googleapis.com'
+                              '/tpu-api-definition/v1alpha1.json')
     else:
       self._service = service
 
",0,train
4002c6c2bc4946a162236f3357ec54b4ab8b4e1e,tensorflow/tensorflow,"Improve documentation for TFRecordDataset.

This adds a longer description and includes the example used for TFRecordWriter (https://www.tensorflow.org/api_docs/python/tf/io/TFRecordWriter)

PiperOrigin-RevId: 363734980
Change-Id: I97e616bbcabae0e64b016d34bfbdba6996bd9ebc",readers.py,"@@ -213,7 +213,48 @@ class TextLineDatasetV1(dataset_ops.DatasetV1Adapter):
 
 
 class _TFRecordDataset(dataset_ops.DatasetSource):
-  """"""A `Dataset` comprising records from one or more TFRecord files.""""""
+  """"""A `Dataset` comprising records from one or more TFRecord files.
+
+  This dataset loads TFRecords from file as bytes, exactly as they were written.
+  `TFRecordDataset` does not do any parsing or decoding on its own. Parsing and
+  decoding can be done by applying `Dataset.map` transformations after the
+  `TFRecordDataset`.
+
+  A minimal example is given below:
+
+  >>> import tempfile
+  >>> example_path = os.path.join(tempfile.gettempdir(), ""example.tfrecords"")
+  >>> np.random.seed(0)
+
+  >>> # Write the records to a file.
+  ... with tf.io.TFRecordWriter(example_path) as file_writer:
+  ...   for _ in range(4):
+  ...     x, y = np.random.random(), np.random.random()
+  ...
+  ...     record_bytes = tf.train.Example(features=tf.train.Features(feature={
+  ...         ""x"": tf.train.Feature(float_list=tf.train.FloatList(value=[x])),
+  ...         ""y"": tf.train.Feature(float_list=tf.train.FloatList(value=[y])),
+  ...     })).SerializeToString()
+  ...     file_writer.write(record_bytes)
+
+  >>> # Read the data back out.
+  >>> def decode_fn(record_bytes):
+  ...   return tf.io.parse_single_example(
+  ...       # Data
+  ...       record_bytes,
+  ...
+  ...       # Schema
+  ...       {""x"": tf.io.FixedLenFeature([], dtype=tf.float32),
+  ...        ""y"": tf.io.FixedLenFeature([], dtype=tf.float32)}
+  ...   )
+
+  >>> for batch in tf.data.TFRecordDataset([example_path]).map(decode_fn):
+  ...   print(""x = {x:.4f},  y = {y:.4f}"".format(**batch))
+  x = 0.5488,  y = 0.7152
+  x = 0.6028,  y = 0.5449
+  x = 0.4237,  y = 0.6459
+  x = 0.4376,  y = 0.8918
+  """"""
 
   def __init__(self, filenames, compression_type=None, buffer_size=None):
     """"""Creates a `TFRecordDataset`.
",0,train
591eaa548218c664632b6ee1fe0732650094116d,tensorflow/tensorflow,"Account for the fact that the equivalent of the IsVariableInitialized op is VarIsInitializedOp when working on ResourceVariables

PiperOrigin-RevId: 161272800",variables.py,"@@ -736,7 +736,7 @@ class Variable(object):
         return initial_value
     elif isinstance(initial_value, ops.Operation):
       if initial_value.node_def.op in [
-          ""IsVariableInitialized"", ""ReadVariableOp""
+          ""IsVariableInitialized"", ""VarIsInitializedOp"", ""ReadVariableOp""
       ]:
         return initial_value
       if initial_value.node_def.op in [""Variable"", ""VariableV2"", ""VarHandleOp""]:
",0,test
9406c024d4d31a5a3db13bc5d4b2b0b19a311afb,tensorflow/tensorflow,"Remove padding_initial_value(s) for optimizers. The padding segment has been
removed in favor of another technique to handle padding.

PiperOrigin-RevId: 384520602
Change-Id: Id64e5d9d618666c7f6d407795a2299b58c24c133",tpu_embedding_optimization_parameters_utils.cc,"@@ -15,6 +15,9 @@ limitations under the License.
 
 #include ""tensorflow/core/tpu/tpu_embedding_optimization_parameters_utils.h""
 
+#include <string>
+#include <utility>
+
 #include ""tensorflow/compiler/xla/service/hlo.pb.h""
 #include ""tensorflow/compiler/xla/service/hlo_opcode.h""
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
@@ -26,7 +29,7 @@ limitations under the License.
 namespace tensorflow {
 namespace tpu {
 
-string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
+std::string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
   switch (alg) {
     case OptimizationAlgorithm::kAdagrad:
       return ""Adagrad"";
@@ -66,7 +69,7 @@ string GetOptimizationAlgorithmName(OptimizationAlgorithm alg) {
   return ""*** Not set ***"";
 }
 
-string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) {
+std::string GetOptimizationAlgorithmFriendlyName(OptimizationAlgorithm alg) {
   switch (alg) {
     case OptimizationAlgorithm::kAdagrad:
       return ""Adagrad"";
@@ -194,21 +197,6 @@ Status GetGradientAccumulationSupport(const OptimizationParameters& params,
   return Status::OK();
 }
 
-namespace {
-// Make a normal state variable specification. Please refer to
-// //tensorflow/core/protobuf/tpu/optimization_parameters.proto
-// (StateVariableSpecification message) for instructions on how to set the
-// padding_initial_value field.
-StateVariableSpecification MakeStandardStateVariableSpecification(
-    const string& name, double padding_initial_value) {
-  StateVariableSpecification result;
-  result.set_name(name);
-  result.mutable_user_defined()->set_padding_initial_value(
-      padding_initial_value);
-  return result;
-}
-}  // namespace
-
 Status UseGradientAccumulation(const OptimizationParameters& params,
                                bool* use_gradient_accumulation) {
   GradientAccumulationSupport support;
@@ -264,112 +252,104 @@ Status GetOptimizationAlgorithmStateVariables(
   TF_RETURN_IF_ERROR(
       UseGradientAccumulation(params, &use_gradient_accumulation));
 
-  auto add_state_variable = [&](const std::string& name, float value) {
-    state_variables->push_back(
-        MakeStandardStateVariableSpecification(name, value));
+  auto add_state_variable = [&](const std::string& name) {
+    StateVariableSpecification spec;
+    spec.set_name(name);
+    (void)spec.mutable_user_defined();
+    state_variables->push_back(spec);
   };
 
   switch (params.parameters_case()) {
     case OptimizationAlgorithm::kAdagrad: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.1);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
       break;
     }
     case OptimizationAlgorithm::kBoundedAdagrad: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.1);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
       break;
     }
     case OptimizationAlgorithm::kStochasticGradientDescent: {
-      add_state_variable(""parameters"", 0.0);
+      add_state_variable(""parameters"");
       break;
     }
     case OptimizationAlgorithm::kFtrl: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.1);
-      add_state_variable(""linears"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
+      add_state_variable(""linears"");
       break;
     }
     case OptimizationAlgorithm::kAdam: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""momenta"", 0.0);
-      add_state_variable(""velocities"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""momenta"");
+      add_state_variable(""velocities"");
       break;
     }
     case OptimizationAlgorithm::kMomentum: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""momenta"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""momenta"");
       break;
     }
     case OptimizationAlgorithm::kRmsProp: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""ms"", 1.0);
-      add_state_variable(""mom"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""ms"");
+      add_state_variable(""mom"");
       break;
     }
     case OptimizationAlgorithm::kCenteredRmsProp: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""ms"", 1.0);
-      add_state_variable(""mom"", 0.0);
-      add_state_variable(""mg"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""ms"");
+      add_state_variable(""mom"");
+      add_state_variable(""mg"");
       break;
     }
     case OptimizationAlgorithm::kMdlAdagradLight: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.1);
-      add_state_variable(""weights"", 0.0);
-      add_state_variable(""benefits"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
+      add_state_variable(""weights"");
+      add_state_variable(""benefits"");
       break;
     }
     case OptimizationAlgorithm::kAdadelta: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.0);
-      add_state_variable(""updates"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
+      add_state_variable(""updates"");
       break;
     }
     case OptimizationAlgorithm::kProximalAdagrad: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""accumulators"", 0.1);
+      add_state_variable(""parameters"");
+      add_state_variable(""accumulators"");
       break;
     }
     case OptimizationAlgorithm::kOnlineYogi: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""vs"", 0.1);
-      add_state_variable(""linears"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""vs"");
+      add_state_variable(""linears"");
       break;
     }
     case OptimizationAlgorithm::kProximalYogi: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""v"", 0.1);
-      add_state_variable(""m"", 0.0);
+      add_state_variable(""parameters"");
+      add_state_variable(""v"");
+      add_state_variable(""m"");
       break;
     }
     case OptimizationAlgorithm::kFrequencyEstimator: {
-      add_state_variable(""parameters"", 0.0);
-      add_state_variable(""last_hit_step"", 0);
+      add_state_variable(""parameters"");
+      add_state_variable(""last_hit_step"");
       break;
     }
     case OptimizationAlgorithm::kUserDefinedProgram: {
-      add_state_variable(""parameters"",
-                         params.user_defined_program().padding_values(0));
+      add_state_variable(""parameters"");
       int num_slots = -1;
       TF_RETURN_IF_ERROR(GetBaseAuxiliaryParameterCount(params, &num_slots));
-      if (num_slots + 1 !=
-          params.user_defined_program().padding_values_size()) {
-        return errors::InvalidArgument(
-            absl::StrCat(""Number of slots "", num_slots + 1,
-                         "" does not agree with the number of padding values "",
-                         params.user_defined_program().padding_values_size(),
-                         "" specified for "", params.ShortDebugString(), "".""));
-      }
       for (int i = 0; i < num_slots; ++i) {
-        add_state_variable(absl::StrCat(""Slot_"", i),
-                           params.user_defined_program().padding_values(i + 1));
+        add_state_variable(absl::StrCat(""Slot_"", i));
       }
       break;
     }
     case OptimizationAlgorithm::kAssign: {
-      add_state_variable(""parameters"", 0.0);
+      add_state_variable(""parameters"");
       break;
     }
     case OptimizationAlgorithm::PARAMETERS_NOT_SET: {
",0,train
b4c37a452d2ed1d1c29ceb70127c4ef6434c44ca,tensorflow/tensorflow,"Teach the conditional simplifier about sharding.

PiperOrigin-RevId: 193510638",conditional_simplifier.cc,"@@ -69,7 +69,7 @@ static StatusOr<bool> TryRemoveConditional(HloInstruction* conditional) {
         conditional->shape(), {conditional->mutable_operand(2)},
         conditional->false_computation()));
   }
-
+  conditional->SetupDerivedInstruction(call_op);
   TF_RETURN_IF_ERROR(computation->ReplaceInstruction(conditional, call_op));
   TF_RETURN_IF_ERROR(CallInliner::Inline(call_op).status());
 
",0,train
7ad011b7c542f7183af78f09a47a9673f8457954,tensorflow/tensorflow,"Allow registering concrete functions directly, rather than through PolymorphicFunction.

PiperOrigin-RevId: 217735452",function.py,"@@ -1409,8 +1409,35 @@ class PolymorphicFunction(object):
       ]
 
 
+def register_concrete(func):
+  """"""Register a concrete function into the graph.
+
+  Args:
+    func: A graph function.
+  """"""
+  graph = ops.get_default_graph()
+
+  # There are two situations for the actual call of a defun:
+  # 1. If none of the input args are resource variables or watch by any tape,
+  #   it will run the _inference_function of concrete_func for forward pass, and
+  #   the gradient will be generated by standard mechanism.
+  # 2. Otherwise, defun will create two functions, one for forward pass, and the
+  #   backward pass will be created via tape.
+  # When registering the function, we put both cases into graph.
+  # pylint: disable=protected-access
+  func._inference_function.add_to_graph(graph)
+
+  if func._backward_graph_function is None:
+    func._construct_backprop_function()
+  forward_function = func._forward_function
+  backward_function = func._backward_graph_function._inference_function
+  forward_function.add_to_graph(graph)
+  backward_function.add_to_graph(graph)
+  # pylint: enable=protected-access
+
+
 def register(func, *args, **kwargs):
-  """"""Register the defun function into the graph.
+  """"""Register a specialization of a PolymorphicFunction into the graph.
 
   This won't actually call the function with the inputs, and only put the
   function definition into graph. Register function with different input param
@@ -1434,26 +1461,7 @@ def register(func, *args, **kwargs):
     raise ValueError(""Only defun function is allowed to be registered. ""
                      ""Got type: %s"" % type(func))
   concrete_func = func.get_concrete_function(*args, **kwargs)
-  graph = ops.get_default_graph()
-
-  # There are two situations for the actual call of a defun:
-  # 1. If none of the input args are resource variables or watch by any tape,
-  #   it will run the _inference_function of concrete_func for forward pass, and
-  #   the gradient will be generated by standard mechanism.
-  # 2. Otherwise, defun will create two functions, one for forward pass, and the
-  #   backward pass will be created via tape.
-  # When registering the function, we put both cases into graph.
-  # pylint: disable=protected-access
-  concrete_func._inference_function.add_to_graph(graph)
-
-  if concrete_func._backward_graph_function is None:
-    concrete_func._construct_backprop_function()
-  forward_function = concrete_func._forward_function
-  backward_function = concrete_func._backward_graph_function._inference_function
-  forward_function.add_to_graph(graph)
-  backward_function.add_to_graph(graph)
-  # pylint: enable=protected-access
-
+  register_concrete(concrete_func)
   return concrete_func
 
 
",0,train
7ad011b7c542f7183af78f09a47a9673f8457954,tensorflow/tensorflow,"Allow registering concrete functions directly, rather than through PolymorphicFunction.

PiperOrigin-RevId: 217735452",function_test.py,"@@ -88,7 +88,7 @@ class DefunnedMiniModel(MiniModel):
 
 
 @test_util.with_c_shapes
-class FunctionTest(test.TestCase):
+class FunctionTest(test.TestCase, parameterized.TestCase):
 
   def testBasic(self):
     matmul = function.defun(math_ops.matmul)
@@ -2149,7 +2149,7 @@ class FunctionTest(test.TestCase):
           t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
           add(t, t)
 
-  def testRegisterFunction(self):
+  def testRegisterPolymorphicFunction(self):
     @function.defun
     def add(x, y):
       return math_ops.add(x, y)
@@ -2211,6 +2211,65 @@ class FunctionTest(test.TestCase):
           self.assertEquals(captured_function_names[i],
                             functions[i].definition.signature.name)
 
+  @parameterized.named_parameters(
+      dict(testcase_name='Defun',
+           function_decorator=function.defun),
+      dict(testcase_name='DefFunction',
+           function_decorator=def_function.function))
+  def testRegisterConcreteFunction(self, function_decorator):
+    @function_decorator
+    def py_add(x, y):
+      return math_ops.add(x, y)
+
+    py_add(array_ops.ones([]), array_ops.ones([]))
+    add = py_add.get_concrete_function(
+        tensor_spec.TensorSpec(None, dtypes.float32),
+        tensor_spec.TensorSpec(None, dtypes.float32))
+
+    @function_decorator
+    def py_composite(x, y):
+      return x, add(x, y)
+
+    py_composite(array_ops.ones([]), array_ops.ones([]))
+    composite = py_composite.get_concrete_function(
+        tensor_spec.TensorSpec(None, dtypes.float32),
+        tensor_spec.TensorSpec(None, dtypes.float32))
+
+    with context.graph_mode(), self.cached_session():
+      with ops.get_default_graph().as_default():
+        t = constant_op.constant([[1.0, 2.0], [3.0, 4.0]])
+        function.register_concrete(composite)
+
+        graph = ops.get_default_graph()
+        # pylint: disable=protected-access
+        self.assertEqual(len(graph._functions), 6)
+        # two sets of functions, each of them are (inference, forward, backward)
+        functions = list(graph._functions.values())
+        captured_function_names = [
+            f.definition.signature.name for f in functions
+        ]
+        expected_func_name_regex = [
+            '.*inference.*py_composite.*',
+            '.*inference.*py_add.*',
+            '.*forward.*py_composite.*',
+            '.*forward.*py_add.*',
+            '.*inference.*backward.*py_composite.*',
+            '.*inference.*backward.*py_add.*',
+        ]
+        for expected, found in zip(
+            expected_func_name_regex,
+            captured_function_names):
+          self.assertRegexpMatches(found, expected)
+
+        composite_t, composite_double = composite(t, t)
+        double = add(t, t)
+        self.assertAllEqual([[2, 4], [6, 8]], self.evaluate(double))
+        self.assertAllEqual([[2, 4], [6, 8]], self.evaluate(composite_double))
+        self.assertAllEqual([[1, 2], [3, 4]], self.evaluate(composite_t))
+        # Make sure the pre-registered function is used, and no other function
+        # is added.
+        self.assertEqual(len(graph._functions), 6)
+
   def testRegisterFunctionWithInputSignature(self):
     def matmul(x, y):
       return math_ops.matmul(x, y)
",0,train
2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis.

PiperOrigin-RevId: 295038203
Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_stats_to_overview_page.cc,"@@ -144,8 +144,8 @@ OverviewPageRecommendation ComputeGenericRecommendation(
 
 OverviewPageAnalysis ComputeAnalysisResult(const OpStats& op_stats) {
   OverviewPageAnalysis analysis;
-  OpMetricsDb metrics_db =
-      CreateTfMetricsDbFromHloMetricsDb(op_stats.device_op_metrics_db());
+  OpMetricsDb metrics_db = CreateTfMetricsDbFromHloMetricsDb(
+      op_stats.device_op_metrics_db(), /*with_idle=*/false);
   uint64 total_device_time_ps = metrics_db.total_time_ps();
   constexpr int kNumTopOpsShown = 10;
   double device_cumulative_fraction = 0.0;
",0,train
2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis.

PiperOrigin-RevId: 295038203
Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_metrics_db_utils.cc,"@@ -90,8 +90,8 @@ void AddIdleOp(OpMetricsDb* db) {
   metrics->set_self_time_ps(idle_time_ps);
 }
 
-OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(
-    const OpMetricsDb& hlo_metrics_db) {
+OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(const OpMetricsDb& hlo_metrics_db,
+                                              bool with_idle) {
   OpMetricsDb tf_op_metrics_db;
   DeviceTfOpMetricsDbBuilder builder(&tf_op_metrics_db);
   for (const auto& hlo_op_metrics : hlo_metrics_db.metrics_db()) {
@@ -101,11 +101,18 @@ OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(
                                                 hlo_op_metrics);
     } else {
       DCHECK_EQ(hlo_op_metrics.name(), ""IDLE"");
-      builder.UpdateTfOpMetricsWithHloOpMetrics(""IDLE"", ""IDLE"", hlo_op_metrics);
+      if (with_idle) {
+        builder.UpdateTfOpMetricsWithHloOpMetrics(""IDLE"", ""IDLE"",
+                                                  hlo_op_metrics);
+      }
     }
   }
   tf_op_metrics_db.set_total_op_time_ps(hlo_metrics_db.total_op_time_ps());
-  tf_op_metrics_db.set_total_time_ps(hlo_metrics_db.total_time_ps());
+
+  tf_op_metrics_db.set_total_time_ps(with_idle
+                                         ? hlo_metrics_db.total_time_ps()
+                                         : hlo_metrics_db.total_op_time_ps());
+
   return tf_op_metrics_db;
 }
 }  // namespace profiler
",0,train
2a71aacb81d5e3282bbcbcd4712803f02800bf6f,tensorflow/tensorflow,"Exclude IDLE op from metrics db used for ""Overview Page | Top 10 TF operations"" analysis.

PiperOrigin-RevId: 295038203
Change-Id: Ie2a46b36600f929a292aac9a5d0a8175c5acc934",op_metrics_db_utils.h,"@@ -68,8 +68,8 @@ uint64 IdleTimePs(const OpMetricsDb& metrics_db);
 void AddIdleOp(OpMetricsDb* db);
 
 // Converts from Hlo-op metrics to Tf-op metrics.
-OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(
-    const OpMetricsDb& hlo_metrics_db);
+OpMetricsDb CreateTfMetricsDbFromHloMetricsDb(const OpMetricsDb& hlo_metrics_db,
+                                              bool with_idle = true);
 }  // namespace profiler
 }  // namespace tensorflow
 
",0,train
293a2be20bef4cd9edc2b53d828d091f1f49e855,tensorflow/tensorflow,"Convert absl string_view constants to const, rather than constexpr.

MSVC fails to compile such variables with:
error C2131: expression did not evaluate to a constant

PiperOrigin-RevId: 293408611
Change-Id: Ie42a085fce1540ed2da7739b6875e5f2e4e5411a",call_inliner_test.cc,"@@ -208,7 +208,7 @@ TEST_F(CallInlinerTest, CallToOutfeedComputationIsInlined) {
 }
 
 TEST_F(CallInlinerTest, InlineSingleUseCalleesOnly) {
-  constexpr absl::string_view hlo_string = R""(
+  const absl::string_view hlo_string = R""(
   HloModule inline_module
 
   a {
",0,test
293a2be20bef4cd9edc2b53d828d091f1f49e855,tensorflow/tensorflow,"Convert absl string_view constants to const, rather than constexpr.

MSVC fails to compile such variables with:
error C2131: expression did not evaluate to a constant

PiperOrigin-RevId: 293408611
Change-Id: Ie42a085fce1540ed2da7739b6875e5f2e4e5411a",hlo_evaluator_test.cc,"@@ -4094,7 +4094,7 @@ ENTRY main {
 
 TEST_P(HloEvaluatorBf16Test, Bitcast) {
   // Regression test for b/114735354.
-  constexpr absl::string_view hlo_text_base = R""(
+  const absl::string_view hlo_text_base = R""(
 HloModule Bitcast
 
 ENTRY main {
@@ -4121,7 +4121,7 @@ ENTRY main {
 
 // Check that s32 under/overflow doesn't trigger a ubsan failure.
 TEST_F(HloEvaluatorTest, Int32Overflow) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
 HloModule Test
 
 ENTRY main {
@@ -4150,7 +4150,7 @@ ENTRY main {
 }
 
 TEST_F(HloEvaluatorTest, GetDimensionSize) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
 HloModule Test
 
 ENTRY main {
@@ -4184,7 +4184,7 @@ ENTRY main {
 
 // Check that we get a useful error if we pass inputs of the wrong shape.
 TEST_F(HloEvaluatorTest, EvaluateWithWrongInputShapes) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
 HloModule Test
 
 ENTRY main {
@@ -4211,7 +4211,7 @@ ENTRY main {
 
 // Check that we get a useful error if we pass too many or too few inputs.
 TEST_F(HloEvaluatorTest, EvaluateWithWrongNumberOfInputs) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
 HloModule Test
 
 ENTRY main {
@@ -4233,7 +4233,7 @@ ENTRY main {
 }
 
 TEST_F(HloEvaluatorTest, PreserveFusionInputLayout) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule FusionInputLayout
 
     fused_computation {
@@ -4255,7 +4255,7 @@ TEST_F(HloEvaluatorTest, PreserveFusionInputLayout) {
 }
 
 TEST_F(HloEvaluatorTest, PreserveFusionOutputLayout) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule FusionOutputLayout
 
     fused_computation {
@@ -4276,7 +4276,7 @@ TEST_F(HloEvaluatorTest, PreserveFusionOutputLayout) {
 }
 
 TEST_F(HloEvaluatorTest, PreserveMOFusionOutputLayout) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule MOFusionOutputLayout
 
     fused_computation {
@@ -4301,7 +4301,7 @@ TEST_F(HloEvaluatorTest, PreserveMOFusionOutputLayout) {
 
 // Tests that custom_calls fail to evaluate when no handler is specified.
 TEST_F(HloEvaluatorTest, EvaluateCustomCall_NoHandler) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule EvaluateCustomCall_NoHandler
     ENTRY kernel_entry {
       parameter.0 = u32[2,2]{1,0} parameter(0)
@@ -4318,7 +4318,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_NoHandler) {
 
 // Tests when a custom_call handler returns an error.
 TEST_F(HloEvaluatorTest, EvaluateCustomCall_HandlerError) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule EvaluateCustomCall_HandlerError
     ENTRY kernel_entry {
       parameter.0 = u32[2,2]{1,0} parameter(0)
@@ -4342,7 +4342,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_HandlerError) {
 // We sum the operands so that we can verify the operand and output literals
 // are properly mapped for access.
 TEST_F(HloEvaluatorTest, EvaluateCustomCall_ManyInputs) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
     HloModule EvaluateCustomCall_ManyInputs
     ENTRY kernel_entry {
       parameter.0 = u32[1]{0} parameter(0)
@@ -4378,7 +4378,7 @@ TEST_F(HloEvaluatorTest, EvaluateCustomCall_ManyInputs) {
 }
 
 TEST_F(HloEvaluatorTest, IsFiniteF16) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
   HloModule test
 
   ENTRY IsFiniteTest {
@@ -4395,7 +4395,7 @@ TEST_F(HloEvaluatorTest, IsFiniteF16) {
 }
 
 TEST_F(HloEvaluatorTest, IsFiniteBf16) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
   HloModule test
 
   ENTRY IsFiniteTest {
@@ -4414,7 +4414,7 @@ TEST_F(HloEvaluatorTest, IsFiniteBf16) {
 // Check that evaluating `f32[<huge>, 0] iota` doesn't oom (it's an empty
 // array!).
 TEST_F(HloEvaluatorTest, ZeroSizedIotaWithHugeDimension) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
   HloModule test
   ENTRY t {
     ROOT i = f32[1000000000000, 0] iota(), iota_dimension=0
@@ -4427,7 +4427,7 @@ TEST_F(HloEvaluatorTest, ZeroSizedIotaWithHugeDimension) {
 }
 
 TEST_F(HloEvaluatorTest, CopyStartCopyDone) {
-  constexpr absl::string_view hlo_text = R""(
+  const absl::string_view hlo_text = R""(
   HloModule test
   ENTRY CopyStartCopyDone {
     init = f32[] constant(42.0)
",0,test
d432c1fb460dd3de37312cbb69d3f4fbdc5508c6,tensorflow/tensorflow,Update comments.,parallel_loop_emitter.cc,"@@ -68,6 +68,7 @@ ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(absl::string_view loop_name,
   //     ...
   //   }
   // The part between [] are added only if blockDim.y > 1.
+  // blockIdx.y and gridDim.y are always 1.
 
   // Per the PTX documentation:
   //   ""It is guaranteed that [...] 0  <=  %ctaid.x <  %nctaid.x""
@@ -76,7 +77,8 @@ ParallelLoopEmitter::EmitIndexAndSetExitBasicBlock(absl::string_view loop_name,
   if (launch_dimensions_.thread_counts_per_block().y > 1) {
     // When blockDim.y > 1, then we are in the small row case. Each
     // blockDim.x maps to exactly one row and blockDim.y maps to some
-    // consecutive rows.
+    // consecutive rows. This avoids a block size that is too small to be
+    // efficient.
     CHECK(launch_config_.row_vectorized);
     CHECK_EQ(shape_.dimensions().back(),
 	     launch_dimensions_.thread_counts_per_block().x *
",0,train
bed506e3a160402f4f93aa8fdfc4bb8b270a3953,tensorflow/tensorflow,"[XLA] Fix race condition in RefcountingHashMap

Quoting the bug from jlebar@:

> Suppose the refcount of entry for key K goes to 0.  Then before the deleter is run, someone touches map[K], thus causing the refcount of this entry to go back to 1.  Then the deleter runs, deleting the object.  Boom.

PiperOrigin-RevId: 289194684
Change-Id: I3a1d9a8294d45eb1c554ee511328fc5a9d0b1e20",refcounting_hash_map.h,"@@ -63,16 +63,22 @@ class RefcountingHashMap {
   std::shared_ptr<V> operator[](const K& key) {
     absl::MutexLock lock(&mu_);
     auto it = map_.find(key);
-    if (it == map_.end()) {
-      // Create entry in the map and then set its value, so the value can
-      // contain a pointer back into the map.
-      it = map_.emplace(key, std::weak_ptr<V>()).first;
-      std::shared_ptr<V> value(value_factory_(key).release(),
-                               Deleter{&it->first, this});
-      it->second = value;  // Set the weak ptr to the shared ptr.
-      return value;
+    // Re-check that the entry has not expired, in case the deleter was still
+    // running when we entered this block.
+    if (it != map_.end()) {
+      if (std::shared_ptr<V> value = it->second.lock()) {
+        return value;
+      }
+      map_.erase(it);
     }
-    return it->second.lock();
+
+    // Create entry in the map and then set its value, so the value can
+    // contain a pointer back into the map.
+    it = map_.emplace(key, std::weak_ptr<V>()).first;
+    std::shared_ptr<V> value(value_factory_(key).release(),
+                             Deleter{&it->first, this});
+    it->second = value;  // Set the weak ptr to the shared ptr.
+    return value;
   }
 
   // Runs a function over every key/value in the map.
@@ -99,9 +105,9 @@ class RefcountingHashMap {
       delete v;
       absl::MutexLock lock(&parent->mu_);
       auto it = parent->map_.find(*key);
-      CHECK(it != parent->map_.end());
-      CHECK(it->second.expired());
-      parent->map_.erase(it);
+      if (it != parent->map_.end() && it->second.expired()) {
+        parent->map_.erase(it);
+      }
     }
   };
 
",0,train
e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator.

PiperOrigin-RevId: 249255382",cusolver_context.cc,"@@ -91,12 +91,14 @@ StatusOr<CusolverContext> CusolverContext::Create(se::Stream* stream) {
   TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnCreate(&handle)));
   CusolverContext context(stream, handle);
 
-  // StreamExecutor really should just expose the Cuda stream to clients...
-  const cudaStream_t* cuda_stream =
-      CHECK_NOTNULL(reinterpret_cast<const cudaStream_t*>(
-          stream->implementation()->GpuStreamMemberHack()));
-  TF_RETURN_IF_ERROR(
-      CusolverStatusToStatus(cusolverDnSetStream(handle, *cuda_stream)));
+  if (stream) {
+    // StreamExecutor really should just expose the Cuda stream to clients...
+    const cudaStream_t* cuda_stream =
+        CHECK_NOTNULL(reinterpret_cast<const cudaStream_t*>(
+            stream->implementation()->GpuStreamMemberHack()));
+    TF_RETURN_IF_ERROR(
+        CusolverStatusToStatus(cusolverDnSetStream(handle, *cuda_stream)));
+  }
 
   return std::move(context);
 }
@@ -131,17 +133,40 @@ CusolverContext::~CusolverContext() {
 
 #define DN_SOLVER_FN(method, type_prefix) cusolverDn##type_prefix##method
 
-#define POTRF_BUFFER_SIZE_INSTANCE(T, type_prefix)                            \
-  StatusOr<int64> CusolverContext::PotrfBufferSize(                           \
-      se::blas::UpperLower uplo, int n, se::DeviceMemory<T> A, int lda) {     \
-    int size = -1;                                                            \
-    TF_RETURN_IF_ERROR(CusolverStatusToStatus(DN_SOLVER_FN(                   \
-        potrf_bufferSize, type_prefix)(handle(), CUDABlasUpperLower(uplo), n, \
-                                       ToDevicePointer(A), lda, &size)));     \
-    return size;                                                              \
+// Note: NVidia have promised that it is safe to pass 'nullptr' as the argument
+// buffers to cuSolver buffer size methods and this will be a documented
+// behavior in a future cuSolver release.
+StatusOr<int64> CusolverContext::PotrfBufferSize(PrimitiveType type,
+                                                 se::blas::UpperLower uplo,
+                                                 int n, int lda) {
+  int size = -1;
+  switch (type) {
+    case F32: {
+      TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnSpotrf_bufferSize(
+          handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size)));
+      break;
+    }
+    case F64: {
+      TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnDpotrf_bufferSize(
+          handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size)));
+      break;
+    }
+    case C64: {
+      TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnCpotrf_bufferSize(
+          handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size)));
+      break;
+    }
+    case C128: {
+      TF_RETURN_IF_ERROR(CusolverStatusToStatus(cusolverDnZpotrf_bufferSize(
+          handle(), CUDABlasUpperLower(uplo), n, /*A=*/nullptr, lda, &size)));
+      break;
+    }
+    default:
+      return InvalidArgument(""Invalid type for cholesky decomposition: %s"",
+                             PrimitiveType_Name(type));
   }
-
-CALL_LAPACK_TYPES(POTRF_BUFFER_SIZE_INSTANCE);
+  return size;
+}
 
 #define POTRF_INSTANCE(T, type_prefix)                                    \
   Status CusolverContext::Potrf(                                          \
",0,train
e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator.

PiperOrigin-RevId: 249255382",cusolver_context.h,"@@ -32,6 +32,8 @@ namespace gpu {
 
 class CusolverContext {
  public:
+  // stream may be nullptr, in which case the context can only be used for
+  // buffer size queries.
   static StatusOr<CusolverContext> Create(se::Stream* stream);
   CusolverContext() = default;
   ~CusolverContext();
@@ -63,17 +65,9 @@ class CusolverContext {
                se::DeviceMemory<std::complex<double>> workspace);
 
   // Returns the size of the `workspace` required by Potrf, in number of
-  // elements of size T.
-  StatusOr<int64> PotrfBufferSize(se::blas::UpperLower uplo, int n,
-                                  se::DeviceMemory<float> dev_A, int lda);
-  StatusOr<int64> PotrfBufferSize(se::blas::UpperLower uplo, int n,
-                                  se::DeviceMemory<double> dev_A, int lda);
-  StatusOr<int64> PotrfBufferSize(se::blas::UpperLower uplo, int n,
-                                  se::DeviceMemory<std::complex<float>> dev_A,
-                                  int lda);
-  StatusOr<int64> PotrfBufferSize(se::blas::UpperLower uplo, int n,
-                                  se::DeviceMemory<std::complex<double>> dev_A,
-                                  int lda);
+  // elements of `type`.
+  StatusOr<int64> PotrfBufferSize(PrimitiveType type, se::blas::UpperLower uplo,
+                                  int n, int lda);
 
  private:
   CusolverContext(se::Stream* stream, cusolverDnHandle_t handle);
",0,train
e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator.

PiperOrigin-RevId: 249255382",cusolver_rewriter.cc,"@@ -23,7 +23,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/literal.h""
 #include ""tensorflow/compiler/xla/service/dfs_hlo_visitor_with_default.h""
 #include ""tensorflow/compiler/xla/service/gpu/ir_emission_utils.h""
-#include ""tensorflow/compiler/xla/service/gpu/scratch_allocator.h""
 #include ""tensorflow/compiler/xla/service/hlo_computation.h""
 #include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/service/hlo_opcode.h""
@@ -31,7 +30,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/logging.h""
-#include ""tensorflow/core/platform/stream_executor_no_cuda.h""
 #include ""tensorflow/stream_executor/blas.h""
 
 namespace xla {
@@ -48,7 +46,6 @@ void SetFortranLayout(Shape* shape) {
 }
 
 StatusOr<HloInstruction*> CreateCholesky(CusolverContext* context,
-                                         ScratchAllocator* allocator,
                                          HloInstruction* operand,
                                          const CholeskyOptions& options,
                                          const OpMetadata& metadata) {
@@ -67,39 +64,8 @@ StatusOr<HloInstruction*> CreateCholesky(CusolverContext* context,
   se::blas::UpperLower uplo = options.lower() ? se::blas::UpperLower::kLower
                                               : se::blas::UpperLower::kUpper;
   int64 workspace_size;  // Number of elements of size a_shape.element_type()
-  switch (a_shape.element_type()) {
-    case F32: {
-      TF_ASSIGN_OR_RETURN(auto a,
-                          allocator->Allocate<float>(context->stream(), n * n));
-      TF_ASSIGN_OR_RETURN(workspace_size,
-                          context->PotrfBufferSize(uplo, n, a, n));
-      break;
-    }
-    case F64: {
-      TF_ASSIGN_OR_RETURN(
-          auto a, allocator->Allocate<double>(context->stream(), n * n));
-      TF_ASSIGN_OR_RETURN(workspace_size,
-                          context->PotrfBufferSize(uplo, n, a, n));
-      break;
-    }
-    case C64: {
-      TF_ASSIGN_OR_RETURN(auto a, allocator->Allocate<std::complex<float>>(
-                                      context->stream(), n * n));
-      TF_ASSIGN_OR_RETURN(workspace_size,
-                          context->PotrfBufferSize(uplo, n, a, n));
-      break;
-    }
-    case C128: {
-      TF_ASSIGN_OR_RETURN(auto a, allocator->Allocate<std::complex<double>>(
-                                      context->stream(), n * n));
-      TF_ASSIGN_OR_RETURN(workspace_size,
-                          context->PotrfBufferSize(uplo, n, a, n));
-      break;
-    }
-    default:
-      return InvalidArgument(""Invalid type for cholesky decomposition: %s"",
-                             a_shape.ToString());
-  }
+  TF_ASSIGN_OR_RETURN(workspace_size, context->PotrfBufferSize(
+                                          a_shape.element_type(), uplo, n, n));
 
   // TODO(phawkins): Ideally we would relax this constraint. What we actually
   // want is that:
@@ -131,7 +97,6 @@ StatusOr<HloInstruction*> CreateCholesky(CusolverContext* context,
 
 // Tries to rewrite a single convolution into a call to cudnn.
 StatusOr<bool> RunOnInstruction(CusolverContext* context,
-                                ScratchAllocator* allocator,
                                 HloInstruction* instruction) {
   if (instruction->opcode() != HloOpcode::kCholesky) {
     return false;
@@ -139,7 +104,7 @@ StatusOr<bool> RunOnInstruction(CusolverContext* context,
 
   TF_ASSIGN_OR_RETURN(
       HloInstruction * custom_call,
-      CreateCholesky(context, allocator, instruction->mutable_operand(0),
+      CreateCholesky(context, instruction->mutable_operand(0),
                      instruction->cholesky_options(), instruction->metadata()));
 
   VLOG(1) << ""Replacing "" << instruction->ToString() << "" with ""
@@ -167,41 +132,18 @@ StatusOr<bool> CusolverRewriter::RunOnComputation(HloComputation* computation) {
     return false;
   }
 
-  // Create a stream for us to do our work on. We don't really need to do any
-  // work, just allocate memory, but that's the cuSolver API.
-  se::Stream stream{stream_exec_};
-  stream.Init();
-  const auto device_ordinal = stream_exec_->device_ordinal();
-
-  // allocator either points to this->allocator_ or, if that's null, to a
-  // se::StreamExecutorMemoryAllocator for stream_exec_.
-  se::DeviceMemoryAllocator* allocator;
-  absl::optional<se::StreamExecutorMemoryAllocator> se_allocator;
-  if (allocator_ != nullptr) {
-    allocator = allocator_;
-  } else {
-    se_allocator.emplace(stream_exec_->platform(),
-                         absl::Span<se::StreamExecutor* const>({stream_exec_}));
-    allocator = &*se_allocator;
-  }
-  ScratchAllocator scratch_allocator(device_ordinal, allocator);
-
   TF_ASSIGN_OR_RETURN(CusolverContext context,
-                      CusolverContext::Create(&stream));
+                      CusolverContext::Create(/*stream=*/nullptr));
 
   bool changed = false;
   for (HloInstruction* instruction : cusolver_calls) {
-    TF_ASSIGN_OR_RETURN(
-        bool result,
-        RunOnInstruction(&context, &scratch_allocator, instruction));
+    TF_ASSIGN_OR_RETURN(bool result, RunOnInstruction(&context, instruction));
     changed |= result;
   }
   return changed;
 }
 
-CusolverRewriter::CusolverRewriter(se::StreamExecutor* stream_exec,
-                                   se::DeviceMemoryAllocator* allocator)
-    : stream_exec_(stream_exec), allocator_(allocator) {}
+CusolverRewriter::CusolverRewriter() = default;
 
 StatusOr<bool> CusolverRewriter::Run(HloModule* module) {
   bool changed = false;
",0,train
e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator.

PiperOrigin-RevId: 249255382",cusolver_rewriter.h,"@@ -29,17 +29,13 @@ namespace gpu {
 // Rewrites Cholesky calls into CustomCall HLOs that call into cuSolver.
 class CusolverRewriter : public HloModulePass {
  public:
-  CusolverRewriter(se::StreamExecutor* stream_exec,
-                   se::DeviceMemoryAllocator* allocator);
+  CusolverRewriter();
   absl::string_view name() const override { return ""cusolver-rewriter""; }
 
   StatusOr<bool> Run(HloModule* module) override;
 
  private:
   StatusOr<bool> RunOnComputation(HloComputation* computation);
-
-  se::StreamExecutor* stream_exec_;   // never null
-  se::DeviceMemoryAllocator* allocator_;  // may be null
 };
 
 }  // namespace gpu
",0,train
e46f394c1744ac93ed3a73a333c47809ff6198a7,tensorflow/tensorflow,"[XLA:GPU] Simplify Cusolver rewriter to not require a StreamExecutor or an allocator.

PiperOrigin-RevId: 249255382",nvptx_compiler.cc,"@@ -266,7 +266,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     HloPassPipeline pipeline(""conv_canonicalization"");
     pipeline.AddInvariantChecker<HloVerifier>(/*layout_sensitive=*/false,
                                               /*allow_mixed_precision=*/false);
-    pipeline.AddPass<CusolverRewriter>(stream_exec, device_allocator);
+    pipeline.AddPass<CusolverRewriter>();
     pipeline.AddPass<CudnnConvRewriter>();
     pipeline.AddPass<CudnnFusedConvRewriter>();
     pipeline.AddPass<CudnnConvPaddingLegalization>();
",0,train
83765f2c27c1ed3da86e78f033ba73a87d7cf6b7,tensorflow/tensorflow,VLOG(2) accepted nodes in segmenter,segment.cc,"@@ -450,6 +450,12 @@ tensorflow::Status SegmentGraph(
         num_unsupported_ops++;
         node = nullptr;
       }
+      else {
+        VLOG(2) << ""Accepted as a TF-TRT candidate, ""
+                << ""(Op type: "" << node->tf_node()->type_string() << ""), ""
+                << ""(Op name: "" << node->name() << ""), ""
+                << ""(Reason: "" << status << "")"";
+      }
     }
     node_segments.emplace_back(node);
   }
",0,train
1dc984a4f49cd4181a00d361b567a2c7c11c6650,tensorflow/tensorflow,Avoid unnecessary creation of RuntimeShape,space_to_batch_nd.h,"@@ -42,9 +42,13 @@ inline void SpaceToBatchND(const SpaceToBatchParams& params,
 
   // Extends the input/output shape from 3D to 4D if needed, NHC -> NH1C.
   const RuntimeShape input1_shape =
-      RuntimeShape::ExtendedShape(4, unextended_input1_shape);
+      (unextended_input1_shape.DimensionsCount() == 4)
+          ? unextended_input1_shape
+          : RuntimeShape::ExtendedShape(4, unextended_input1_shape);
   const RuntimeShape output_shape =
-      RuntimeShape::ExtendedShape(4, unextended_output_shape);
+      (unextended_output_shape.DimensionsCount() == 4)
+          ? unextended_output_shape
+          : RuntimeShape::ExtendedShape(4, unextended_output_shape);
 
   const int depth = input1_shape.Dims(3);
   const int input_width = input1_shape.Dims(2);
",0,train
b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations.

PiperOrigin-RevId: 187395444",hlo_evaluator.cc,"@@ -613,14 +613,25 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  template <typename NativeT,
+            typename std::enable_if<std::is_integral<NativeT>::value>::type* =
+                nullptr>
+  Status HandleMaximum(HloInstruction* maximum) {
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[maximum],
+        ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) {
+          return std::max(lhs, rhs);
+        }));
+    return Status::OK();
+  }
+
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
   Status HandleMaximum(HloInstruction* maximum) {
     TF_ASSIGN_OR_RETURN(
         parent_->evaluated_[maximum],
         ElementWiseBinaryOp(maximum, [](ElementwiseT lhs, ElementwiseT rhs) {
-          return std::fmax(lhs, rhs);
+          return ((lhs >= rhs) || std::isnan(lhs)) ? lhs : rhs;
         }));
     return Status::OK();
   }
@@ -636,18 +647,30 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     return HandleMaximum<ElementwiseT>(maximum);
   }
 
-  template <
-      typename NativeT,
-      typename std::enable_if<!is_complex_t<NativeT>::value>::type* = nullptr>
+  template <typename NativeT,
+            typename std::enable_if<std::is_integral<NativeT>::value>::type* =
+                nullptr>
   Status HandleMinimum(HloInstruction* minimum) {
     TF_ASSIGN_OR_RETURN(parent_->evaluated_[minimum],
                         ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el,
                                                         ElementwiseT rhs_el) {
-                          return std::fmin(lhs_el, rhs_el);
+                          return std::min(lhs_el, rhs_el);
                         }));
     return Status::OK();
   }
 
+  template <typename NativeT, typename std::enable_if<std::is_floating_point<
+                                  NativeT>::value>::type* = nullptr>
+  Status HandleMinimum(HloInstruction* minimum) {
+    TF_ASSIGN_OR_RETURN(
+        parent_->evaluated_[minimum],
+        ElementWiseBinaryOp(minimum, [](ElementwiseT lhs_el,
+                                        ElementwiseT rhs_el) {
+          return ((lhs_el <= rhs_el) || std::isnan(lhs_el)) ? lhs_el : rhs_el;
+        }));
+    return Status::OK();
+  }
+
   template <
       typename NativeT,
       typename std::enable_if<is_complex_t<NativeT>::value>::type* = nullptr>
",0,train
b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations.

PiperOrigin-RevId: 187395444",llvm_util.cc,"@@ -106,8 +106,10 @@ llvm::Value* EmitFloatMax(llvm::Value* lhs_value, llvm::Value* rhs_value,
     auto cmp = ir_builder->CreateFCmpUGE(lhs_value, rhs_value);
     return ir_builder->CreateSelect(cmp, lhs_value, rhs_value);
   } else {
-    return EmitCallToIntrinsic(llvm::Intrinsic::maxnum, {lhs_value, rhs_value},
-                               {lhs_value->getType()}, ir_builder);
+    auto cmp_ge = ir_builder->CreateFCmpOGE(lhs_value, rhs_value);
+    auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value);
+    auto sel_lhs = ir_builder->CreateOr(cmp_ge, lhs_is_nan);
+    return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value);
   }
 }
 
@@ -117,8 +119,10 @@ llvm::Value* EmitFloatMin(llvm::Value* lhs_value, llvm::Value* rhs_value,
     auto cmp = ir_builder->CreateFCmpULE(lhs_value, rhs_value);
     return ir_builder->CreateSelect(cmp, lhs_value, rhs_value);
   } else {
-    return EmitCallToIntrinsic(llvm::Intrinsic::minnum, {lhs_value, rhs_value},
-                               {lhs_value->getType()}, ir_builder);
+    auto cmp_le = ir_builder->CreateFCmpOLE(lhs_value, rhs_value);
+    auto lhs_is_nan = ir_builder->CreateFCmpUNE(lhs_value, lhs_value);
+    auto sel_lhs = ir_builder->CreateOr(cmp_le, lhs_is_nan);
+    return ir_builder->CreateSelect(sel_lhs, lhs_value, rhs_value);
   }
 }
 
",0,train
b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations.

PiperOrigin-RevId: 187395444",array_elementwise_ops_test.cc,"@@ -1648,33 +1648,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, SquareIn4DZeroElements) {
   ComputeAndCompareR4<float>(&builder, expected, {}, error_spec_);
 }
 
-// GPU backend emits nvvm intrinsic for fmin and fmax, whose semantics is NOT
-// such
-// * fmin(NaN, x) = x
-// * fmax(NaN, x) = x
-// so we only test NAN on CPU.
-//
-// TODO(b/28180546): Make this compile in a way that is consistent
-// among backends.
 XLA_TEST_F(ArrayElementwiseOpTest, MinF32s) {
   ComputationBuilder builder(client_, TestName());
-#if !defined(XLA_TEST_BACKEND_CPU)
-  auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f});
-  auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f});
-#else
   SetFastMathDisabled(true);
   auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f, NAN, 6.0f});
   auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f, 10.0f, NAN});
-#endif
   auto minimum = builder.Min(lhs, rhs);
 
-  ComputeAndCompareR1<float>(&builder,
-#if !defined(XLA_TEST_BACKEND_CPU)
-                             {1.0f, -5.0f, 1.0f},
-#else
-                             {1.0f, -5.0f, 1.0f, 10.0f, 6.0f},
-#endif
-                             {}, error_spec_);
+  ComputeAndCompareR1<float>(&builder, {1.0f, -5.0f, 1.0f, NAN, NAN}, {},
+                             error_spec_);
 }
 
 XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) {
@@ -1685,50 +1667,26 @@ XLA_TEST_F(ArrayElementwiseOpTest, MinZeroElementF32s) {
   ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
 }
 
-// TODO(b/28180546): Make this compile in a way that is consistent
-// among backends. See comment on MinF32s test above.
 XLA_TEST_F(ArrayElementwiseOpTest, MinF64s) {
   ComputationBuilder builder(client_, TestName());
-#if !defined(XLA_TEST_BACKEND_CPU)
-  auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25});
-  auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0});
-#else
   SetFastMathDisabled(true);
   auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25, NAN, 6.0});
   auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0, 10.0, NAN});
-#endif
   auto minimum = builder.Min(lhs, rhs);
 
-  ComputeAndCompareR1<double>(&builder,
-#if !defined(XLA_TEST_BACKEND_CPU)
-                              {1.0, -5.0, 1.0},
-#else
-                              {1.0, -5.0, 1.0, 10.0, 6.0},
-#endif
-                              {}, error_spec_);
+  ComputeAndCompareR1<double>(&builder, {1.0, -5.0, 1.0, NAN, NAN}, {},
+                              error_spec_);
 }
 
-// TODO(b/28180546): Make this compile in a way that is consistent
-// among backends. See comment on MinF32s test above.
 XLA_TEST_F(ArrayElementwiseOpTest, MaxF32s) {
   ComputationBuilder builder(client_, TestName());
-#if !defined(XLA_TEST_BACKEND_CPU)
-  auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f});
-  auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f});
-#else
   SetFastMathDisabled(true);
   auto lhs = builder.ConstantR1<float>({1.0f, 1.0f, 2.25f, NAN, 6.0f});
   auto rhs = builder.ConstantR1<float>({2.0f, -5.0f, 1.0f, 10.0f, NAN});
-#endif
   auto maximum = builder.Max(lhs, rhs);
 
-  ComputeAndCompareR1<float>(&builder,
-#if !defined(XLA_TEST_BACKEND_CPU)
-                             {2.0f, 1.0f, 2.25f},
-#else
-                             {2.0f, 1.0f, 2.25f, 10.0f, 6.0f},
-#endif
-                             {}, error_spec_);
+  ComputeAndCompareR1<float>(&builder, {2.0f, 1.0f, 2.25f, NAN, NAN}, {},
+                             error_spec_);
 }
 
 XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) {
@@ -1739,27 +1697,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, MaxZeroElementF32s) {
   ComputeAndCompareR1<float>(&builder, {}, {}, error_spec_);
 }
 
-// TODO(b/28180546): Make this compile in a way that is consistent
-// among backends. See comment on MinF32s test above.
 XLA_TEST_F(ArrayElementwiseOpTest, MaxF64s) {
   ComputationBuilder builder(client_, TestName());
-#if !defined(XLA_TEST_BACKEND_CPU)
-  auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25});
-  auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0});
-#else
   SetFastMathDisabled(true);
   auto lhs = builder.ConstantR1<double>({1.0, 1.0, 2.25, NAN, 6.0});
   auto rhs = builder.ConstantR1<double>({2.0, -5.0, 1.0, 10.0, NAN});
-#endif
   auto maximum = builder.Max(lhs, rhs);
 
-  ComputeAndCompareR1<double>(&builder,
-#if !defined(XLA_TEST_BACKEND_CPU)
-                              {2.0, 1.0, 2.25},
-#else
-                              {2.0, 1.0, 2.25, 10.0, 6.0},
-#endif
-                              {}, error_spec_);
+  ComputeAndCompareR1<double>(&builder, {2.0, 1.0, 2.25, NAN, NAN}, {},
+                              error_spec_);
 }
 
 XLA_TEST_F(ArrayElementwiseOpTest, MaxS32s) {
",0,train
b98a1f31bca1e773ee215f2c32aa0509843c1247,tensorflow/tensorflow,"Propagate NaNs for floating point min/max operations.

PiperOrigin-RevId: 187395444",scalar_computations_test.cc,"@@ -860,6 +860,12 @@ XLA_TEST_F(ScalarComputationsTest, MinF32Below) {
   TestMinMax<float>(-100.1f, 3.1f, -100.1f, &ComputationBuilder::Min);
 }
 
+XLA_TEST_F(ScalarComputationsTest, MinPropagatesNan) {
+  SetFastMathDisabled(true);
+  TestMinMax<float>(NAN, 3.1f, NAN, &ComputationBuilder::Min);
+  TestMinMax<float>(-3.1f, NAN, NAN, &ComputationBuilder::Min);
+}
+
 XLA_TEST_F(ScalarComputationsTest, MaxF32Above) {
   TestMinMax<float>(10.1f, 3.1f, 10.1f, &ComputationBuilder::Max);
 }
@@ -868,6 +874,12 @@ XLA_TEST_F(ScalarComputationsTest, MaxF32Below) {
   TestMinMax<float>(-100.1f, 3.1f, 3.1f, &ComputationBuilder::Max);
 }
 
+XLA_TEST_F(ScalarComputationsTest, MaxPropagatesNan) {
+  SetFastMathDisabled(true);
+  TestMinMax<float>(NAN, 3.1f, NAN, &ComputationBuilder::Max);
+  TestMinMax<float>(-3.1f, NAN, NAN, &ComputationBuilder::Max);
+}
+
 XLA_TEST_F(ScalarComputationsTest, ComplicatedArithmeticExpressionF32) {
   // Compute the expression (1 * (3 - 1) * (7 + 0) - 4) / 20.
   ComputationBuilder b(client_, TestName());
",0,train
c102dc94c70dfa58cc1aa847492bba8307e731d0,tensorflow/tensorflow,"Remove redundant code.

PiperOrigin-RevId: 351484243
Change-Id: If3a9eead0ceb1859c65627c13efeeb5b9dc8676c",stream_executor.cc,"@@ -801,10 +801,6 @@ port::Status InitStreamExecutorPlugin(SEInitPluginFn init_fn) {
   TF_RETURN_IF_ERROR(tensorflow::StatusFromTF_Status(c_status.get()));
   TF_RETURN_IF_ERROR(ValidateSPTimerFns(timer_fns));
 
-  platform_fns.create_timer_fns(&platform, &timer_fns, c_status.get());
-  TF_RETURN_IF_ERROR(tensorflow::StatusFromTF_Status(c_status.get()));
-  TF_RETURN_IF_ERROR(ValidateSPTimerFns(timer_fns));
-
   // Register new platform
   std::string platform_name = std::string(platform.name);
   std::unique_ptr<stream_executor::CPlatform> cplatform(
",0,train
2197c067c79d1464eb957764f8edd120693dcdf6,tensorflow/tensorflow,"Fix setup.py for Eigen headers
Change: 114173506",setup.py,"@@ -99,18 +99,21 @@ class InstallHeaders(Command):
     # directories for -I
     install_dir = re.sub('/google/protobuf/src', '', install_dir)
 
-    # Copy eigen code into tensorflow/include and
-    # tensorflow/include/external/eigen_archive/eigen-eigen-<revision>.
+    # Copy eigen code into tensorflow/include,
+    # tensorflow/include/external/eigen_archive/eigen-eigen-<revision>,
+    # and tensorflow/include/eigen-eigen-<revision>.
     # A symlink would do, but the wheel file that gets created ignores
     # symlink within the directory hierarchy.
     # NOTE(keveman): Figure out how to customize bdist_wheel package so
     # we can do the symlink.
     if re.search(r'(external/eigen_archive/eigen-eigen-\w+)', install_dir):
-      extra_dir = re.sub(r'/external/eigen_archive/eigen-eigen-\w+', '',
-                         install_dir)
-      if not os.path.exists(extra_dir):
-        self.mkpath(extra_dir)
-      self.copy_file(header, extra_dir)
+      extra_dirs = [re.sub('/external/eigen_archive', '', install_dir),
+                    re.sub(r'external/eigen_archive/eigen-eigen-\w+', '',
+                           install_dir)]
+      for extra_dir in extra_dirs:
+        if not os.path.exists(extra_dir):
+          self.mkpath(extra_dir)
+        self.copy_file(header, extra_dir)
 
     if not os.path.exists(install_dir):
       self.mkpath(install_dir)
",0,train
4a4605f50fc63871f995a5f487c9b66e399ac674,tensorflow/tensorflow,"Initialize TPU inside MWMS

PiperOrigin-RevId: 401877668
Change-Id: I3e19d194e94ac617bf3ab5a131898030f9ffd67c",collective_all_reduce_strategy.py,"@@ -43,6 +43,7 @@ from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import collective_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.tpu import tpu_strategy_util
 from tensorflow.python.training.tracking import base
 from tensorflow.python.util import deprecation
 from tensorflow.python.util.tf_export import tf_export
@@ -504,6 +505,8 @@ class CollectiveAllReduceExtended(mirrored_strategy.MirroredExtended):
     # some cases.
     local_devices, local_device_type = self._initialize_local_devices(
         cluster_resolver, self._worker_device)
+    if local_device_type == ""TPU"":
+      tpu_strategy_util.initialize_tpu_system()
 
     self._collective_keys = cross_device_utils.CollectiveKeys(
         group_key_start=1 + self._collective_key_base)
",0,train
4a4605f50fc63871f995a5f487c9b66e399ac674,tensorflow/tensorflow,"Initialize TPU inside MWMS

PiperOrigin-RevId: 401877668
Change-Id: I3e19d194e94ac617bf3ab5a131898030f9ffd67c",cross_device_utils.py,"@@ -265,10 +265,11 @@ class CollectiveReplicaLauncher(object):
     self._group_size = group_size
     self._collective_keys = collective_keys
     self._device = device
-    # Created lazily in _get_ordering_token to avoid creating tensors on TPUs
-    # before the user has a chance to call initialize_system.
-    self._ordering_token = None
-    self._ordering_token_init_lock = threading.Lock()
+    if self._use_ordering_token():
+      with ops.init_scope(), ops.device(device):
+        self._ordering_token = resource_variable_ops.ResourceVariable(0.)
+    else:
+      self._ordering_token = None
 
   def _control_input(self, control_input):
     if control_input is not None and not self._use_ordering_token():
@@ -319,14 +320,8 @@ class CollectiveReplicaLauncher(object):
                                                     self._device)
 
   def _get_ordering_token(self, communication_hint):
-    if self._use_ordering_token():
-      with self._ordering_token_init_lock:
-        if self._ordering_token is None:
-          with ops.init_scope(), ops.device(self._device):
-            self._ordering_token = resource_variable_ops.ResourceVariable(0.)
-        if communication_hint == 'NCCL':
-          return self._ordering_token.handle
-
+    if self._use_ordering_token() and communication_hint == 'NCCL':
+      return self._ordering_token.handle
     return None
 
   def can_order_nccl(self):
",0,train
ac91ebc9bec9eb9b0ade27ccd470a547b180ec8b,tensorflow/tensorflow,"Don't clear+resize outputs vector twice in ExecutorState.
Change: 121507010",executor.cc,"@@ -1006,7 +1006,6 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) {
     Entry* input_tensors = GetInputTensors(input_frame, input_iter);
     Entry* first_input = input_tensors + item.input_start;
     outputs.clear();
-    outputs.resize(item.num_outputs);
 
     TensorReferenceVector accessed_tensors;
     DeviceContext* device_context = nullptr;
@@ -1014,7 +1013,9 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_usec) {
     // transfer node. For transfer nodes, we need to propagate the ""dead""
     // bit even when the node is dead.
     bool launched_asynchronously = false;
-    if (!tagged_node.is_dead || IsTransferNode(node)) {
+    if (tagged_node.is_dead && !IsTransferNode(node)) {
+      outputs.resize(item.num_outputs);
+    } else {
       // Prepares inputs.
       bool is_input_dead = false;
       s = PrepareInputs(item, first_input, &inputs, &input_device_contexts,
@@ -1230,7 +1231,7 @@ Status ExecutorState::ProcessOutputs(const NodeItem& item, OpKernelContext* ctx,
                                      EntryVector* outputs,
                                      NodeExecStats* stats) {
   const Node* node = item.node;
-  outputs->clear();
+  DCHECK_EQ(0, outputs->size());
   outputs->resize(item.num_outputs);
 
   Status s = ctx->status();
",0,train
ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default.

PiperOrigin-RevId: 188380039",sdca_ops_test.py,"@@ -270,14 +270,14 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
 
           train_op = lr.minimize()
 
-          def Minimize():
+          def minimize():
             with self._single_threaded_test_session():
               for _ in range(_MAX_ITERATIONS):
-                train_op.run()
+                train_op.run()  # pylint: disable=cell-var-from-loop
 
           threads = []
           for _ in range(num_loss_partitions):
-            threads.append(threading.Thread(target=Minimize))
+            threads.append(threading.Thread(target=minimize))
             threads[-1].start()
 
           for t in threads:
@@ -395,7 +395,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
         predicted_labels = get_binary_predictions_for_logistic(predictions)
         self.assertAllClose([0, 1, 1, 1], predicted_labels.eval())
         self.assertAllClose(
-            0.01, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2)
+            0.0, lr.approximate_duality_gap().eval(), rtol=1e-2, atol=1e-2)
 
   def testFractionalExampleLabel(self):
     # Setup test data with 1 positive, and 1 mostly-negative example.
@@ -407,7 +407,7 @@ class SdcaWithLogisticLossTest(SdcaModelTest):
         make_example_proto({
             'age': [1],
             'gender': [1]
-        }, 1),
+        }, 0.9),
     ]
     example_weights = [1.0, 1.0]
     for num_shards in _SHARD_NUMBERS:
",0,train
ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default.

PiperOrigin-RevId: 188380039",sdca_internal.cc,"@@ -226,7 +226,7 @@ const ExampleStatistics Example::ComputeWxAndWeightedExampleNorm(
 }
 
 // Examples contains all the training examples that SDCA uses for a mini-batch.
-Status Examples::SampleAdaptativeProbabilities(
+Status Examples::SampleAdaptiveProbabilities(
     const int num_loss_partitions, const Regularizations& regularization,
     const ModelWeights& model_weights,
     const TTypes<float>::Matrix example_state_data,
",0,train
ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default.

PiperOrigin-RevId: 188380039",sdca_internal.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_KERNELS_SDCA_INTERNAL_H_
-#define TENSORFLOW_KERNELS_SDCA_INTERNAL_H_
+#ifndef TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_
+#define TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_
 
 #define EIGEN_USE_THREADS
 
@@ -75,7 +75,7 @@ struct ExampleStatistics {
 
 class Regularizations {
  public:
-  Regularizations(){};
+  Regularizations() {}
 
   // Initialize() must be called immediately after construction.
   Status Initialize(OpKernelConstruction* const context) {
@@ -199,7 +199,7 @@ class FeatureWeightsDenseStorage {
   FeatureWeightsDenseStorage(const TTypes<const float>::Matrix nominals,
                              TTypes<float>::Matrix deltas)
       : nominals_(nominals), deltas_(deltas) {
-    CHECK(deltas.rank() > 1);
+    CHECK_GT(deltas.rank(), 1);
   }
 
   // Check if a feature index is with-in the bounds.
@@ -322,15 +322,15 @@ class Examples {
     return examples_.at(example_index);
   }
 
-  int sampled_index(const int id, const bool adaptative) const {
-    if (adaptative) return sampled_index_[id];
+  int sampled_index(const int id, const bool adaptive) const {
+    if (adaptive) return sampled_index_[id];
     return id;
   }
 
   // Adaptive SDCA in the current implementation only works for
   // binary classification, where the input argument for num_weight_vectors
   // is 1.
-  Status SampleAdaptativeProbabilities(
+  Status SampleAdaptiveProbabilities(
       const int num_loss_partitions, const Regularizations& regularization,
       const ModelWeights& model_weights,
       const TTypes<float>::Matrix example_state_data,
@@ -378,7 +378,7 @@ class Examples {
   // All examples in the batch.
   std::vector<Example> examples_;
 
-  // Adaptative sampling variables
+  // Adaptive sampling variables.
   std::vector<float> probabilities_;
   std::vector<int> sampled_index_;
   std::vector<int> sampled_count_;
@@ -391,4 +391,4 @@ class Examples {
 }  // namespace sdca
 }  // namespace tensorflow
 
-#endif  // TENSORFLOW_KERNELS_SDCA_INTERNAL_H_
+#endif  // TENSORFLOW_CORE_KERNELS_SDCA_INTERNAL_H_
",0,train
ebf554ff77bc46bfdd9b424bc44b62f803100b33,tensorflow/tensorflow,"Make adaptive SDCA the default.

PiperOrigin-RevId: 188380039",sdca_ops.cc,"@@ -80,7 +80,7 @@ struct ComputeOptions {
           context, false,
           errors::InvalidArgument(""Unsupported loss type: "", loss_type));
     }
-    OP_REQUIRES_OK(context, context->GetAttr(""adaptative"", &adaptative));
+    OP_REQUIRES_OK(context, context->GetAttr(""adaptative"", &adaptive));
     OP_REQUIRES_OK(
         context, context->GetAttr(""num_sparse_features"", &num_sparse_features));
     OP_REQUIRES_OK(context, context->GetAttr(""num_sparse_features_with_values"",
@@ -113,7 +113,7 @@ struct ComputeOptions {
   int num_dense_features = 0;
   int num_inner_iterations = 0;
   int num_loss_partitions = 0;
-  bool adaptative = false;
+  bool adaptive = true;
   Regularizations regularizations;
 };
 
@@ -147,9 +147,9 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) {
   OP_REQUIRES_OK(context, context->set_output(""out_example_state_data"",
                                               mutable_example_state_data_t));
 
-  if (options.adaptative) {
+  if (options.adaptive) {
     OP_REQUIRES_OK(context,
-                   examples.SampleAdaptativeProbabilities(
+                   examples.SampleAdaptiveProbabilities(
                        options.num_loss_partitions, options.regularizations,
                        model_weights, example_state_data, options.loss_updater,
                        /*num_weight_vectors =*/1));
@@ -163,7 +163,7 @@ void DoCompute(const ComputeOptions& options, OpKernelContext* const context) {
     // num_examples which is an int.
     for (int id = static_cast<int>(begin); id < end; ++id) {
       const int64 example_index =
-          examples.sampled_index(++atomic_index, options.adaptative);
+          examples.sampled_index(++atomic_index, options.adaptive);
       const Example& example = examples.example(example_index);
       const float dual = example_state_data(example_index, 0);
       const float example_weight = example.example_weight();
",0,train
720a3a15764546619a38da655c23ba6e1cd9200d,tensorflow/tensorflow,"Lower ReluGrad to HLO.

PiperOrigin-RevId: 273772952",hlo_ops.cc,"@@ -702,5 +702,31 @@ static LogicalResult Verify(TransposeOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// CompareOp
+//===----------------------------------------------------------------------===//
+
+void CompareOp::build(Builder* builder, OperationState& result, Value* lhs,
+                      Value* rhs, DenseIntElementsAttr broadcast_dimensions,
+                      StringAttr comparison_direction) {
+  build(builder, result,
+        InferOutputTypes(builder, lhs, rhs, broadcast_dimensions,
+                         comparison_direction),
+        lhs, rhs, broadcast_dimensions, comparison_direction);
+}
+
+Type CompareOp::InferOutputTypes(Builder* builder, Value* lhs, Value* rhs,
+                                 DenseIntElementsAttr broadcast_dimensions,
+                                 StringAttr comparison_direction) {
+  if (!lhs->getType().isa<ShapedType>() || !rhs->getType().isa<ShapedType>())
+    return builder->getTensorType(builder->getI1Type());
+  // TODO(parkers): When binary ops support broadcasting shape inference, reuse
+  // that logic.
+  auto lhs_type = lhs->getType().cast<ShapedType>();
+  auto rhs_type = rhs->getType().cast<ShapedType>();
+  if (lhs_type != rhs_type) return builder->getTensorType(builder->getI1Type());
+  return builder->getTensorType(lhs_type.getShape(), builder->getI1Type());
+}
+
 #define GET_OP_CLASSES
 #include ""tensorflow/compiler/mlir/xla/ir/hlo_ops.cc.inc""
",0,test
a8001b9e8db92620603c3c0588d251192d327bae,tensorflow/tensorflow,"Take proto by value.

PiperOrigin-RevId: 312626373
Change-Id: I2effeab7b0c97052f14b8f52b653f24a379dc7ee",xla_computation.h,"@@ -29,8 +29,8 @@ namespace xla {
 class XlaComputation {
  public:
   XlaComputation() : unique_id_(-1) {}
-  XlaComputation(const HloModuleProto& proto)
-      : unique_id_(proto.id()), proto_(proto) {}
+  XlaComputation(HloModuleProto proto)
+      : unique_id_(proto.id()), proto_(std::move(proto)) {}
 
   ~XlaComputation() {}
 
",0,train
56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system.cc,"@@ -135,6 +135,24 @@ const LibHDFS* libhdfs() {
   return libhdfs;
 }
 
+Status SplitArchiveNameAndPath(StringPiece& path, string& nn) {
+  size_t index_end_archive_name = path.find("".har"");
+  if (index_end_archive_name == path.npos) {
+    return errors::InvalidArgument(
+        ""Hadoop archive path does not contain a .har extension"");
+  }
+  // Case of hadoop archive. Namenode is the path to the archive.
+  nn = string(""har://"") + string(nn) +
+       string(path.substr(0, index_end_archive_name + 4));
+  // Remove the hadoop archive path to the path
+  path.remove_prefix(index_end_archive_name + 4);
+  if (path.empty()) {
+    // Root of the archive
+    path = ""/"";
+  }
+  return Status::OK();
+}
+
 // We rely on HDFS connection caching here. The HDFS client calls
 // org.apache.hadoop.fs.FileSystem.get(), which caches the connection
 // internally.
@@ -164,16 +182,7 @@ Status HadoopFileSystem::Connect(StringPiece fname, hdfsFS* fs) {
     // https://github.com/tensorflow/tensorflow/blob/v1.0.0/third_party/hadoop/hdfs.h#L259
     libhdfs()->hdfsBuilderSetNameNode(builder, ""default"");
   } else if (scheme == ""har"") {
-    size_t index_end_archive_name = path.find("".har"");
-    if (index_end_archive_name == path.npos) {
-      return errors::InvalidArgument(
-          ""Hadoop archive path does not contain a .har extension"");
-    }
-    // Case of hadoop archive. Namenode is the path to the archive.
-    nn = string(""har://"") + string(nn) +
-         string(path.substr(0, index_end_archive_name + 4));
-    // Remove the hadoop archive path to the path
-    path.remove_prefix(index_end_archive_name + 4);
+    SplitArchiveNameAndPath(path, nn);
     libhdfs()->hdfsBuilderSetNameNode(builder, nn.c_str());
   } else {
     libhdfs()->hdfsBuilderSetNameNode(builder,
",0,train
56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system.h,"@@ -70,6 +70,8 @@ class HadoopFileSystem : public FileSystem {
   Status Connect(StringPiece fname, hdfsFS* fs);
 };
 
+Status SplitArchiveNameAndPath(StringPiece& path, string& nn);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_PLATFORM_HADOOP_HADOOP_FILE_SYSTEM_H_
",0,train
56760749a29fbbca270c90811d1bdfc8414c6c7f,tensorflow/tensorflow,Extract the logic into a separate function to write unit tests,hadoop_file_system_test.cc,"@@ -235,6 +235,44 @@ TEST_F(HadoopFileSystemTest, WriteWhileReading) {
   TF_EXPECT_OK(writer->Close());
 }
 
+TEST_F(HadoopFileSystemTest, HarSplit) {
+  string har_path =
+      ""har://hdfs-root/user/j.doe/my_archive.har/dir0/dir1/file.txt"";
+  StringPiece scheme, namenode, path;
+  io::ParseURI(har_path, &scheme, &namenode, &path);
+  EXPECT_EQ(""har"", scheme);
+  EXPECT_EQ(""hdfs-root"", namenode);
+  EXPECT_EQ(""/user/j.doe/my_archive.har/dir0/dir1/file.txt"", path);
+  string nn(namenode);
+  TF_EXPECT_OK(SplitArchiveNameAndPath(path, nn));
+  EXPECT_EQ(""har://hdfs-root/user/j.doe/my_archive.har"", nn);
+  EXPECT_EQ(""/dir0/dir1/file.txt"", path);
+}
+
+TEST_F(HadoopFileSystemTest, NoHarExtension) {
+  string har_path = ""har://hdfs-root/user/j.doe/my_archive/dir0/dir1/file.txt"";
+  StringPiece scheme, namenode, path;
+  io::ParseURI(har_path, &scheme, &namenode, &path);
+  EXPECT_EQ(""har"", scheme);
+  EXPECT_EQ(""hdfs-root"", namenode);
+  EXPECT_EQ(""/user/j.doe/my_archive/dir0/dir1/file.txt"", path);
+  string nn(namenode);
+  EXPECT_EQ(errors::InvalidArgument("""").code(),
+            SplitArchiveNameAndPath(path, nn).code());
+}
+
+TEST_F(HadoopFileSystemTest, HarRootPath) {
+  string har_path = ""har://hdfs-root/user/j.doe/my_archive.har"";
+  StringPiece scheme, namenode, path;
+  io::ParseURI(har_path, &scheme, &namenode, &path);
+  EXPECT_EQ(""har"", scheme);
+  EXPECT_EQ(""hdfs-root"", namenode);
+  EXPECT_EQ(""/user/j.doe/my_archive.har"", path);
+  string nn(namenode);
+  TF_EXPECT_OK(SplitArchiveNameAndPath(path, nn));
+  EXPECT_EQ(""har://hdfs-root/user/j.doe/my_archive.har"", nn);
+  EXPECT_EQ(""/"", path);
+}
 // NewAppendableFile() is not testable. Local filesystem maps to
 // ChecksumFileSystem in Hadoop, where appending is an unsupported operation.
 
",0,train
a4fb4cb6d4440212fbd2e694bbe4d16f02708384,tensorflow/tensorflow,"Legalize xla_hlo.reshape to tf.Reshape

PiperOrigin-RevId: 313630645
Change-Id: Ie8d2c1f0963c6b4a61e593bb95449ef7bf8915ff",legalize_hlo.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""mlir/IR/MLIRContext.h""  // from @llvm-project
 #include ""mlir/IR/Operation.h""  // from @llvm-project
 #include ""mlir/IR/PatternMatch.h""  // from @llvm-project
+#include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/Pass/Pass.h""  // from @llvm-project
 #include ""mlir/Support/LLVM.h""  // from @llvm-project
 #include ""mlir/Support/LogicalResult.h""  // from @llvm-project
@@ -94,6 +95,15 @@ static bool AreBroadcastCompatible(Value x, Value y) {
                                             y_ranked.getShape(), resultShape);
 }
 
+// Returns the shape of the given value in a Constant Op.
+ConstantOp ShapeToConst(PatternRewriter &rewriter, Value value) {
+  ArrayRef<int64_t> shape = value.getType().cast<ShapedType>().getShape();
+  auto attr_type = RankedTensorType::get({static_cast<int64_t>(shape.size())},
+                                         rewriter.getIntegerType(64));
+  auto attr = DenseElementsAttr::get(attr_type, shape);
+  return rewriter.create<ConstantOp>(value.getLoc(), attr_type, attr);
+}
+
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/generated_legalize_hlo.inc""
 
 /// Performs the lowering to XLA dialect.
@@ -107,7 +117,7 @@ void LegalizeHloToTf::runOnFunction() {
 
   ConversionTarget target(context);
   target.addLegalDialect<TensorFlowDialect>();
-  target.addLegalOp<CallOp>();
+  target.addLegalOp<CallOp, ConstantOp>();
   if (failed(applyPartialConversion(getFunction(), target, patterns)))
     signalPassFailure();
 }
",0,train
f38b4a412fdd7002368cf3d6dd5471239526c310,tensorflow/tensorflow,"Minor update of rnn comments, as pointed out in:
https://github.com/tensorflow/tensorflow/issues/4197
Change: 132656666",rnn.py,"@@ -489,7 +489,7 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs,
       [batch_size, input_size], or a nested tuple of such elements.
     initial_state_fw: (optional) An initial state for the forward RNN.
       This must be a tensor of appropriate type and shape
-      `[batch_size x cell_fw.state_size]`.
+      `[batch_size, cell_fw.state_size]`.
       If `cell_fw.state_size` is a tuple, this should be a tuple of
       tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
     initial_state_bw: (optional) Same as for `initial_state_fw`, but using
@@ -574,7 +574,7 @@ def bidirectional_dynamic_rnn(cell_fw, cell_bw, inputs, sequence_length=None,
       containing the actual lengths for each of the sequences.
     initial_state_fw: (optional) An initial state for the forward RNN.
       This must be a tensor of appropriate type and shape
-      `[batch_size x cell_fw.state_size]`.
+      `[batch_size, cell_fw.state_size]`.
       If `cell_fw.state_size` is a tuple, this should be a tuple of
       tensors having shapes `[batch_size, s] for s in cell_fw.state_size`.
     initial_state_bw: (optional) Same as for `initial_state_fw`, but using
@@ -717,7 +717,7 @@ def dynamic_rnn(cell, inputs, sequence_length=None, initial_state=None,
     sequence_length: (optional) An int32/int64 vector sized `[batch_size]`.
     initial_state: (optional) An initial state for the RNN.
       If `cell.state_size` is an integer, this must be
-      a `Tensor` of appropriate type and shape `[batch_size x cell.state_size]`.
+      a `Tensor` of appropriate type and shape `[batch_size, cell.state_size]`.
       If `cell.state_size` is a tuple, this should be a tuple of
       tensors having shapes `[batch_size, s] for s in cell.state_size`.
     dtype: (optional) The data type for the initial state and expected output.
",0,test
46afa1f0e8a8b269054025aefe9a7d42290f8e8d,tensorflow/tensorflow,"Amend cluster resolver error to suggest oauth2client as a possible issue.

PiperOrigin-RevId: 198894470",tpu_cluster_resolver.py,"@@ -170,10 +170,11 @@ class TPUClusterResolver(ClusterResolver):
 
     if service is None and should_resolve:
       if not _GOOGLE_API_CLIENT_INSTALLED:
-        raise ImportError('googleapiclient must be installed before using the '
-                          'TPU cluster resolver. Execute: `pip install '
-                          '--upgrade google-api-python-client` to install with '
-                          'pip.')
+        raise ImportError('googleapiclient and oauth2client must be installed '
+                          'before using the TPU cluster resolver. Execute: '
+                          '`pip install --upgrade google-api-python-client` '
+                          'and `pip install --upgrade oauth2lclient` to '
+                          'install with pip.')
 
       final_discovery_url = self._discoveryUrl() or discovery_url
       if final_discovery_url:
",0,train
1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

By default OpenCL programs are compiled as 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",cl_program.cc,"@@ -95,6 +95,8 @@ std::string CompilerOptionToString(const CLDevice& device,
       return ""-cl-opt-disable"";
     case CompilerOptions::CL_2_0:
       return ""-cl-std=CL2.0"";
+    case CompilerOptions::CL_3_0:
+      return ""-cl-std=CL3.0"";
   }
 }
 
",0,train
1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

By default OpenCL programs are compiled as 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",cl_program.h,"@@ -41,7 +41,8 @@ enum class CompilerOptions {
   ADRENO_MORE_WAVES,
   POWERVR_FP16,
   CL_OPT_DISABLE,
-  CL_2_0
+  CL_2_0,
+  CL_3_0,
 };
 
 std::string CompilerOptionsToString(
",0,train
1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

By default OpenCL programs are compiled as 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",mean_stddev_normalization.cc,"@@ -17,6 +17,8 @@ limitations under the License.
 
 #include <string>
 
+#include ""tensorflow/lite/delegates/gpu/cl/cl_program.h""
+#include ""tensorflow/lite/delegates/gpu/cl/device_info.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/util.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/work_group_picking.h""
 #include ""tensorflow/lite/delegates/gpu/cl/precision.h""
@@ -64,7 +66,8 @@ static inline float local_reduce(float input, __local float* tmp) {
 }
 }  // namespace
 
-MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition)
+MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition,
+                                                 const DeviceInfo& device_info)
     : GPUOperation(definition) {
   // The kernel code does not inherently need a fixed size, but in order to not
   // hardcode the __local array's size for the reductions, we would need to pass
@@ -74,6 +77,11 @@ MeanStdDevNormalization::MeanStdDevNormalization(const OperationDef& definition)
   work_group_size_.y = 1;  // Required
   work_group_size_.z = 1;  // Required
   code_ = GetNormalizationCode();
+  if (device_info.cl_version >= OpenCLVersion::CL_3_0) {
+    compiler_options_.push_back(CompilerOptions::CL_3_0);
+  } else if (device_info.cl_version >= OpenCLVersion::CL_2_0) {
+    compiler_options_.push_back(CompilerOptions::CL_2_0);
+  }
 }
 
 std::string MeanStdDevNormalization::GetNormalizationCode() {
@@ -145,8 +153,8 @@ int3 MeanStdDevNormalization::GetGridSize() const {
 }
 
 MeanStdDevNormalization CreateMeanStdDevNormalization(
-    const OperationDef& definition) {
-  return MeanStdDevNormalization(definition);
+    const OperationDef& definition, const DeviceInfo& device_info) {
+  return MeanStdDevNormalization(definition, device_info);
 }
 
 }  // namespace cl
",0,train
1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

By default OpenCL programs are compiled as 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",mean_stddev_normalization.h,"@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
 #define TENSORFLOW_LITE_DELEGATES_GPU_CL_KERNELS_LSTM_NORMALIZATION_H_
 
+#include ""tensorflow/lite/delegates/gpu/cl/device_info.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/gpu_operation.h""
 #include ""tensorflow/lite/delegates/gpu/common/operations.h""
 #include ""tensorflow/lite/delegates/gpu/common/status.h""
@@ -28,7 +29,8 @@ namespace cl {
 // Implements tensor_utils::MeanStddevNormalization
 class MeanStdDevNormalization : public GPUOperation {
  public:
-  explicit MeanStdDevNormalization(const OperationDef& definition);
+  explicit MeanStdDevNormalization(const OperationDef& definition,
+                                   const DeviceInfo& device_info);
 
   void GetPossibleKernelWorkGroups(
       TuningType tuning_type, const DeviceInfo& device_info,
@@ -50,7 +52,7 @@ class MeanStdDevNormalization : public GPUOperation {
 };
 
 MeanStdDevNormalization CreateMeanStdDevNormalization(
-    const OperationDef& definition);
+    const OperationDef& definition, const DeviceInfo& device_info);
 
 }  // namespace cl
 }  // namespace gpu
",0,train
1219f682f7faa3619b58f41cc3f479445588cf24,tensorflow/tensorflow,"Enable OpenCL 2.0 or 3.0 compilation when the device supports it.

By default OpenCL programs are compiled as 1.x only.

PiperOrigin-RevId: 327300390
Change-Id: I7e31c3c0253bc9175f156614a47f5ef8dddf2147",operation_selector.cc,"@@ -262,7 +262,8 @@ absl::Status GPUOperationFromNode(const CreationContext& creation_context,
       return SelectMean(attr, op_def, creation_context.device->info_, gpu_op);
     }
     case OperationType::MEAN_STDDEV_NORMALIZATION: {
-      MeanStdDevNormalization operation = CreateMeanStdDevNormalization(op_def);
+      MeanStdDevNormalization operation =
+          CreateMeanStdDevNormalization(op_def, creation_context.device->info_);
       *gpu_op =
           absl::make_unique<MeanStdDevNormalization>(std::move(operation));
       return absl::OkStatus();
",0,train
bd24a27f305badb89d68014ee1cfedd460b04536,tensorflow/tensorflow,Final Changes 2,compat.py,"@@ -112,8 +112,10 @@ def as_str_any(value):
 @tf_export('compat.path_to_str')
 def path_to_str(path):
   """"""Returns the file system path representation of a `PathLike` object, else as it is.
+
   Args:
     path: An object that can be converted to path representation.
+  
   Returns:
     A `str` object.
 
",0,train
830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads.
Change: 150143909",tensor_util.cc,"@@ -42,12 +42,6 @@ Tensor DeepCopy(const Tensor& other) {
   return tmp;
 }
 
-Tensor Concat(const gtl::ArraySlice<Tensor>& tensors) {
-  Tensor result;
-  TF_CHECK_OK(Concat(tensors, &result));
-  return result;
-}
-
 Status Concat(const gtl::ArraySlice<Tensor>& tensors, Tensor* result) {
   if (tensors.empty()) {
     return errors::InvalidArgument(""Cannot concatenate zero tensors"");
@@ -109,13 +103,6 @@ Status Concat(const gtl::ArraySlice<Tensor>& tensors, Tensor* result) {
   return Status::OK();
 }
 
-std::vector<Tensor> Split(const Tensor& tensor,
-                          const gtl::ArraySlice<int64>& sizes) {
-  std::vector<Tensor> result;
-  TF_CHECK_OK(Split(tensor, sizes, &result));
-  return result;
-}
-
 Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
              std::vector<Tensor>* result) {
   if (tensor.dims() == 0) {
",0,train
830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads.
Change: 150143909",tensor_util.h,"@@ -41,10 +41,6 @@ Tensor DeepCopy(const Tensor& other);
 Status Concat(const gtl::ArraySlice<Tensor>& tensors,
               Tensor* result) TF_MUST_USE_RESULT;
 
-// Version of Concat() that crashes upon hitting an error.
-// DEPRECATED. DO NOT USE.
-Tensor Concat(const gtl::ArraySlice<Tensor>& tensors);
-
 // Splits 'tensor' into 'sizes.size()' individual tensors, along the 0th
 // dimension. The ith output tensor has 0th-dimension size 'sizes[i]'.
 //
@@ -58,11 +54,6 @@ Tensor Concat(const gtl::ArraySlice<Tensor>& tensors);
 Status Split(const Tensor& tensor, const gtl::ArraySlice<int64>& sizes,
              std::vector<Tensor>* result) TF_MUST_USE_RESULT;
 
-// Version of Split() that crashes upon hitting an error.
-// DEPRECATED. DO NOT USE.
-std::vector<Tensor> Split(const Tensor& tensor,
-                          const gtl::ArraySlice<int64>& sizes);
-
 }  // namespace tensor
 }  // namespace tensorflow
 
",0,train
830cde8776d9adb6bdbb2e0b3173d16780d52df7,tensorflow/tensorflow,"Eliminate crashy Concat()/Split() overloads.
Change: 150143909",tensor_util_test.cc,"@@ -208,7 +208,10 @@ TEST(TensorUtil, ConcatSplitStrings) {
     x.flat<string>()(i) = strings::StrCat(""foo_"", i);
   }
 
-  Tensor x_round_tripped = tensor::Concat(tensor::Split(x, {2, 1, 1}));
+  std::vector<Tensor> split;
+  TF_ASSERT_OK(tensor::Split(x, {2, 1, 1}, &split));
+  Tensor x_round_tripped;
+  TF_ASSERT_OK(tensor::Concat(split, &x_round_tripped));
   ASSERT_EQ(x.shape(), x_round_tripped.shape());
   for (int i = 0; i < 4 * 3; ++i) {
     EXPECT_EQ(x.flat<string>()(i), x_round_tripped.flat<string>()(i));
",0,train
e30d648f8782d595056ef7cf251d30fcf34aef7c,tensorflow/tensorflow,"Adding regexp target to tensorflow/core/platform/BUILD.

PiperOrigin-RevId: 268105122",regexp.h,"@@ -21,7 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 
 #if defined(PLATFORM_GOOGLE) || defined(PLATFORM_GOOGLE_ANDROID)
-#include ""tensorflow/core/platform/google/build_config/re2.h""
+#include ""third_party/re2/re2.h""
 #else
 #include ""re2/re2.h""
 #endif
",0,train
cc967a1f37270d17b624214f3a23629e889054e3,tensorflow/tensorflow,"Expose 'CreateNNAPIDelegate(StatefulNnApiDelegate::Options)' unconditionally,
rather than only on Android.  On unsupported platforms, it will return nullptr.

PiperOrigin-RevId: 333654569
Change-Id: I2dc6f63f7a25c951f6f310acd8c1b52657be7267",utils.cc,"@@ -105,14 +105,16 @@ TfLiteDelegatePtr CreateNNAPIDelegate() {
 #endif  // defined(__ANDROID__)
 }
 
-#if defined(__ANDROID__)
 TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options) {
+#if defined(__ANDROID__)
   return TfLiteDelegatePtr(
       new StatefulNnApiDelegate(options), [](TfLiteDelegate* delegate) {
         delete reinterpret_cast<StatefulNnApiDelegate*>(delegate);
       });
-}
+#else
+  return CreateNullDelegate();
 #endif  // defined(__ANDROID__)
+}
 
 #if TFLITE_SUPPORTS_GPU_DELEGATE
 TfLiteDelegatePtr CreateGPUDelegate(TfLiteGpuDelegateOptionsV2* options) {
",0,train
cc967a1f37270d17b624214f3a23629e889054e3,tensorflow/tensorflow,"Expose 'CreateNNAPIDelegate(StatefulNnApiDelegate::Options)' unconditionally,
rather than only on Android.  On unsupported platforms, it will return nullptr.

PiperOrigin-RevId: 333654569
Change-Id: I2dc6f63f7a25c951f6f310acd8c1b52657be7267",utils.h,"@@ -29,12 +29,10 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/gpu/delegate.h""
 #endif
 
-#if defined(__ANDROID__)
 #include ""tensorflow/lite/delegates/nnapi/nnapi_delegate.h""
-#if (defined(__arm__) || defined(__aarch64__))
+#if defined(__ANDROID__) && (defined(__arm__) || defined(__aarch64__))
 #include ""tensorflow/lite/delegates/hexagon/hexagon_delegate.h""
 #endif
-#endif
 
 // TODO(b/149248802): include XNNPACK delegate when the issue is resolved.
 #if !defined(__Fuchsia__) || defined(TFLITE_WITHOUT_XNNPACK)
@@ -46,8 +44,8 @@ limitations under the License.
 namespace tflite {
 namespace evaluation {
 
-// Same w/ Interpreter::TfLiteDelegatePtr to avoid pulling
-// tensorflow/lite/interpreter.h dependency
+// Same as Interpreter::TfLiteDelegatePtr, defined here to avoid pulling
+// in tensorflow/lite/interpreter.h dependency.
 using TfLiteDelegatePtr =
     std::unique_ptr<TfLiteDelegate, void (*)(TfLiteDelegate*)>;
 
@@ -68,10 +66,9 @@ inline TfLiteStatus GetSortedFileNames(const std::string& directory,
                             std::unordered_set<std::string>());
 }
 
+// Returns nullptr on error, e.g. if NNAPI isn't supported on this platform.
 TfLiteDelegatePtr CreateNNAPIDelegate();
-#if defined(__ANDROID__)
 TfLiteDelegatePtr CreateNNAPIDelegate(StatefulNnApiDelegate::Options options);
-#endif
 
 TfLiteDelegatePtr CreateGPUDelegate();
 #if TFLITE_SUPPORTS_GPU_DELEGATE
",0,train
bf1d138211f2ca1a23923ba44b2a234417e46adb,tensorflow/tensorflow,"Remove TF_CPP_VMODULE from NCCL tests since it has no effect when set after
program start.

PiperOrigin-RevId: 247040886",collective_nccl_reducer_test.cc,"@@ -95,7 +95,6 @@ class NcclReducerTest : public ::testing::Test {
   void Init(int num_ranks) {
     setenv(""NCCL_DEBUG"", ""INFO"", 1 /* replace */);
     setenv(""NCCL_LAUNCH_MODE"", ""PARALLEL"", 1 /* replace */);
-    setenv(""TF_CPP_VMODULE"", ""nccl_manager=2"", 1 /* replace */);
     InitGPUDevices();
     std::vector<std::unique_ptr<Device>> local_devices;
     std::vector<string> device_names;
",0,train
bf1d138211f2ca1a23923ba44b2a234417e46adb,tensorflow/tensorflow,"Remove TF_CPP_VMODULE from NCCL tests since it has no effect when set after
program start.

PiperOrigin-RevId: 247040886",nccl_manager_test.cc,"@@ -66,7 +66,6 @@ class NcclManagerTest : public ::testing::Test {
   static void SetUpTestCase() {
     setenv(""NCCL_DEBUG"", ""INFO"", 1 /* replace */);
     setenv(""NCCL_LAUNCH_MODE"", ""PARALLEL"", 1 /* replace */);
-    setenv(""TF_CPP_VMODULE"", ""nccl_manager=2"", 1 /* replace */);
     devices_ = new std::vector<std::unique_ptr<BaseGPUDevice>>(GetGPUDevices());
     LOG(INFO) << ""Running test with "" << devices_->size() << "" gpus"";
   }
",0,train
1b9f56058daeb5d95f853969c9d9e0b0b4d349c7,tensorflow/tensorflow,"Refactor the FillRandom code and separate integer and float

This is mainly for code health and avoids misusing the integer code path
for floats.

PiperOrigin-RevId: 418045468
Change-Id: Ib781c503c447d3e26390d974da0442e849a41d9c",test_util.cc,"@@ -103,7 +103,7 @@ float ExponentialRandomPositiveFloat(float percentile, float percentile_val,
   return val;
 }
 
-void FillRandom(std::vector<float>* vec, float min, float max) {
+void FillRandomFloat(std::vector<float>* vec, float min, float max) {
   std::uniform_real_distribution<float> dist(min, max);
   // TODO(b/154540105): use std::ref to avoid copying the random engine.
   auto gen = std::bind(dist, RandomEngine());
",0,test
1b9f56058daeb5d95f853969c9d9e0b0b4d349c7,tensorflow/tensorflow,"Refactor the FillRandom code and separate integer and float

This is mainly for code health and avoids misusing the integer code path
for floats.

PiperOrigin-RevId: 418045468
Change-Id: Ib781c503c447d3e26390d974da0442e849a41d9c",test_util.h,"@@ -57,7 +57,7 @@ float ExponentialRandomPositiveFloat(float percentile, float percentile_val,
                                      float max_val);
 
 // Fills a vector with random floats between |min| and |max|.
-void FillRandom(std::vector<float>* vec, float min, float max);
+void FillRandomFloat(std::vector<float>* vec, float min, float max);
 
 template <typename T>
 void FillRandom(typename std::vector<T>::iterator begin_it,
@@ -74,7 +74,13 @@ void FillRandom(typename std::vector<T>::iterator begin_it,
 // Fills a vector with random numbers between |min| and |max|.
 template <typename T>
 void FillRandom(std::vector<T>* vec, T min, T max) {
-  return FillRandom(std::begin(*vec), std::end(*vec), min, max);
+  FillRandom(std::begin(*vec), std::end(*vec), min, max);
+}
+
+// Template specialization for float.
+template <>
+inline void FillRandom<float>(std::vector<float>* vec, float min, float max) {
+  FillRandomFloat(vec, min, max);
 }
 
 // Fills a vector with random numbers.
",0,test
5947bb78e7728a2b2f80edc4a1ed9a774bbb2274,tensorflow/tensorflow,"Demonstrate variable updates in tf.function

PiperOrigin-RevId: 275301015
Change-Id: I2b6b96f706f7d8fdcc35129581c5df5b1f35c2da",def_function_test.py,"@@ -539,7 +539,6 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllClose([13., 14.], add_var(constant_op.constant(2.)))
 
   def testSameVariableTwice(self):
-
     v = variables.Variable(1.0)
 
     @def_function.function
@@ -548,6 +547,29 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase):
 
     self.assertAllEqual(add(v, v), 2.0)
 
+  def testVariableUpdate(self):
+    v1 = variables.Variable(1.0)
+    v2 = variables.Variable(2.0)
+    v3 = variables.Variable(4, dtype=dtypes.int32)
+
+    trace_count = [0]
+
+    @def_function.function
+    def double_variable(x):
+      trace_count[0] += 1
+      x.assign_add(x.read_value())
+
+    self.assertEqual(trace_count[0], 0)
+    double_variable(v1)
+    self.assertEqual(trace_count[0], 1)
+    self.assertEqual(self.evaluate(v1), 2.0)
+    double_variable(v2)
+    self.assertEqual(trace_count[0], 1 if ops.Tensor._USE_EQUALITY else 2)
+    self.assertEqual(self.evaluate(v2), 4.0)
+    double_variable(v3)
+    self.assertEqual(trace_count[0], 2 if ops.Tensor._USE_EQUALITY else 3)
+    self.assertEqual(self.evaluate(v3), 8)
+
   def testShapeCache(self):
     @def_function.function
     def func(x):
",0,train
334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801

PiperOrigin-RevId: 181548597",xla_launch_op.cc,"@@ -257,8 +257,10 @@ void XlaLocalLaunchOp::Compute(OpKernelContext* ctx) {
 
   const XlaCompiler::CompilationResult* kernel;
   xla::LocalExecutable* executable;
+
   OP_REQUIRES_OK(ctx, cache->Compile(options, function_, num_constant_args_,
-                                     variables, ctx, &kernel, &executable));
+                                     variables, ctx, &kernel, &executable,
+                                     /*compile_options=*/nullptr));
 
   VLOG(1) << ""Executing XLA Computation..."";
 
",0,test
334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801

PiperOrigin-RevId: 181548597",xla_compilation_cache.cc,"@@ -238,7 +238,8 @@ Status XlaCompilationCache::Compile(
     int num_constant_args, const std::vector<OptionalTensor>& variable_args,
     OpKernelContext* ctx,
     const XlaCompiler::CompilationResult** compilation_result,
-    xla::LocalExecutable** executable) {
+    xla::LocalExecutable** executable,
+    const XlaCompiler::CompileOptions* compile_options) {
   VLOG(1) << ""XlaCompilationCache::Compile "" << DebugString();
 
   if (VLOG_IS_ON(2)) {
@@ -297,9 +298,9 @@ Status XlaCompilationCache::Compile(
 
     XlaCompiler compiler(options);
     entry->compiled = true;
-    entry->compilation_status =
-        compiler.CompileFunction(XlaCompiler::CompileOptions(), function, args,
-                                 &entry->compilation_result);
+    entry->compilation_status = compiler.CompileFunction(
+        compile_options ? *compile_options : XlaCompiler::CompileOptions(),
+        function, args, &entry->compilation_result);
   }
   *compilation_result = &entry->compilation_result;
   if (entry->compilation_status.ok() && executable) {
",0,test
334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801

PiperOrigin-RevId: 181548597",xla_compilation_cache.h,"@@ -66,7 +66,8 @@ class XlaCompilationCache : public ResourceBase {
                  const std::vector<OptionalTensor>& variable_args,
                  OpKernelContext* ctx,
                  const XlaCompiler::CompilationResult** compilation_result,
-                 xla::LocalExecutable** executable);
+                 xla::LocalExecutable** executable,
+                 const XlaCompiler::CompileOptions* compile_options);
 
   xla::LocalClient* client() const { return client_; }
   const DeviceType& device_type() const { return device_type_; }
",0,test
334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801

PiperOrigin-RevId: 181548597",while_op.cc,"@@ -201,10 +201,16 @@ void XlaWhileOp::Compile(XlaOpKernelContext* ctx) {
   OP_REQUIRES_OK(ctx, compiler->CompileFunction(cond_options, cond_name_attr_,
                                                 arguments, &cond));
 
-  xla::Shape body_input_shape =
-      xla::ShapeUtil::MakeTupleShape(body.xla_input_shapes);
-  xla::Shape cond_input_shape =
-      xla::ShapeUtil::MakeTupleShape(cond.xla_input_shapes);
+  OP_REQUIRES(ctx, body.xla_input_shapes.size() == 1,
+              errors::FailedPrecondition(""Expected one input shape""));
+  xla::Shape body_input_shape = body.xla_input_shapes[0];
+  OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(body_input_shape),
+              errors::FailedPrecondition(""Expected tuple shape""));
+  OP_REQUIRES(ctx, cond.xla_input_shapes.size() == 1,
+              errors::FailedPrecondition(""Expected one input shape""));
+  xla::Shape cond_input_shape = cond.xla_input_shapes[0];
+  OP_REQUIRES(ctx, xla::ShapeUtil::IsTuple(cond_input_shape),
+              errors::FailedPrecondition(""Expected tuple shape""));
 
   VLOG(2) << ""Body shape: "" << xla::ShapeUtil::HumanString(body_input_shape)
           << "" -> "" << xla::ShapeUtil::HumanString(body.xla_output_shape);
",0,test
334aa8f8f38cc31cd8c934471fd9d45a390b5f3d,tensorflow/tensorflow,"Automated g4 rollback of changelist 181260801

PiperOrigin-RevId: 181548597",xla_compiler.cc,"@@ -316,15 +316,22 @@ Status BuildArguments(const Graph& graph,
     return Status::OK();
   }
 
-  input_shapes->resize(parameters.size());
+  std::vector<xla::Shape> arg_shapes;
+  arg_shapes.reserve(parameters.size());
   input_mapping->resize(parameters.size());
   for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
     const XlaCompiler::Argument& arg = args[parameters[i]];
     // Computes the shapes of non-constant arguments.
-    (*input_shapes)[i] = arg.shape;
+    arg_shapes.push_back(arg.shape);
     (*input_mapping)[i] = parameters[i];
   }
 
+  if (use_tuple_arg) {
+    input_shapes->push_back(xla::ShapeUtil::MakeTupleShape(arg_shapes));
+  } else {
+    *input_shapes = arg_shapes;
+  }
+
   // Use the _Arg nodes in the graph to resolve core assignments.
   for (const Node* n : graph.nodes()) {
     if (StringPiece(n->type_string()) != ""_Arg"") continue;
@@ -348,9 +355,19 @@ Status BuildArguments(const Graph& graph,
   // Build parameter handles for non-constant arguments.
   std::vector<xla::ComputationDataHandle> arg_handles(parameters.size());
   if (use_tuple_arg) {
-    xla::Shape tuple_shape = xla::ShapeUtil::MakeTupleShape(*input_shapes);
+    xla::OpSharding tuple_sharding;
+    tuple_sharding.set_type(xla::OpSharding::Type::OpSharding_Type_TUPLE);
+    for (int64 parameter : parameters) {
+      const int core = (*arg_cores)[parameter];
+      const int root_device = 0;
+      *tuple_sharding.add_tuple_shardings() =
+          core == -1 ? xla::sharding_builder::AssignDevice(root_device)
+                     : xla::sharding_builder::AssignDevice(core);
+    }
+    xla::ScopedShardingAssignment assign_tuple_sharding(builder,
+                                                        tuple_sharding);
     xla::ComputationDataHandle tuple =
-        builder->Parameter(0, tuple_shape, ""arg_tuple"");
+        builder->Parameter(0, (*input_shapes)[0], ""arg_tuple"");
     for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
       const int core = (*arg_cores)[parameters[i]];
       xla::ScopedShardingAssignment assign_sharding(
@@ -374,7 +391,7 @@ Status BuildArguments(const Graph& graph,
   for (std::vector<int>::size_type i = 0; i < parameters.size(); ++i) {
     const XlaCompiler::Argument& arg = args[parameters[i]];
     VLOG(2) << ""  XLA arg "" << i
-            << "" shape: "" << xla::ShapeUtil::HumanString((*input_shapes)[i])
+            << "" shape: "" << xla::ShapeUtil::HumanString(arg_shapes[i])
             << "" name: "" << arg.name << "" TF arg "" << parameters[i];
     XlaExpression& arg_expression = (*arg_expressions)[parameters[i]];
     switch (arg.kind) {
",0,test
90a754c965005fe33dfde1352267be92fea4c095,tensorflow/tensorflow,"TFLM: nit: catch memory allocation failures

PiperOrigin-RevId: 283699825
Change-Id: I0434f161c1ac6d5578e7a35f26f7a8344cc3cf6f",micro_allocator.cc,"@@ -89,6 +89,11 @@ MicroAllocator::MicroAllocator(TfLiteContext* context, const Model* model,
       reinterpret_cast<TfLiteTensor*>(memory_allocator_.AllocateFromTail(
           sizeof(TfLiteTensor) * context_->tensors_size,
           alignof(TfLiteTensor)));
+  if (context_->tensors == nullptr) {
+    error_reporter_->Report(
+        ""Failed to allocate memory for context->tensors, %d bytes required"",
+        sizeof(TfLiteTensor) * context_->tensors_size);
+  }
 
   // Null all inputs so we can later perform a null check to avoid re-allocating
   // registered pre-allocated inputs.
@@ -230,6 +235,12 @@ TfLiteStatus MicroAllocator::FinishTensorAllocation() {
   TensorInfo* tensor_info =
       reinterpret_cast<TensorInfo*>(tmp_allocator.AllocateFromTail(
           sizeof(TensorInfo) * tensors_size, alignof(TensorInfo)));
+  if (tensor_info == nullptr) {
+    error_reporter_->Report(
+        ""Failed to allocate memory for tensor_info, %d bytes required"",
+        sizeof(TfLiteTensor) * context_->tensors_size);
+    return kTfLiteError;
+  }
 
   // Set up the runtime data structures for all tensors.
   for (size_t i = 0; i < tensors_size; ++i) {
",0,train
39e5d1099a343212ee2a5ac2cbc0bff31d70e739,tensorflow/tensorflow,"Fix dtype handling in tf.contrib.metrics.confusion_matrix()
Change: 133403718",confusion_matrix_ops_test.py,"@@ -27,10 +27,12 @@ class ConfusionMatrixTest(tf.test.TestCase):
 
   def _testConfMatrix(self, predictions, labels, truth, weights=None):
     with self.test_session():
+      dtype = predictions.dtype
       ans = tf.contrib.metrics.confusion_matrix(
-          predictions, labels, weights=weights)
+          predictions, labels, dtype=dtype, weights=weights)
       tf_ans = ans.eval()
       self.assertAllClose(tf_ans, truth, atol=1e-10)
+      self.assertEqual(tf_ans.dtype, dtype)
 
   def _testBasic(self, dtype):
     predictions = np.arange(5, dtype=dtype)
@@ -44,10 +46,7 @@ class ConfusionMatrixTest(tf.test.TestCase):
          [0, 0, 0, 0, 1]],
         dtype=dtype)
 
-    self._testConfMatrix(
-        predictions=predictions,
-        labels=labels,
-        truth=truth)
+    self._testConfMatrix(predictions=predictions, labels=labels, truth=truth)
 
   def testInt32Basic(self):
     self._testBasic(dtype=np.int32)
@@ -55,6 +54,41 @@ class ConfusionMatrixTest(tf.test.TestCase):
   def testInt64Basic(self):
     self._testBasic(dtype=np.int64)
 
+def _testConfMatrixOnTensors(self, tf_dtype, np_dtype):
+  with self.test_session() as sess:
+    m_neg = tf.placeholder(dtype=tf.float32)
+    m_pos = tf.placeholder(dtype=tf.float32)
+    s = tf.placeholder(dtype=tf.float32)
+
+    neg = tf.random_normal([20], mean=m_neg, stddev=s, dtype=tf.float32)
+    pos = tf.random_normal([20], mean=m_pos, stddev=s, dtype=tf.float32)
+
+    data = tf.concat(0, [neg, pos])
+    data = tf.cast(tf.round(data), tf_dtype)
+    data = tf.minimum(tf.maximum(data, 0), 1)
+    lab = tf.concat(0, [tf.zeros([20], dtype=tf_dtype),
+                        tf.ones([20], dtype=tf_dtype)])
+
+    cm = tf.contrib.metrics.confusion_matrix(
+        data, lab, dtype=tf_dtype, num_classes=2)
+
+    d, l, cm_out = sess.run([data, lab, cm], {m_neg: 0.0,
+                                              m_pos: 1.0,
+                                              s: 1.0})
+
+    truth = np.zeros([2, 2], dtype=np_dtype)
+    for i in xrange(len(d)):
+      truth[d[i], l[i]] = truth[d[i], l[i]] + 1
+
+    self.assertEqual(cm_out.dtype, np_dtype)
+    self.assertAllClose(cm_out, truth, atol=1e-10)
+
+  def _testOnTensors_int32(self):
+    self._testConfMatrixOnTensors(tf.int32, np.int32)
+
+  def testOnTensors_int64(self):
+    self._testConfMatrixOnTensors(tf.int64, np.int64)
+
   def _testDiffentLabelsInPredictionAndTarget(self, dtype):
     predictions = np.asarray([1, 2, 3], dtype=dtype)
     labels = np.asarray([4, 5, 6], dtype=dtype)
@@ -69,10 +103,7 @@ class ConfusionMatrixTest(tf.test.TestCase):
          [0, 0, 0, 0, 0, 0, 0]],
         dtype=dtype)
 
-    self._testConfMatrix(
-        predictions=predictions,
-        labels=labels,
-        truth=truth)
+    self._testConfMatrix(predictions=predictions, labels=labels, truth=truth)
 
   def testInt32DifferentLabels(self, dtype=np.int32):
     self._testDiffentLabelsInPredictionAndTarget(dtype)
@@ -94,10 +125,7 @@ class ConfusionMatrixTest(tf.test.TestCase):
          [0, 1, 0, 0, 0, 0, 0]],
         dtype=dtype)
 
-    self._testConfMatrix(
-        predictions=predictions,
-        labels=labels,
-        truth=truth)
+    self._testConfMatrix(predictions=predictions, labels=labels, truth=truth)
 
   def testInt32MultipleLabels(self, dtype=np.int32):
     self._testMultipleLabels(dtype)
@@ -119,30 +147,27 @@ class ConfusionMatrixTest(tf.test.TestCase):
         dtype=np.int32)
 
     self._testConfMatrix(
-        predictions=predictions,
-        labels=labels,
-        weights=weights,
-        truth=truth)
+        predictions=predictions, labels=labels, weights=weights, truth=truth)
 
   def testInvalidRank(self):
     predictions = np.asarray([[1, 2, 3]])
     labels = np.asarray([1, 2, 3])
-    self.assertRaisesRegexp(
-        ValueError, ""an not squeeze dim"",
-        tf.contrib.metrics.confusion_matrix, predictions, labels)
+    self.assertRaisesRegexp(ValueError, ""an not squeeze dim"",
+                            tf.contrib.metrics.confusion_matrix, predictions,
+                            labels)
 
     predictions = np.asarray([1, 2, 3])
     labels = np.asarray([[1, 2, 3]])
-    self.assertRaisesRegexp(
-        ValueError, ""an not squeeze dim"",
-        tf.contrib.metrics.confusion_matrix, predictions, labels)
+    self.assertRaisesRegexp(ValueError, ""an not squeeze dim"",
+                            tf.contrib.metrics.confusion_matrix, predictions,
+                            labels)
 
   def testInputDifferentSize(self):
     predictions = np.asarray([1, 2, 3])
     labels = np.asarray([1, 2])
-    self.assertRaisesRegexp(
-        ValueError, ""must be equal"",
-        tf.contrib.metrics.confusion_matrix, predictions, labels)
+    self.assertRaisesRegexp(ValueError, ""must be equal"",
+                            tf.contrib.metrics.confusion_matrix, predictions,
+                            labels)
 
   def testOutputIsInt32(self):
     predictions = np.arange(2)
",0,train
39e5d1099a343212ee2a5ac2cbc0bff31d70e739,tensorflow/tensorflow,"Fix dtype handling in tf.contrib.metrics.confusion_matrix()
Change: 133403718",confusion_matrix_ops.py,"@@ -75,8 +75,10 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32,
                       [predictions, labels, num_classes]) as name:
     predictions, labels = metric_ops_util.remove_squeezable_dimensions(
         ops.convert_to_tensor(
-            predictions, name='predictions', dtype=dtypes.int64),
-        ops.convert_to_tensor(labels, name='labels', dtype=dtypes.int64))
+            predictions, name='predictions'),
+        ops.convert_to_tensor(labels, name='labels'))
+    predictions = math_ops.cast(predictions, dtypes.int64)
+    labels = math_ops.cast(labels, dtypes.int64)
 
     if num_classes is None:
       num_classes = math_ops.maximum(math_ops.reduce_max(predictions),
@@ -91,7 +93,7 @@ def confusion_matrix(predictions, labels, num_classes=None, dtype=dtypes.int32,
     values = (array_ops.ones_like(predictions, dtype)
               if weights is None else weights)
     cm_sparse = ops.SparseTensor(
-        indices=indices, values=values, shape=shape)
+        indices=indices, values=values, shape=math_ops.to_int64(shape))
     zero_matrix = array_ops.zeros(math_ops.to_int32(shape), dtype)
 
     return sparse_ops.sparse_add(zero_matrix, cm_sparse)
",0,train
5c4055b80bb5d86f5a1c2d0d843d9501817ef369,tensorflow/tensorflow,"[XLA] Bound compilation time spent in the region-based live-range interference analysis in the copy insertion pass to be at most linear in the size of the input HLO module.

PiperOrigin-RevId: 396740128
Change-Id: I2e9ff7172c1fb6039af6eb95068c01f53ec7be6c",copy_insertion.cc,"@@ -1312,8 +1312,10 @@ class CopyRemover {
   // live range interference is introduced by the copy's elimination. If
   // elision is possible, then the internal state (value lists) are updated,
   // and true is returned. Returns false otherwise.
-  bool TryElideCopy(const HloInstruction* copy, int64_t region_analysis_limit) {
+  bool TryElideCopy(const HloInstruction* copy,
+                    int64_t* region_analysis_limit) {
     VLOG(2) << ""Trying to remove "" << copy->name();
+    CHECK_NE(region_analysis_limit, nullptr);
 
     if (!ContainsKey(copy_map_, copy)) {
       VLOG(2) << copy->name() << "" is not removable"";
@@ -1340,8 +1342,9 @@ class CopyRemover {
     // are cheap and are later removed by replicating the broadcasts.
     bool use_region_analysis =
         copy->operand(0)->opcode() != HloOpcode::kBroadcast &&
-        (region_analysis_limit < 0 ||
-         live_range_size1 * live_range_size2 <= region_analysis_limit);
+        (*region_analysis_limit < 0 ||
+         live_range_size1 * live_range_size2 <= *region_analysis_limit);
+    *region_analysis_limit = 0;
     VLOG(3) << copy->name() << "" copies value ""
             << copy_node.src->value->ToShortString();
     VLOG(3) << ""Source buffer values: "" << ValueListToString(copy_node.src);
@@ -1369,6 +1372,7 @@ class CopyRemover {
         VLOG(2) << ""Configured to not use region-based analysis.\n"";
         return true;
       }
+      *region_analysis_limit += live_range_size1 * live_range_size2;
       if (ValuesInterfere(src, dest, option)) {
         VLOG(2) << ""Region-based interference is true. \n"";
         return true;
@@ -1964,7 +1968,6 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering,
   XLA_VLOG_LINES(4, module->ToString());
   TF_ASSIGN_OR_RETURN(std::unique_ptr<HloAliasAnalysis> alias_analysis,
                       HloAliasAnalysis::Run(module, can_share_buffer_));
-
   CopyRemover copy_remover(*module, *alias_analysis, ordering,
                            check_live_range_ordering);
   if (VLOG_IS_ON(3)) {
@@ -1980,6 +1983,11 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering,
   int64_t num_existing_copies = GetNumExistingCopies(module);
   bool changed = true;
   int64_t num_iterations = -1;
+  constexpr int64_t region_analysis_allowance_cap = 30000;
+  VLOG(6) << ""Copy Insertion analyzing module with instructino count = ""
+          << module->instruction_count() << ""\n"";
+  int64_t region_analysis_allowance =
+      std::max(region_analysis_allowance_cap, module->instruction_count() / 10);
   while (changed) {
     CHECK_LE(++num_iterations, num_existing_copies);
     changed = false;
@@ -1989,13 +1997,29 @@ Status CopyInsertion::RemoveUnnecessaryCopies(HloOrdering* ordering,
       VLOG(2) << ""computation:"" << computation->name() << ""\n"";
       for (HloInstruction* instruction : computation->instructions()) {
         VLOG(2) << instruction->ToString() << ""\n"";
-        if (instruction->opcode() == HloOpcode::kCopy &&
-            copy_remover.TryElideCopy(instruction,
-                                      use_region_based_live_range_analysis_)) {
-          changed = true;
-          TF_RETURN_IF_ERROR(StripControlDependenciesFrom(instruction));
-          TF_RETURN_IF_ERROR(
-              instruction->ReplaceAllUsesWith(instruction->mutable_operand(0)));
+        // The region_analysis_cost_now is always set to
+        // use_region_based_live_range_analysis_ if it is < 0, in which case the
+        // analysis is always performed.
+        int64_t region_analysis_cost_now = std::min(
+            region_analysis_allowance, use_region_based_live_range_analysis_);
+        if (instruction->opcode() == HloOpcode::kCopy) {
+          if (copy_remover.TryElideCopy(instruction,
+                                        &region_analysis_cost_now)) {
+            changed = true;
+            TF_RETURN_IF_ERROR(StripControlDependenciesFrom(instruction));
+            TF_RETURN_IF_ERROR(instruction->ReplaceAllUsesWith(
+                instruction->mutable_operand(0)));
+            VLOG(6) << ""succeeded in eliminating copy.\n"";
+          }
+          if (region_analysis_allowance > 0 && region_analysis_cost_now > 0) {
+            VLOG(6) << ""Copy Insertion analyzing module cost: ""
+                    << region_analysis_cost_now << ""\n"";
+            VLOG(6) << ""instruction:"" << instruction->ToString() << ""\n"";
+            region_analysis_allowance -= region_analysis_cost_now;
+            if (region_analysis_allowance < 0) {
+              region_analysis_allowance = 0;
+            }
+          }
         }
       }
     }
",0,train
5c4055b80bb5d86f5a1c2d0d843d9501817ef369,tensorflow/tensorflow,"[XLA] Bound compilation time spent in the region-based live-range interference analysis in the copy insertion pass to be at most linear in the size of the input HLO module.

PiperOrigin-RevId: 396740128
Change-Id: I2e9ff7172c1fb6039af6eb95068c01f53ec7be6c",copy_insertion.h,"@@ -99,9 +99,6 @@ class CopyInsertion : public HloModulePass {
 
  private:
   Status AddCopiesToResolveInterference(HloModule* module);
-  // TODO(b/189898980): the region based live range analysis currently
-  // does not enforce a strict ordering of the merged live ranges. This may
-  // cause problems for parallel workloads (e.g., in SPMD).
   int64_t use_region_based_live_range_analysis_;
 };
 
",0,train
0204fbd5fec268e2b4d4d4e9185e21725a6c248d,tensorflow/tensorflow,"Update tests and pydoc for dequeue_batch.
De-flake graph_io_test.
Fix typo.
Change: 129677002",graph_io.py,"@@ -133,7 +133,7 @@ def read_keyed_batch_examples(
   Raises:
     ValueError: for invalid inputs.
   """"""
-  # Retrive files to read.
+  # Retrieve files to read.
   if isinstance(file_pattern, list):
     file_names = file_pattern
     if not file_names:
",0,test
0204fbd5fec268e2b4d4d4e9185e21725a6c248d,tensorflow/tensorflow,"Update tests and pydoc for dequeue_batch.
De-flake graph_io_test.
Fix typo.
Change: 129677002",graph_io_test.py,"@@ -200,11 +200,20 @@ class GraphIOTest(tf.test.TestCase):
 
   def _create_temp_file(self, lines):
     tempdir = tempfile.mkdtemp()
-    filename = os.path.join(tempdir, ""file.csv"")
+    filename = os.path.join(tempdir, ""temp_file"")
     gfile.Open(filename, ""w"").write(lines)
     return filename
 
-  def test_read_csv(self):
+  def _create_sorted_temp_files(self, lines_list):
+    tempdir = tempfile.mkdtemp()
+    filenames = []
+    for i, lines in enumerate(lines_list):
+      filename = os.path.join(tempdir, ""temp_file%05d"" % i)
+      gfile.Open(filename, ""w"").write(lines)
+      filenames.append(filename)
+    return filenames
+
+  def test_read_text_lines(self):
     gfile.Glob = self._orig_glob
     filename = self._create_temp_file(""ABC\nDEF\nGHK\n"")
 
@@ -214,9 +223,35 @@ class GraphIOTest(tf.test.TestCase):
 
     with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
       inputs = tf.contrib.learn.io.read_batch_examples(
-          filename, batch_size,
-          reader=tf.TextLineReader, randomize_input=False,
-          num_epochs=1, queue_capacity=queue_capacity, name=name)
+          filename, batch_size, reader=tf.TextLineReader,
+          randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
+          name=name)
+      session.run(tf.initialize_local_variables())
+
+      coord = tf.train.Coordinator()
+      tf.train.start_queue_runners(session, coord=coord)
+
+      self.assertAllEqual(session.run(inputs), [b""ABC""])
+      self.assertAllEqual(session.run(inputs), [b""DEF""])
+      self.assertAllEqual(session.run(inputs), [b""GHK""])
+      with self.assertRaises(errors.OutOfRangeError):
+        session.run(inputs)
+
+      coord.request_stop()
+
+  def test_read_text_lines_multifile(self):
+    gfile.Glob = self._orig_glob
+    filenames = self._create_sorted_temp_files([""ABC\n"", ""DEF\nGHK\n""])
+
+    batch_size = 1
+    queue_capacity = 5
+    name = ""my_batch""
+
+    with tf.Graph().as_default() as g, self.test_session(graph=g) as session:
+      inputs = tf.contrib.learn.io.read_batch_examples(
+          filenames, batch_size, reader=tf.TextLineReader,
+          randomize_input=False, num_epochs=1, queue_capacity=queue_capacity,
+          name=name)
       session.run(tf.initialize_local_variables())
 
       coord = tf.train.Coordinator()
@@ -230,7 +265,7 @@ class GraphIOTest(tf.test.TestCase):
 
       coord.request_stop()
 
-  def test_batch_reader(self):
+  def test_batch_text_lines(self):
     gfile.Glob = self._orig_glob
     filename = self._create_temp_file(""A\nB\nC\nD\nE\n"")
 
@@ -255,7 +290,7 @@ class GraphIOTest(tf.test.TestCase):
 
       coord.request_stop()
 
-  def test_keyed_read_csv(self):
+  def test_keyed_read_text_lines(self):
     gfile.Glob = self._orig_glob
     filename = self._create_temp_file(""ABC\nDEF\nGHK\n"")
 
",0,test
4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface.

PiperOrigin-RevId: 188554206",service.cc,"@@ -937,6 +937,11 @@ tensorflow::Status Service::Execute(const ExecuteRequest* arg,
   return tensorflow::Status::OK();
 }
 
+tensorflow::Status Service::ExecuteGraph(const ExecuteGraphRequest* /*arg*/,
+                                         ExecuteResponse* /*result*/) {
+  return Unimplemented(""execute-graph is not yet implemented"");
+}
+
 tensorflow::Status Service::ExecuteAsync(const ExecuteAsyncRequest* arg,
                                          ExecuteAsyncResponse* result) {
   VLOG(1) << ""running execute-async request: "" << arg->ShortDebugString();
",0,train
4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface.

PiperOrigin-RevId: 188554206",service.h,"@@ -112,6 +112,12 @@ class Service : public ServiceInterface {
   tensorflow::Status Execute(const ExecuteRequest* arg,
                              ExecuteResponse* result) override;
 
+  // Executes a computation with the provided global data passed as
+  // immutable arguments. The request contains the whole computation graph.
+  // Returns global data output and execution timing.
+  tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg,
+                                  ExecuteResponse* result) override;
+
   // Executes one or more computations in parallel with the provided global data
   // passed as immutable arguments. Returns global data output for each
   // computation.
",0,train
4f333b63f7b46a3122f91b5358f2763e6c2e8206,tensorflow/tensorflow,"[XLA] Add a whole graph execution interface.

PiperOrigin-RevId: 188554206",service_interface.h,"@@ -54,6 +54,9 @@ class ServiceInterface {
   virtual tensorflow::Status Execute(const ExecuteRequest* arg,
                                      ExecuteResponse* result) = 0;
 
+  virtual tensorflow::Status ExecuteGraph(const ExecuteGraphRequest* arg,
+                                          ExecuteResponse* result) = 0;
+
   virtual tensorflow::Status ExecuteParallel(
       const ExecuteParallelRequest* arg, ExecuteParallelResponse* result) = 0;
 
",0,train
4da61c0caadbab46cd43961565ac21314dee8254,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-10-01

PiperOrigin-RevId: 334769371
Change-Id: Id864a5af5aa673ca2eb2bf2ba32ae236744d5c38",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 9, 30)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 10, 1)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,test
da81d8fa1bf4d82633b283473f7d04bca148f974,tensorflow/tensorflow,"TFlite benchmark_model tool: Don't destruct the NNAPI SL before destructing the
delegate that uses it.

PiperOrigin-RevId: 431445099",nnapi_delegate_provider.cc,"@@ -28,18 +28,19 @@ namespace {
 
 using nnapi::NnApiSupportLibrary;
 
-// StatefulNnApiDelegate that takes ownership of NnApiSupportLibrary instance
-// passed to the constructor.
+// StatefulNnApiDelegate that holds onto an NnApiSupportLibrary instance
+// passed to the constructor for later destruction.
+// Note that the support library must outlive the delegate.
 class NnApiSupportLibraryDelegate : public StatefulNnApiDelegate {
  public:
-  // The constructed object takes ownership of the nnapi_sl.
   NnApiSupportLibraryDelegate(const NnApiSupportLibrary* nnapi_sl,
                               Options options)
       : StatefulNnApiDelegate(nnapi_sl->getFL5(), options),
         nnapi_sl_(nnapi_sl) {}
+  const NnApiSupportLibrary* get_nnapi_sl() const { return nnapi_sl_; }
 
  private:
-  std::unique_ptr<const NnApiSupportLibrary> nnapi_sl_;
+  const NnApiSupportLibrary* const nnapi_sl_;
 };
 
 }  // namespace
@@ -260,7 +261,11 @@ TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate(
       return TfLiteDelegatePtr(
           new NnApiSupportLibraryDelegate(nnapi_impl.release(), options),
           [](TfLiteDelegate* delegate) {
-            delete reinterpret_cast<NnApiSupportLibraryDelegate*>(delegate);
+            NnApiSupportLibraryDelegate* sl_delegate =
+                reinterpret_cast<NnApiSupportLibraryDelegate*>(delegate);
+            const NnApiSupportLibrary* sl = sl_delegate->get_nnapi_sl();
+            delete sl_delegate;
+            delete sl;
           });
     }
   } else if (!params.Get<std::string>(""nnapi_accelerator_name"").empty()) {
",0,train
744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round.

Previously, we translated the HLO RoundNearestAfz instruction to a call to the
NVIDIA libdevice routine __nv_round_ as a workaround for the bug (see
cl/235610143). This change reverts the workaround and translates the HLO
RoundNearestAfz instruction to llvm.round again, as the LLVM PTX bug has been
fixed.

PiperOrigin-RevId: 246848247",elemental_ir_emitter.cc,"@@ -442,7 +442,9 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitFloatUnaryOp(
                                           {operand_value},
                                           {operand_value->getType()}, b_);
     case HloOpcode::kRoundNearestAfz:
-      return EmitRoundNearestAfz(op->shape().element_type(), operand_value);
+      return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::round,
+                                          {operand_value},
+                                          {operand_value->getType()}, b_);
     case HloOpcode::kSign: {
       auto type = operand_value->getType();
       auto zero = llvm::ConstantFP::get(type, 0.0);
@@ -1139,12 +1141,6 @@ StatusOr<llvm::Value*> ElementalIrEmitter::EmitExpm1(PrimitiveType prim_type,
   return Select(x_is_small, for_small_x, for_large_x);
 }
 
-StatusOr<llvm::Value*> ElementalIrEmitter::EmitRoundNearestAfz(
-    PrimitiveType /*prim_type*/, llvm::Value* value) {
-  return llvm_ir::EmitCallToIntrinsic(llvm::Intrinsic::round, {value},
-                                      {value->getType()}, b_);
-}
-
 StatusOr<llvm::Value*> ElementalIrEmitter::EmitPow(PrimitiveType prim_type,
                                                    llvm::Value* lhs,
                                                    llvm::Value* rhs) {
",0,train
744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round.

Previously, we translated the HLO RoundNearestAfz instruction to a call to the
NVIDIA libdevice routine __nv_round_ as a workaround for the bug (see
cl/235610143). This change reverts the workaround and translates the HLO
RoundNearestAfz instruction to llvm.round again, as the LLVM PTX bug has been
fixed.

PiperOrigin-RevId: 246848247",elemental_ir_emitter.h,"@@ -146,9 +146,6 @@ class ElementalIrEmitter : public IrBuilderMixin<ElementalIrEmitter> {
   virtual StatusOr<llvm::Value*> EmitTanh(PrimitiveType prim_type,
                                           llvm::Value* value);
 
-  virtual StatusOr<llvm::Value*> EmitRoundNearestAfz(PrimitiveType prim_type,
-                                                     llvm::Value* value);
-
   virtual StatusOr<llvm::Value*> EmitReducePrecision(const HloInstruction* hlo,
                                                      llvm::Value* x);
 
",0,train
744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round.

Previously, we translated the HLO RoundNearestAfz instruction to a call to the
NVIDIA libdevice routine __nv_round_ as a workaround for the bug (see
cl/235610143). This change reverts the workaround and translates the HLO
RoundNearestAfz instruction to llvm.round again, as the LLVM PTX bug has been
fixed.

PiperOrigin-RevId: 246848247",elemental_ir_emitter.cc,"@@ -271,16 +271,6 @@ StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitTanh(PrimitiveType prim_type,
   return FPCast(fast_tanh, value->getType());
 }
 
-StatusOr<llvm::Value*> GpuElementalIrEmitter::EmitRoundNearestAfz(
-    PrimitiveType prim_type, llvm::Value* value) {
-  // Use libdevice __nv_round instead of llvm.round. This is to workaround a
-  // bug in the PTX backend, which implements llvm.round with PTX cvt.rni.
-  // When the llvm.round is fixed, we may still want to use __nv_round here as
-  // expanding the non-trivial implementation early while inlining allows better
-  // optimizations.
-  return EmitLibdeviceMathCall(""__nv_round"", {value}, {prim_type}, prim_type);
-}
-
 llvm::Value* GpuElementalIrEmitter::EmitDeviceFunctionCall(
     const string& callee_name, absl::Span<llvm::Value* const> operands,
     absl::Span<const PrimitiveType> input_types, PrimitiveType output_type,
",0,train
744a6bb1db15bf0d7ed9d83fb117ef2a02fb4591,tensorflow/tensorflow,"[XLA:GPU] Revert the workaround for the LLVM PTX backend bug on llvm.round.

Previously, we translated the HLO RoundNearestAfz instruction to a call to the
NVIDIA libdevice routine __nv_round_ as a workaround for the bug (see
cl/235610143). This change reverts the workaround and translates the HLO
RoundNearestAfz instruction to llvm.round again, as the LLVM PTX bug has been
fixed.

PiperOrigin-RevId: 246848247",elemental_ir_emitter.h,"@@ -91,9 +91,6 @@ class GpuElementalIrEmitter : public ElementalIrEmitter {
   StatusOr<llvm::Value*> EmitTanh(PrimitiveType prim_type,
                                   llvm::Value* value) override;
 
-  StatusOr<llvm::Value*> EmitRoundNearestAfz(PrimitiveType prim_type,
-                                             llvm::Value* value) override;
-
   llvm::Value* EmitThreadId() override;
 
  private:
",0,train
5fc14e22172722a115a282cbde8c4770e305aef5,tensorflow/tensorflow,"Fix layers_test exception regex matching.
Change: 152422855",layers_test.py,"@@ -1486,7 +1486,7 @@ class PartialFlattenTest(test.TestCase):
     inputs = sparse_tensor.SparseTensor(indices, values, shape)
 
     with self.assertRaisesRegexp(ValueError,
-                                 'inputs has rank less than new_rank'):
+                                 'Inputs has rank less than new_rank'):
       _layers._inner_flatten(inputs, new_rank)
 
 
",0,train
503da90d9deb8964bd435e0893eb50b6f42a18ee,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-05-02

PiperOrigin-RevId: 246278996",compat.py,"@@ -27,7 +27,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 5, 1)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 5, 2)
 
 
 @tf_export(""compat.forward_compatible"")
",0,train
6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value
using appropriate units based on the magnitude of the time interval, and add
tests for this.
Change: 132365945",str_util.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <ctype.h>
 #include <vector>
 #include ""tensorflow/core/lib/strings/numbers.h""
+#include ""tensorflow/core/lib/strings/stringprintf.h""
 
 namespace tensorflow {
 namespace str_util {
@@ -334,5 +335,58 @@ bool SplitAndParseAsInts(StringPiece text, char delim,
   return true;
 }
 
+string HumanReadableElapsedTime(double seconds) {
+  string human_readable;
+
+  if (seconds < 0) {
+    human_readable = ""-"";
+    seconds = -seconds;
+  }
+
+  // Start with us and keep going up to years.
+  // The comparisons must account for rounding to prevent the format breaking
+  // the tested condition and returning, e.g., ""1e+03 us"" instead of ""1 ms"".
+  const double microseconds = seconds * 1.0e6;
+  if (microseconds < 999.5) {
+    strings::Appendf(&human_readable, ""%0.3g us"", microseconds);
+    return human_readable;
+  }
+  double milliseconds = seconds * 1e3;
+  if (milliseconds >= .995 && milliseconds < 1) {
+    // Round half to even in Appendf would convert this to 0.999 ms.
+    milliseconds = 1.0;
+  }
+  if (milliseconds < 999.5) {
+    strings::Appendf(&human_readable, ""%0.3g ms"", milliseconds);
+    return human_readable;
+  }
+  if (seconds < 60.0) {
+    strings::Appendf(&human_readable, ""%0.3g s"", seconds);
+    return human_readable;
+  }
+  seconds /= 60.0;
+  if (seconds < 60.0) {
+    strings::Appendf(&human_readable, ""%0.3g min"", seconds);
+    return human_readable;
+  }
+  seconds /= 60.0;
+  if (seconds < 24.0) {
+    strings::Appendf(&human_readable, ""%0.3g h"", seconds);
+    return human_readable;
+  }
+  seconds /= 24.0;
+  if (seconds < 30.0) {
+    strings::Appendf(&human_readable, ""%0.3g days"", seconds);
+    return human_readable;
+  }
+  if (seconds < 365.2425) {
+    strings::Appendf(&human_readable, ""%0.3g months"", seconds / 30.436875);
+    return human_readable;
+  }
+  seconds /= 365.2425;
+  strings::Appendf(&human_readable, ""%0.3g years"", seconds);
+  return human_readable;
+}
+
 }  // namespace str_util
 }  // namespace tensorflow
",0,test
6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value
using appropriate units based on the magnitude of the time interval, and add
tests for this.
Change: 132365945",str_util.h,"@@ -80,6 +80,15 @@ string Uppercase(StringPiece s);
 // set of characters that can be used as word boundaries.
 void TitlecaseString(string* s, StringPiece delimiters);
 
+// Converts a time interval as double to a human readable
+// string. For example:
+//   0.001       -> ""1 ms""
+//   10.0        -> ""10 s""
+//   933120.0    -> ""10.8 days""
+//   39420000.0  -> ""1.25 years""
+//   -10         -> ""-10 s""
+string HumanReadableElapsedTime(double seconds);
+
 // Join functionality
 template <typename T>
 string Join(const T& s, const char* sep);
",0,test
6e369da8b4a8668169cdd6a93c8a069a5a4888c2,tensorflow/tensorflow,"Add new str_util::HumanReadableElapsedTime that formats a 'double seconds' value
using appropriate units based on the magnitude of the time interval, and add
tests for this.
Change: 132365945",str_util_test.cc,"@@ -287,4 +287,23 @@ TEST(TitlecaseString, Basic) {
   ASSERT_EQ(s, ""Dense"");
 }
 
+TEST(HumanReadableElapsedTime, Basic) {
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(-10), ""-10 s"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(-0.001), ""-1 ms"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(-60.0), ""-1 min"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(0.00000001), ""0.01 us"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(0.0000012), ""1.2 us"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(0.0012), ""1.2 ms"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(0.12), ""120 ms"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(1.12), ""1.12 s"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(90.0), ""1.5 min"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(600.0), ""10 min"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(9000.0), ""2.5 h"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(87480.0), ""1.01 days"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(7776000.0), ""2.96 months"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(78840000.0), ""2.5 years"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(382386614.40), ""12.1 years"");
+  EXPECT_EQ(str_util::HumanReadableElapsedTime(DBL_MAX), ""5.7e+300 years"");
+}
+
 }  // namespace tensorflow
",0,test
70bc79a21abd445cca6930e51369941c3951d5ee,tensorflow/tensorflow,"NFC: Remove use of GetRankedTensorTypeForOperand for results in BroadcastGradientArgs op

GetRankedTensorTypeForOperand is for operands. Result types don't involve constants and can be fetched directly.

Also, use getDimSize over getShape.

PiperOrigin-RevId: 347593054
Change-Id: Ie8fc9f27b9105f3ab06fb4a7eac8699823dfa5fc",tf_ops_a_m.cc,"@@ -655,12 +655,14 @@ static LogicalResult Verify(BroadcastGradientArgsOp op) {
   GetOutputShapeForBroadcastGradientArgs(bcasted_shape, s0_shape, s1_shape, r0,
                                          r1);
 
-  RankedTensorType r0_ty = GetRankedTensorTypeForOperand(op.r0());
-  RankedTensorType r1_ty = GetRankedTensorTypeForOperand(op.r1());
-  if (r0_ty && r0_ty.hasStaticShape() && r0_ty.getShape()[0] != r0.size())
+  // Verify that output types are of rank one and matches the computed result
+  // shape.
+  auto r0_ty = op.r0().getType().cast<RankedTensorType>();
+  auto r1_ty = op.r1().getType().cast<RankedTensorType>();
+  if (r0_ty.hasStaticShape() && r0_ty.getDimSize(0) != r0.size())
     return op.emitOpError() << ""requires dimension 0 size of 'r0' to be ""
                             << r0.size() << "" but got "" << r0_ty.getShape()[0];
-  if (r1_ty && r1_ty.hasStaticShape() && r1_ty.getShape()[0] != r1.size())
+  if (r1_ty.hasStaticShape() && r1_ty.getDimSize(0) != r1.size())
     return op.emitOpError() << ""requires dimension 0 size of 'r1' to be ""
                             << r1.size() << "" but got "" << r1_ty.getShape()[0];
 
",0,train
6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate.

PiperOrigin-RevId: 288393196
Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",op_builder.cc,"@@ -80,6 +80,10 @@ OpBuilder* GraphBuilder::CreateOpBuilderFromTfLiteOp(int op_type) {
       return CreateNegOpBuilder(this, OP_QuantizedNeg_8);
     case kTfLiteBuiltinTranspose:
       return CreateTransposeBuilder(this, OP_Transpose_8);
+    case kTfLiteBuiltinSpaceToDepth:
+      return CreateSpaceToDepthBuilder(this, OP_SpaceToDepth_8);
+    case kTfLiteBuiltinDepthToSpace:
+      return CreateSpaceToDepthBuilder(this, OP_DepthToSpace_8);
     default:
       context_->ReportError(context_, ""Op not supported: %d"", op_type);
       return nullptr;
",0,train
6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate.

PiperOrigin-RevId: 288393196
Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",op_factory.h,"@@ -43,6 +43,7 @@ OpBuilder* CreateResizeBilinearOpBuilder(GraphBuilder* graph_builder,
                                          int op_type);
 OpBuilder* CreateNegOpBuilder(GraphBuilder* graph_builder, int op_type);
 OpBuilder* CreateTransposeBuilder(GraphBuilder* graph_builder, int op_type);
+OpBuilder* CreateSpaceToDepthBuilder(GraphBuilder* graph_builder, int op_type);
 
 }  // namespace hexagon
 }  // namespace delegates
",0,train
6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate.

PiperOrigin-RevId: 288393196
Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",space_to_depth_builder.cc,"@@ -0,0 +1,93 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include ""tensorflow/lite/experimental/delegates/hexagon/builders/space_to_depth_builder.h""
+
+#include <stdint.h>
+
+#include <limits>
+
+#include ""tensorflow/lite/c/builtin_op_data.h""
+#include ""tensorflow/lite/experimental/delegates/hexagon/hexagon_nn/hexagon_nn.h""
+#include ""tensorflow/lite/kernels/internal/reference/reference_ops.h""
+#include ""tensorflow/lite/kernels/kernel_util.h""
+
+namespace tflite {
+namespace delegates {
+namespace hexagon {
+TfLiteStatus SpaceToDepthOpBuilder::PopulateSubGraph(
+    const TfLiteIntArray* inputs, const TfLiteIntArray* outputs,
+    TfLiteContext* context) {
+  static int quant_bound_shape[] = {1, 1, 1, 1};
+  int tensor_id;
+
+  // Input tensor.
+  tensor_id = inputs->data[0];
+  const auto& input_tensor = context->tensors[tensor_id];
+  TF_LITE_ENSURE_STATUS(
+      ComputeMinAndMaxQuantValues(input_tensor, &input_min_, &input_max_,
+                                  std::numeric_limits<uint8_t>::min(),
+                                  std::numeric_limits<uint8_t>::max()));
+  auto* input_min_const = graph_builder_->AddConstNodeWithData(
+      quant_bound_shape, reinterpret_cast<char*>(&input_min_),
+      sizeof(input_min_));
+  auto* input_max_const = graph_builder_->AddConstNodeWithData(
+      quant_bound_shape, reinterpret_cast<char*>(&input_max_),
+      sizeof(input_max_));
+
+  // Block size.
+  const TfLiteSpaceToDepthParams* space_to_depth_params =
+      reinterpret_cast<const TfLiteSpaceToDepthParams*>(builtin_data_);
+  block_size_ = space_to_depth_params->block_size;
+  auto* block_size_node = graph_builder_->AddConstNodeWithData(
+      quant_bound_shape, reinterpret_cast<char*>(&block_size_),
+      sizeof(int));
+
+  // All inputs.
+  AddInput(graph_builder_->GetHexagonTensorId(tensor_id));
+  AddInput(TensorID(block_size_node->GetID(), 0));
+  AddInput(TensorID(input_min_const->GetID(), 0));
+  AddInput(TensorID(input_max_const->GetID(), 0));
+
+  // Hexagon outputs for this node.
+  int output_batch_size, output_height_size, output_width_size,
+      output_depth_size;
+  GetDims(&output_batch_size, &output_height_size, &output_width_size,
+          &output_depth_size, context->tensors[outputs->data[0]].dims);
+  node_output_ = AddOutput(sizeof(uint8_t), 4,
+                           {output_batch_size, output_height_size,
+                            output_width_size, output_depth_size});
+  AddOutput(sizeof(float), 4, {1, 1, 1, 1});
+  AddOutput(sizeof(float), 4, {1, 1, 1, 1});
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus SpaceToDepthOpBuilder::RegisterOutputs(
+    const TfLiteIntArray* outputs, TfLiteContext* context) {
+  // Should be only 1 output.
+  graph_builder_->AddTensorWithID(outputs->data[0], node_output_.first,
+                                  node_output_.second);
+  return kTfLiteOk;
+}
+
+SpaceToDepthOpBuilder::~SpaceToDepthOpBuilder() {}
+
+OpBuilder* CreateSpaceToDepthBuilder(GraphBuilder* graph_builder, int op_type) {
+  return new SpaceToDepthOpBuilder(graph_builder, op_type);
+}
+
+}  // namespace hexagon
+}  // namespace delegates
+}  // namespace tflite
",0,train
6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate.

PiperOrigin-RevId: 288393196
Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",space_to_depth_builder.h,"@@ -0,0 +1,51 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_
+
+#include <vector>
+
+#include ""tensorflow/lite/experimental/delegates/hexagon/builders/op_builder.h""
+
+namespace tflite {
+namespace delegates {
+namespace hexagon {
+
+// Supports both ways:
+// Space -> Depth & Depth -> Space.
+class SpaceToDepthOpBuilder : public OpBuilder {
+ public:
+  explicit SpaceToDepthOpBuilder(GraphBuilder* graph_builder, int op_type)
+      : OpBuilder(graph_builder, op_type) {}
+  TfLiteStatus PopulateSubGraph(const TfLiteIntArray* inputs,
+                                const TfLiteIntArray* outputs,
+                                TfLiteContext* context) override;
+
+  TfLiteStatus RegisterOutputs(const TfLiteIntArray* outputs,
+                               TfLiteContext* context) override;
+
+  ~SpaceToDepthOpBuilder() override;
+
+ private:
+  TensorID node_output_;
+  float input_min_, input_max_, output_min_, output_max_;
+  int block_size_;
+};
+
+}  // namespace hexagon
+}  // namespace delegates
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_DELEGATES_HEXAGON_BUILDERS_SPACE_TO_DEPTH_BUILDER_H_
",0,train
6ebd3bb334bd9e99eb34e4440dab5853fc84e869,tensorflow/tensorflow,"Adds support for SpaceToDepth & DepthToSpace in hexagon delegate.

PiperOrigin-RevId: 288393196
Change-Id: I8bf2ad0edd1b40230e88a10ce70dbe2449f172cc",utils.cc,"@@ -261,6 +261,12 @@ bool IsNodeSupportedByHexagon(const TfLiteRegistration* registration,
       return InputsWithCorrectTypes(node, context,
                                     {kTfLiteUInt8, kTfLiteInt32});
     }
+    case kTfLiteBuiltinSpaceToDepth: {
+      return InputsWithCorrectTypes(node, context, {kTfLiteUInt8});
+    }
+    case kTfLiteBuiltinDepthToSpace: {
+      return InputsWithCorrectTypes(node, context, {kTfLiteUInt8});
+    }
     default:
       return false;
   }
",0,train
dfbbc2d4de667e0f9fe07035d0c413d3e4bd8364,tensorflow/tensorflow,"Introduce a new XRTExecute flag which allows an exploded (in terms of its handles) tuple to be returned.
This saves clients that are interested in the tuple element handles from doing extra RPCs to get them.

PiperOrigin-RevId: 223994592",xrt_execute_op.cc,"@@ -228,14 +228,35 @@ Status XRTExecuteOp::DoWork(OpKernelContext* context) {
   TF_RETURN_IF_ERROR(XRTTupleAllocation::CreateFromBuffer(
       shaped_buffer, device_ref.backend(), device_ref.device_ordinal(),
       &output_tuple));
-
-  Tensor* output_tensor;
-  TF_RETURN_IF_ERROR(
-      context->allocate_output(0, TensorShape({}), &output_tensor));
-  int64 key;
-  TF_RETURN_IF_ERROR(output_tuple->Intern(rm, &key));
-  output_tensor->scalar<int64>()() = key;
-
+  if (config_proto.return_exploded_tuple() &&
+      xla::ShapeUtil::IsTuple(output_tuple->on_device_shape())) {
+    int64 tuple_element_count =
+        xla::ShapeUtil::TupleElementCount(output_tuple->on_device_shape());
+    Tensor* output_tensor;
+    TF_RETURN_IF_ERROR(context->allocate_output(
+        0, TensorShape({tuple_element_count}), &output_tensor));
+
+    for (int64 i = 0; i < tuple_element_count; ++i) {
+      xla::ShapeIndex shape_index;
+      shape_index.push_back(i);
+
+      XRTTupleAllocation* suballocation;
+      TF_RETURN_IF_ERROR(XRTTupleAllocation::MakeSubBuffer(
+          output_tuple, shape_index, &suballocation,
+          /*alias_parent_allocation=*/false));
+      int64 key;
+      TF_RETURN_IF_ERROR(suballocation->Intern(rm, &key));
+      output_tensor->vec<int64>()(i) = key;
+    }
+    output_tuple->Unref();
+  } else {
+    Tensor* output_tensor;
+    TF_RETURN_IF_ERROR(
+        context->allocate_output(0, TensorShape({}), &output_tensor));
+    int64 key;
+    TF_RETURN_IF_ERROR(output_tuple->Intern(rm, &key));
+    output_tensor->scalar<int64>()() = key;
+  }
   return Status::OK();
 }
 
",0,train
dfbbc2d4de667e0f9fe07035d0c413d3e4bd8364,tensorflow/tensorflow,"Introduce a new XRTExecute flag which allows an exploded (in terms of its handles) tuple to be returned.
This prevents clients which are interested in the tuple element handles to do extra RPCs to get them.

PiperOrigin-RevId: 223994592",raw_api_test.cc,"@@ -175,6 +175,18 @@ xla::XlaComputation AddAndTuple() {
   return builder.Build().ValueOrDie();
 }
 
+xla::XlaComputation AddAndSubTuple() {
+  xla::XlaBuilder builder(""AddAndSubTuple"");
+  auto p0 = xla::Parameter(&builder, 0, xla::ShapeUtil::MakeShape(xla::F32, {}),
+                           ""P0"");
+  auto p1 = xla::Parameter(&builder, 1, xla::ShapeUtil::MakeShape(xla::F32, {}),
+                           ""P1"");
+  auto sum = xla::Add(p0, p1);
+  auto sub = xla::Sub(p0, p1);
+  xla::Tuple(&builder, {sum, sub});
+  return builder.Build().ValueOrDie();
+}
+
 void StoreComputationSnapshot(const xla::XlaComputation& computation,
                               xla::HloSnapshot* dst) {
   auto snapshot = computation.Snapshot().ValueOrDie();
@@ -681,6 +693,70 @@ TEST(RawApiTest, CompileAndExecuteReturnTuple) {
   EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
 }
 
+TEST(RawApiTest, CompileAndExecuteReturnExplodedTuple) {
+  xrt::XLAAllocation p0;
+  p0.set_device_ordinal(0);
+  *p0.mutable_value() = xla::LiteralUtil::CreateR0<float>(12.0f).ToProto();
+
+  xrt::XLAAllocation p1;
+  p1.set_device_ordinal(0);
+  *p1.mutable_value() = xla::LiteralUtil::CreateR0<float>(3.0f).ToProto();
+
+  xrt::XLAComputation c;
+  auto config = c.mutable_config();
+  auto shapes = config->mutable_program_shape();
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {}).ToProto();
+  *shapes->add_parameters() = xla::ShapeUtil::MakeShape(xla::F32, {}).ToProto();
+  *shapes->mutable_result() =
+      xla::ShapeUtil::MakeTupleShape({xla::ShapeUtil::MakeShape(xla::F32, {}),
+                                      xla::ShapeUtil::MakeShape(xla::F32, {})})
+          .ToProto();
+  StoreComputationSnapshot(AddAndSubTuple(), c.mutable_hlo_snapshot());
+
+  xrt::XRTExecutionConfig e;
+  e.set_release_input_handles(true);
+  e.set_release_compilation_handle(true);
+  e.set_return_exploded_tuple(true);
+
+  Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
+  auto e_config =
+      ops::Const(root.WithDevice(""/device:CPU:0""), e.SerializeAsString());
+  auto computation =
+      ops::Const(root.WithDevice(""/device:CPU:0""), c.SerializeAsString());
+  auto c_handle = ops::XRTCompile(root, computation);
+  auto p0_value =
+      ops::Const(root.WithDevice(""/device:CPU:0""), p0.SerializeAsString());
+  auto p0_handle = ops::XRTAllocate(root, p0_value);
+  auto p1_value =
+      ops::Const(root.WithDevice(""/device:CPU:0""), p1.SerializeAsString());
+  auto p1_handle = ops::XRTAllocate(root, p1_value);
+  auto result = ops::XRTExecute(root, c_handle.handle, e_config,
+                                {Output(p0_handle), Output(p1_handle)});
+  TF_ASSERT_OK(root.status());
+
+  ClientSession session(root);
+  std::vector<Tensor> outputs;
+  TF_EXPECT_OK(session.Run({result}, &outputs));
+  EXPECT_EQ(outputs.size(), 1);
+
+  auto handles_vec = outputs.front().vec<int64>();
+  EXPECT_EQ(handles_vec.size(), 2);
+
+  const float kResults[2] = {15.0f, 9.0f};
+  for (int64 i = 0; i < handles_vec.size(); ++i) {
+    auto read_back = ops::XRTReadLiteralAndRelease(root, Input(handles_vec(i)));
+    std::vector<Tensor> voutputs;
+    TF_EXPECT_OK(session.Run({read_back}, &voutputs));
+    EXPECT_EQ(voutputs.size(), 1);
+
+    xla::LiteralProto response;
+    EXPECT_TRUE(response.ParseFromString(voutputs[0].scalar<string>()()));
+
+    auto expected = xla::LiteralUtil::CreateR0<float>(kResults[i]);
+    EXPECT_TRUE(CompareLiteralToLiteralProto(expected, response));
+  }
+}
+
 TEST(RawApiTest, LeakCompilationReference) {
   xrt::XLAComputation c;
   auto config = c.mutable_config();
",0,train
014d4b5417b7a361c6b9102bf80455ea4b44e4b3,tensorflow/tensorflow,Removed deprecated API from the file.,gamma.py,"@@ -267,7 +267,7 @@ class Gamma(distribution.Distribution):
           self.batch_shape_tensor(),
           np.array(np.nan, dtype=self.dtype.as_numpy_dtype()),
           name=""nan"")
-      return array_ops.where(self.concentration > 1., mode, nan)
+      return array_ops.where_v2(self.concentration > 1., mode, nan)
     else:
       return control_flow_ops.with_dependencies([
           check_ops.assert_less(
",0,train
ff563b9436509a35bbb5087952c7fbfda44df46f,tensorflow/tensorflow,"Fixed the bug in keras where the callback attributes are not correctly checked.

PiperOrigin-RevId: 224598769",keras_test.py,"@@ -1085,8 +1085,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
   def test_unsupported_features(self, distribution):
     with self.cached_session():
@@ -1134,8 +1134,8 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(combinations.combine(
       distribution=[
-          combinations.mirrored_strategy_with_two_gpus,
-          combinations.core_mirrored_strategy_with_two_gpus],
+          combinations.mirrored_strategy_with_gpu_and_cpu,
+          combinations.core_mirrored_strategy_with_gpu_and_cpu],
       mode=['graph', 'eager']))
   def test_calling_with_unsupported_predefined_callbacks(self, distribution):
     with self.cached_session():
@@ -1161,12 +1161,6 @@ class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
                                    'using'):
         model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
                   callbacks=[keras.callbacks.ReduceLROnPlateau()])
-      with self.assertRaisesRegexp(ValueError,
-                                   'histogram_freq in the TensorBoard callback '
-                                   'is not supported when using '
-                                   'DistributionStrategy.'):
-        model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0,
-                  callbacks=[keras.callbacks.TensorBoard(histogram_freq=10)])
 
 
 class TestDistributionStrategyWithLossMasking(test.TestCase,
",0,test
ff563b9436509a35bbb5087952c7fbfda44df46f,tensorflow/tensorflow,"Fixed the bug in keras where the callback attributes are not correctly checked.

PiperOrigin-RevId: 224598769",distributed_training_utils.py,"@@ -199,11 +199,19 @@ def validate_callbacks(input_callbacks, optimizer, current_strategy):
       # running ops.
       if isinstance(callback, callbacks.TensorBoard):
         if callback.__getattribute__('histogram_freq'):
-          raise ValueError('histogram_freq in the TensorBoard callback is not '
-                           'supported when using DistributionStrategy.')
+          logging.warning(
+              UserWarning(
+                  '`histogram_freq` in the TensorBoard callback is not '
+                  'supported when using DistributionStrategy. Setting '
+                  '`histogram_freq` to `0`.'))
+          callback.histogram_freq = 0
         if callback.__getattribute__('write_grads'):
-          raise ValueError('write_grads in the TensorBoard callback is not '
-                           'supported when using DistributionStrategy.')
+          logging.warning(
+              UserWarning(
+                  '`write_grads` in the TensorBoard callback is not supported '
+                  'when using DistributionStrategy. Setting `write_grads` '
+                  'to `False`.'))
+          callback.write_grads = False
 
 
 def validate_distributed_dataset_inputs(distribution_strategy, x, y,
",0,test
ab48dbd4ac2095548a5bc8505e08e751d409727f,tensorflow/tensorflow,"Fixing operator order in LRN docs to match code.

The implementation adds the bias to a temporary; that temporary is then the base raised to the exponent beta. The implementation also agrees with the equation in Section 3.3 of the referenced Krizhevsky et al. paper.
Change: 115721267",nn_ops.cc,"@@ -349,7 +349,7 @@ each component is divided by the weighted, squared sum of inputs within
 
     sqr_sum[a, b, c, d] =
         sum(input[a, b, c, d - depth_radius : d + depth_radius + 1] ** 2)
-    output = input / (bias + alpha * sqr_sum ** beta)
+    output = input / (bias + alpha * sqr_sum) ** beta
 
 For details, see [Krizhevsky et al., ImageNet classification with deep
 convolutional neural networks (NIPS 2012)]
",0,test
5d4a29eaf590b4a3068ef4d0b7bea9d4f7bd9369,tensorflow/tensorflow,"Special case wrapping of ndarrays in the gradient tape code.

PiperOrigin-RevId: 317762474
Change-Id: Ie848ad90a88aff5b2faef4069c3f05887038c367",backprop.py,"@@ -62,6 +62,9 @@ from tensorflow.python.util.tf_export import tf_export
 pfor_ops = LazyLoader(
     ""pfor_ops"", globals(),
     ""tensorflow.python.ops.parallel_for.control_flow_ops"")
+np_arrays = LazyLoader(
+    ""np_arrays"", globals(),
+    ""tensorflow.python.ops.numpy_ops.np_arrays"")
 
 function = LazyLoader(""function"", globals(),
                       ""tensorflow.python.eager.function"")
@@ -721,9 +724,11 @@ pywrap_tfe.TFE_Py_RegisterVSpace(_default_vspace)
 
 
 def _handle_or_self(x):
-  """"""If x is ResourceVariable, return its handle, else x.""""""
+  """"""Unwrap resource variable/ndarray to return tensors.""""""
   if resource_variable_ops.is_resource_variable(x):
-    x = x.handle
+    return x.handle
+  if isinstance(x, np_arrays.ndarray):
+    return x.data
   return x
 
 
@@ -1023,6 +1028,7 @@ class GradientTape(object):
             ""gradient in order to compute higher order ""
             ""derivatives."", 1)
 
+    num_ndarrays = 0
     flat_targets = []
     for t in nest.flatten(target):
       if not backprop_util.IsTrainable(t):
@@ -1033,7 +1039,12 @@ class GradientTape(object):
       if resource_variable_ops.is_resource_variable(t):
         with self:
           t = ops.convert_to_tensor(t)
+      elif isinstance(t, np_arrays.ndarray):
+        t = t.data
+        num_ndarrays += 1
       flat_targets.append(t)
+    # Only rewrap if all targets are ndarray. If not, prefer tensors.
+    rewrap_as_ndarray = num_ndarrays == len(flat_targets)
 
     flat_sources = nest.flatten(sources)
     flat_sources_raw = flat_sources
@@ -1066,6 +1077,9 @@ class GradientTape(object):
       self._watched_variables = self._tape.watched_variables()
       self._tape = None
 
+    if rewrap_as_ndarray:
+      flat_grad = nest.map_structure(np_arrays.tensor_to_ndarray, flat_grad)
+
     grad = nest.pack_sequence_as(sources, flat_grad)
     return grad
 
@@ -1120,6 +1134,10 @@ class GradientTape(object):
       ValueError: If vectorization of jacobian computation fails.
     """"""
     flat_sources = nest.flatten(sources)
+    rewrap_as_ndarray = False
+    if isinstance(target, np_arrays.ndarray):
+      target = target.data
+      rewrap_as_ndarray = True
     target_static_shape = target.shape
     target_shape = array_ops.shape(target)
     # Note that we push and pop the tape here and below. This is needed since we
@@ -1169,6 +1187,8 @@ class GradientTape(object):
         out = array_ops.reshape(out, new_shape)
         if context.executing_eagerly():
           out.set_shape(target_static_shape.concatenate(flat_sources[i].shape))
+      if rewrap_as_ndarray:
+        out = np_arrays.tensor_to_ndarray(out)
       output[i] = out
 
     return nest.pack_sequence_as(sources, output)
",0,train
5d4a29eaf590b4a3068ef4d0b7bea9d4f7bd9369,tensorflow/tensorflow,"Special case wrapping of ndarrays in the gradient tape code.

PiperOrigin-RevId: 317762474
Change-Id: Ie848ad90a88aff5b2faef4069c3f05887038c367",np_arrays.py,"@@ -82,10 +82,10 @@ class NdarraySpec(type_spec.BatchableTypeSpec):
     return (self._data_spec,)
 
   def _batch(self, batch_size):
-    return NdarraySpec(self._data_spec.batch(batch_size))
+    return NdarraySpec(self._data_spec._batch(batch_size))  # pylint: disable=protected-access
 
   def _unbatch(self):
-    return NdarraySpec(self._data_spec.unbatch())
+    return NdarraySpec(self._data_spec._unbatch())  # pylint: disable=protected-access
 
 
 class ndarray(composite_tensor.CompositeTensor):  # pylint: disable=invalid-name
@@ -306,10 +306,6 @@ class ndarray(composite_tensor.CompositeTensor):  # pylint: disable=invalid-name
   def __repr__(self):
     return 'ndarray<{}>'.format(self.data.__repr__())
 
-  @property
-  def _id(self):
-    return self.data._id  # pylint: disable=protected-access
-
 
 def tensor_to_ndarray(tensor):
   return ndarray.from_tensor(tensor)
",0,train
846a73f9f336e54a02c12388ac76a0aa8700543a,tensorflow/tensorflow,"Adds a int32 to int32 HashTable mapping.

PiperOrigin-RevId: 178190131",lookup_table_op.cc,"@@ -823,6 +823,7 @@ REGISTER_KERNEL(int64, int64);
 REGISTER_KERNEL(int64, float);
 REGISTER_KERNEL(string, string);
 REGISTER_KERNEL(string, bool);
+REGISTER_KERNEL(int32, int32);
 
 #undef REGISTER_KERNEL
 
",0,train
8095a9a78a105cf1a3e196dadc9e07591c6e0439,tensorflow/tensorflow,"simplify naming convention in threading_options

refactor options.threading serialization

prefer getattr over __getattribute__

regenerate golden apis",threading_options.py,"@@ -54,13 +54,9 @@ class ThreadingOptions(options.OptionsBase):
       ""The value 0 can be used to indicate that the threadpool size should be ""
       ""determined at runtime based on the number of available CPU cores."")
 
-  def _get_option_names(self):
-    return [""max_intra_op_parallelism"", ""private_threadpool_size""]
-
   def _has_non_default_values(self):
-    attrs = self._get_option_names()
-    for attr in attrs:
-      if object.__getattribute__(self, attr) is not None:
+    for attr in filter(lambda opt: not opt.startswith(""_""), dir(self)):
+      if getattr(self, attr) is not None:
         return True
     return False
 
",0,test
8095a9a78a105cf1a3e196dadc9e07591c6e0439,tensorflow/tensorflow,"simplify naming convention in threading_options

refactor options.threading serialization

prefer getattr over __getattribute__

regenerate golden apis",dataset_ops.py,"@@ -3617,20 +3617,16 @@ class Options(options_lib.OptionsBase):
     # (kvignesh1420): We try to keep the values of `threading` and
     # `experimental_threading` the same, to prevent unexpected behaviours
     # and ensure backward-compatibility.
-    if self.threading._has_non_default_values():
-      if self.experimental_threading._has_non_default_values():
-        override_options = []
-        for name in self.threading._get_option_names():
-          if (object.__getattribute__(self.threading, name) !=
-              object.__getattribute__(self.experimental_threading, name)):
-            override_options.append(name)
-        if override_options:
-          logging.warning(""overriding options '{}' of experimental_threading ""
-                ""with respective values in threading."".format(
-                  "","".join(override_options)))
-      self.experimental_threading = self.threading
-    else:
-      self.threading = self.experimental_threading
+    override_attrs = []
+    for attr in filter(lambda opt: not opt.startswith(""_""), dir(self.threading)):
+      if (getattr(self.threading, attr) !=
+          getattr(self.experimental_threading, attr)):
+        override_attrs.append(attr)
+    if override_attrs:
+      logging.warning(""overriding attr(s) '{}' of experimental_threading ""
+            ""with respective values in threading."".format(
+              "","".join(override_attrs)))
+    self.experimental_threading = self.threading
     pb.threading_options.CopyFrom(self.threading._to_proto())  # pylint: disable=protected-access
     return pb
 
",0,test
32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops.

PiperOrigin-RevId: 214448656",resources.cc,"@@ -60,14 +60,26 @@ int32 BoostedTreesEnsembleResource::next_node(
   DCHECK_LT(tree_id, tree_ensemble_->trees_size());
   DCHECK_LT(node_id, tree_ensemble_->trees(tree_id).nodes_size());
   const auto& node = tree_ensemble_->trees(tree_id).nodes(node_id);
-  DCHECK_EQ(node.node_case(), boosted_trees::Node::kBucketizedSplit);
-  const auto& split = node.bucketized_split();
-  if (bucketized_features[split.feature_id()](index_in_batch) <=
-      split.threshold()) {
-    return split.left_id();
-  } else {
-    return split.right_id();
+
+  switch (node.node_case()) {
+    case boosted_trees::Node::kBucketizedSplit: {
+      const auto& split = node.bucketized_split();
+      return (bucketized_features[split.feature_id()](index_in_batch) <=
+              split.threshold())
+                 ? split.left_id()
+                 : split.right_id();
+    }
+    case boosted_trees::Node::kCategoricalSplit: {
+      const auto& split = node.categorical_split();
+      return (bucketized_features[split.feature_id()](index_in_batch) ==
+              split.value())
+                 ? split.left_id()
+                 : split.right_id();
+    }
+    default:
+      DCHECK(false) << ""Node type "" << node.node_case() << "" not supported."";
   }
+  return -1;
 }
 
 float BoostedTreesEnsembleResource::node_value(const int32 tree_id,
",0,train
32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops.

PiperOrigin-RevId: 214448656",boosted_trees_ops.cc,"@@ -180,6 +180,8 @@ REGISTER_OP(""BoostedTreesMakeStatsSummary"")
       return Status::OK();
     });
 
+// TODO(nponomareva): when/if creating the new op for unbucketized data, rename
+// bucketized_features to features.
 REGISTER_OP(""BoostedTreesPredict"")
     .Input(""tree_ensemble_handle: resource"")
     .Input(""bucketized_features: num_bucketized_features * int32"")
",0,train
32140ae87fd86398ac4fa45cb67bd2f29a93090d,tensorflow/tensorflow,"Boosted trees: Adding categorical split support to prediction ops.

PiperOrigin-RevId: 214448656",prediction_ops_test.py,"@@ -445,6 +445,78 @@ class TrainingPredictionOpsTest(test_util.TensorFlowTestCase):
       #            change= 0.1(1.14+7.0-7.0)
       self.assertAllClose([[1], [0.114]], logits_updates)
 
+  def testCategoricalSplits(self):
+    """"""Tests the training prediction work for categorical splits.""""""
+    with self.cached_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """"""
+        trees {
+          nodes {
+            categorical_split {
+              feature_id: 1
+              value: 2
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            categorical_split {
+              feature_id: 0
+              value: 13
+              left_id: 3
+              right_id: 4
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 7.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 6.0
+            }
+          }
+        }
+        tree_weights: 1.0
+        tree_metadata {
+          is_finalized: true
+        }
+      """""", tree_ensemble_config)
+
+      # Create existing ensemble with one root split
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [13, 1, 3]
+      feature_1_values = [2, 2, 1]
+
+      # No previous cached values.
+      cached_tree_ids = [0, 0, 0]
+      cached_node_ids = [0, 0, 0]
+
+      # Grow tree ensemble.
+      predict_op = boosted_trees_ops.training_predict(
+          tree_ensemble_handle,
+          cached_tree_ids=cached_tree_ids,
+          cached_node_ids=cached_node_ids,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=1)
+
+      logits_updates, new_tree_ids, new_node_ids = session.run(predict_op)
+
+      self.assertAllClose([0, 0, 0], new_tree_ids)
+      self.assertAllClose([3, 4, 2], new_node_ids)
+      self.assertAllClose([[5.], [6.], [7.]], logits_updates)
+
   def testCachedPredictionFromTheSameTreeWithPostPrunedNodes(self):
     """"""Tests that prediction based on previous node in the tree works.""""""
     with self.cached_session() as session:
@@ -924,6 +996,68 @@ class PredictionOpsTest(test_util.TensorFlowTestCase):
       logits = session.run(predict_op)
       self.assertAllClose(expected_logits, logits)
 
+  def testCategoricalSplits(self):
+    """"""Tests the predictions work for categorical splits.""""""
+    with self.cached_session() as session:
+      tree_ensemble_config = boosted_trees_pb2.TreeEnsemble()
+      text_format.Merge(
+          """"""
+        trees {
+          nodes {
+            categorical_split {
+              feature_id: 1
+              value: 2
+              left_id: 1
+              right_id: 2
+            }
+          }
+          nodes {
+            categorical_split {
+              feature_id: 0
+              value: 13
+              left_id: 3
+              right_id: 4
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 7.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 5.0
+            }
+          }
+          nodes {
+            leaf {
+              scalar: 6.0
+            }
+          }
+        }
+        tree_weights: 1.0
+      """""", tree_ensemble_config)
+
+      # Create existing ensemble with one root split
+      tree_ensemble = boosted_trees_ops.TreeEnsemble(
+          'ensemble', serialized_proto=tree_ensemble_config.SerializeToString())
+      tree_ensemble_handle = tree_ensemble.resource_handle
+      resources.initialize_resources(resources.shared_resources()).run()
+
+      feature_0_values = [13, 1, 3]
+      feature_1_values = [2, 2, 1]
+
+      expected_logits = [[5.], [6.], [7.]]
+
+      # Prediction should work fine.
+      predict_op = boosted_trees_ops.predict(
+          tree_ensemble_handle,
+          bucketized_features=[feature_0_values, feature_1_values],
+          logits_dimension=1)
+
+      logits = session.run(predict_op)
+      self.assertAllClose(expected_logits, logits)
+
 
 class FeatureContribsOpsTest(test_util.TensorFlowTestCase):
   """"""Tests feature contribs ops for model understanding.""""""
",0,train
729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce.

GPU memory allocation can be done in one of two modes: efficient (but
complex and therefore somewhat risky) or conservative (simpler, but less
efficient).  The main difference is that 'efficient' allocation allows
the same memory area to be allocated to multiple independent uses
simultaneously, when it should be the case that those uses will in
fact be serial and thus temporally disjoint, while 'conservative'
allocation will always obey the invariant that one piece of memory is
allocated to at most one use at any point in time.

If GPUDevice::RequiresRecordingAccessedTensors() returns false, then
the TF runtime uses efficient memory allocation for GPU ops.  That is, GPU
ops are nominally synchronous and their tensor Ref's are deleted
immediately after the ops returns although really the corresponding GPU
kernel is only guaranteed to have been enqueued on the compute stream
and may not have yet begin execution.

If RequiresRecordingAccessedTensors() returns true, then conservative
memory allocation is used, i.e. Refs on the tensors accessed by a GPU op
are held until the corresponding kernel is guaranteed to have completed
execution and no part of the op will touch them again.

Efficient GPU memory allocation should be safe when the following criteria
are all met:

1. All GPU kernels are executed serially on a single compute stream.
2. All GPU kernel outputs and temp buffers are allocated by
   the GPU Op in the executor thread in which it is originally called.
3. Any read of a GPU tensor computed by a GPU kernel that is not
   by another kernel on that same GPU first synchronizes on
   the compute stream that produced it.
4. Any read by a GPU kernel of a value that was not produced by another
   GPU kernel first synchronizes on the entity that produced it,
   e.g. a copy stream.
5. All direct allocations of GPU memory that are not for kernel outputs
   or temp buffers are conservative in duration.
6. Any use of directly allocated GPU memory that is not part of a kernel
   execution first synchronizes on the compute stream to ensure that
   any prior granted uses of the same region have expired before this new use.

These conditions together should be sufficient for safety, and
correspond to established practice, though it may be possible to
contrive other sets of rules that are also sufficient.

Collective Ops for GPUs are unusual in that they are async (as TF
Ops) and they can directly allocate GPU memory in CPU threads that are
asynchronous to the launching executor thread.  This CL corrects a
couple of subtle misuse errors related to conditions 2 and 6.

PiperOrigin-RevId: 210841522",ring_reducer.cc,"@@ -25,6 +25,7 @@ limitations under the License.
 #include ""tensorflow/core/common_runtime/device.h""
 #include ""tensorflow/core/common_runtime/device_mgr.h""
 #include ""tensorflow/core/common_runtime/dma_helper.h""
+#include ""tensorflow/core/common_runtime/process_util.h""
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/framework/device_base.h""
 #include ""tensorflow/core/framework/op_kernel.h""
@@ -497,13 +498,6 @@ bool RingReducer::RunAsyncParts() {
   rfv_.clear();
   rfv_.resize(group_size_ * num_subdivs_);
   PCQueue ready_queue;
-  int field_done_count = 0;
-  int send_pending_count = 0;
-  int recv_pending_count = 0;
-  std::atomic<bool> aborted(false);
-  field_done_count = 0;
-  send_pending_count = 0;
-  recv_pending_count = 0;
   for (int chunk_idx = 0; chunk_idx < group_size_; ++chunk_idx) {
     for (int subdiv_idx = 0; subdiv_idx < num_subdivs_; ++subdiv_idx) {
       int rf_index = (chunk_idx * num_subdivs_) + subdiv_idx;
@@ -511,6 +505,30 @@ bool RingReducer::RunAsyncParts() {
       ready_queue.Enqueue(&rfv_[rf_index]);
     }
   }
+  const DeviceBase::GpuDeviceInfo* gpu_info =
+      col_ctx_->device->tensorflow_gpu_device_info();
+  if (gpu_info) {
+    // Wait for all currently queued events on the CPU compute stream to
+    // complete before proceeding.  The previous InitRingField calls allocated
+    // temp memory buffers that are not guaranteed to be valid (e.g. for RDMA
+    // write) unless we do.
+    Notification note;
+    Status s = gpu_info->default_context->ThenExecute(
+        col_ctx_->device, gpu_info->stream, [&note]() { note.Notify(); });
+    if (s.ok()) {
+      note.WaitForNotification();
+    } else {
+      mutex_lock l(status_mu_);
+      status_ =
+          errors::Internal(""Failed to dispatch ThenExecute in RingReducer"");
+      return false;
+    }
+  }
+
+  int field_done_count = 0;
+  int send_pending_count = 0;
+  int recv_pending_count = 0;
+  std::atomic<bool> aborted(false);
 
   // Loop until all RingFields have advanced to completion.
   while (field_done_count < rfv_.size()) {
",0,train
729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce.

GPU memory allocation can be done in one of two modes: efficient (but
complex and therefore somewhat risky) or conservative (simpler, but less
efficient).  The main difference is that 'efficient' allocation allows
the same memory area to be allocated to multiple independent uses
simultaneously, when it should be the case that those uses will in
fact be serial and thus temporally disjoint, while 'conservative'
allocation will always obey the invariant that one piece of memory is
allocated to at most one use at any point in time.

If GPUDevice::RequiresRecordingAccessedTensors() returns false, then
the TF runtime uses efficient memory allocation for GPU ops.  That is, GPU
ops are nominally synchronous and their tensor Refs are deleted
immediately after the op returns, although really the corresponding GPU
kernel is only guaranteed to have been enqueued on the compute stream
and may not yet have begun execution.

If RequiresRecordingAccessedTensors() returns true, then conservative
memory allocation is used, i.e. Refs on the tensors accessed by a GPU op
are held until the corresponding kernel is guaranteed to have completed
execution and no part of the op will touch them again.

Efficient GPU memory allocation should be safe when the following criteria
are all met:

1. All GPU kernels are executed serially on a single compute stream.
2. All GPU kernel outputs and temp buffers are allocated by
   the GPU Op in the executor thread in which it is originally called.
3. Any read of a GPU tensor computed by a GPU kernel that is not
   by another kernel on that same GPU first synchronizes on
   the compute stream that produced it.
4. Any read by a GPU kernel of a value that was not produced by another
   GPU kernel first synchronizes on the entity that produced it,
   e.g. a copy stream.
5. All direct allocations of GPU memory that are not for kernel outputs
   or temp buffers are conservative in duration.
6. Any use of directly allocated GPU memory that is not part of a kernel
   execution first synchronizes on the compute stream to ensure that
   any prior granted uses of the same region have expired before this new use.

These conditions together should be sufficient for safety, and
correspond to established practice, though it may be possible to
contrive other sets of rules that are also sufficient.

Collective Ops for GPUs are unusual in that they are async (as TF
Ops) and they can directly allocate GPU memory in CPU threads that are
asynchronous to the launching executor thread.  This CL corrects a
couple of subtle misuse errors related to conditions 2 and 6.

PiperOrigin-RevId: 210841522",tensor_coding.h,"@@ -87,6 +87,9 @@ class TensorResponse {
   // modified.
   const RecvTensorResponse& metadata() const { return meta_; }
 
+  // Return pointer to the device hosting the tensor.
+  DeviceBase* device() const { return device_; }
+
  private:
   bool ParseTensorSubmessage(protobuf::io::CodedInputStream* input,
                              TensorProto* tensor_meta);
",0,train
729e39b1a4f0f7a6b3e35a04bf8bbba5e921862b,tensorflow/tensorflow,"Improve the GPU memory use discipline of CollectiveReduce.

GPU memory allocation can be done in one of two modes: efficient (but
complex and therefore somewhat risky) or conservative (simpler, but less
efficient).  The main difference is that 'efficient' allocation allows
the same memory area to be allocated to multiple independent uses
simultaneously, when it should be the case that those uses will in
fact be serial and thus temporally disjoint, while 'conservative'
allocation will always obey the invariant that one piece of memory is
allocated to at most one use at any point in time.

If GPUDevice::RequiresRecordingAccessedTensors() returns false, then
the TF runtime uses efficient memory allocation for GPU ops.  That is, GPU
ops are nominally synchronous and their tensor Refs are deleted
immediately after the op returns, although really the corresponding GPU
kernel is only guaranteed to have been enqueued on the compute stream
and may not yet have begun execution.

If RequiresRecordingAccessedTensors() returns true, then conservative
memory allocation is used, i.e. Refs on the tensors accessed by a GPU op
are held until the corresponding kernel is guaranteed to have completed
execution and no part of the op will touch them again.

Efficient GPU memory allocation should be safe when the following criteria
are all met:

1. All GPU kernels are executed serially on a single compute stream.
2. All GPU kernel outputs and temp buffers are allocated by
   the GPU Op in the executor thread in which it is originally called.
3. Any read of a GPU tensor computed by a GPU kernel that is not
   by another kernel on that same GPU first synchronizes on
   the compute stream that produced it.
4. Any read by a GPU kernel of a value that was not produced by another
   GPU kernel first synchronizes on the entity that produced it,
   e.g. a copy stream.
5. All direct allocations of GPU memory that are not for kernel outputs
   or temp buffers are conservative in duration.
6. Any use of directly allocated GPU memory that is not part of a kernel
   execution first synchronizes on the compute stream to ensure that
   any prior granted uses of the same region have expired before this new use.

These conditions together should be sufficient for safety, and
correspond to established practice, though it may be possible to
contrive other sets of rules that are also sufficient.

Collective Ops for GPUs are unusual in that they are async (as TF
Ops) and they can directly allocate GPU memory in CPU threads that are
asynchronous to the launching executor thread.  This CL corrects a
couple of subtle misuse errors related to conditions 2 and 6.

PiperOrigin-RevId: 210841522",collective_ops.cc,"@@ -132,14 +132,19 @@ class CollectiveReduceOpKernel : public CollectiveOpKernel {
             ""Failed to get CollectiveExecutor from OpKernelContext for Op "",
             col_params_.name),
         done);
+    // Allocate output on the first pass through this function.  This must be
+    // done immediately, while we're still in the executor thread.  Otherwise
+    // the memory is not guaranteed to be unused by any concurrently executing
+    // GPU kernel.
+    if (c->mutable_output(0) == nullptr) {
+      // Allocate the output tensor, trying to reuse the input.
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK_ASYNC(c,
+                           c->forward_input_or_allocate_output(
+                               {0}, 0, c->input(0).shape(), &output),
+                           done);
+    }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
-    // Allocate the output tensor, trying to reuse the input.
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(c,
-                         c->forward_input_or_allocate_output(
-                             {0}, 0, c->input(0).shape(), &output),
-                         done);
-
     auto actual_done = [c, col_exec, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
       done();
@@ -183,16 +188,23 @@ class CollectiveBcastSendOpKernel : public CollectiveOpKernel {
             ""Failed to get CollectiveExecutor from OpKernelContext for Op "",
             col_params_.name),
         done);
+    // Allocate output on the first pass through this function.  This must be
+    // done immediately, while we're still in the executor thread.  Otherwise
+    // the memory is not guaranteed to be unused by any concurrently executing
+    // GPU kernel.
+    if (c->mutable_output(0) == nullptr) {
+      // Allocate the output tensor, trying to reuse the input.
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK_ASYNC(
+          c, c->forward_input_or_allocate_output({0}, 0, shape_, &output),
+          done);
+    }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
     OP_REQUIRES_ASYNC(
         c, shape_.IsSameSize(c->input(0).shape()),
         errors::Internal(""Declared shape of op "", col_params_.name,
                          "" does not match shape of input""),
         done);
-    // Allocate the output Tensor, trying to reuse the input.
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(
-        c, c->forward_input_or_allocate_output({0}, 0, shape_, &output), done);
 
     auto actual_done = [c, col_exec, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
@@ -239,10 +251,16 @@ class CollectiveBcastRecvOpKernel : public CollectiveOpKernel {
             ""Failed to get CollectiveExecutor from OpKernelContext for Op "",
             col_params_.name),
         done);
+    // Allocate output on the first pass through this function.  This must be
+    // done immediately, while we're still in the executor thread.  Otherwise
+    // the memory is not guaranteed to be unused by any concurrently executing
+    // GPU kernel.
+    if (c->mutable_output(0) == nullptr) {
+      // No input, so must allocate output.
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done);
+    }
     if (!CanProceedWithCompute(c, col_exec, done)) return;
-    // No input, so must allocate output.
-    Tensor* output = nullptr;
-    OP_REQUIRES_OK_ASYNC(c, c->allocate_output(0, shape_, &output), done);
 
     auto actual_done = [c, col_exec, done](const Status& s) {
       OP_REQUIRES_OK_ASYNC(c, s, done);
",0,train
1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA

PiperOrigin-RevId: 236350778",conv_ops.cc,"@@ -32,7 +32,6 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/tensor_slice.h""
-#include ""tensorflow/core/kernels/conv_grad_ops.h""
 #include ""tensorflow/core/kernels/ops_util.h""
 #include ""tensorflow/core/util/padding.h""
 #include ""tensorflow/core/util/tensor_format.h""
",0,train
1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA

PiperOrigin-RevId: 236350778",fft_ops.cc,"@@ -27,7 +27,6 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/tensor_slice.h""
-#include ""tensorflow/core/kernels/conv_grad_ops.h""
 #include ""tensorflow/core/kernels/ops_util.h""
 #include ""tensorflow/core/util/padding.h""
 #include ""tensorflow/core/util/tensor_format.h""
",0,train
1505376085cc87ee03367c1aed9ca5eae970f7ff,tensorflow/tensorflow,"Break dependency on conv_ops in kernels from XLA

PiperOrigin-RevId: 236350778",pooling_ops.cc,"@@ -30,7 +30,6 @@ limitations under the License.
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
-#include ""tensorflow/core/kernels/conv_grad_ops.h""
 #include ""tensorflow/core/kernels/pooling_ops_common.h""
 
 namespace tensorflow {
",0,train
0297d9c1a64270e266a7aeb48f81c78f0a31f63b,tensorflow/tensorflow,"[tf.data] Patch to unref iterator_resource in DeserializeIteratorOp.

PiperOrigin-RevId: 195698980",iterator_ops.cc,"@@ -1051,7 +1051,7 @@ class DeserializeIteratorOp : public OpKernel {
     IteratorResource* iterator_resource;
     OP_REQUIRES_OK(
         ctx, LookupResource(ctx, HandleFromInput(ctx, 0), &iterator_resource));
-
+    core::ScopedUnref unref_iterator(iterator_resource);
     Variant variant = ctx->input(1).scalar<Variant>()();
     auto* wrapper = variant.get<IteratorStateVariant>();
     OP_REQUIRES(ctx, wrapper != nullptr,
",0,train
2bd1d7c555ad3029d5c3fcb1d0982330492305bc,tensorflow/tensorflow,"Tag `tf.keras.preprocessing` as deprecated when generating docs.

All this does is add a ""status:deprecated"" to `tf.keras.preprocessing` in the tensorflow.org TOC, just like for contrib here:

https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib

PiperOrigin-RevId: 433878866",generate2.py,"@@ -25,7 +25,7 @@ Requires a local installation of `tensorflow_docs`:
 pip install git+https://github.com/tensorflow/docs
 ```
 """"""
-
+import distutils
 import pathlib
 import textwrap
 
@@ -181,6 +181,9 @@ def build_docs(output_dir, code_url_prefix, search_hints):
     code_url_prefix: prefix for ""Defined in"" links.
     search_hints: Bool. Include meta-data search hints at the top of each file.
   """"""
+  if distutils.version.LooseVersion(tf.__version__) >= ""2.9"":
+    doc_controls.set_deprecated(tf.keras.preprocessing)
+
   # The custom page will be used for raw_ops.md not the one generated above.
   doc_controls.set_custom_page_builder_cls(tf.raw_ops, RawOpsPageInfo)
 
",0,train
2bd1d7c555ad3029d5c3fcb1d0982330492305bc,tensorflow/tensorflow,"Tag `tf.keras.preprocessing` as deprecated when generating docs.

All this does is add a ""status:deprecated"" to `tf.keras.preprocessing` in the tensorflow.org TOC, just like for contrib here:

https://www.tensorflow.org/versions/r1.15/api_docs/python/tf/contrib

PiperOrigin-RevId: 433878866",generate2_test.py,"@@ -32,6 +32,7 @@ fake_tf.nn = tf.nn
 fake_tf.summary = tf.summary
 fake_tf.raw_ops = types.ModuleType('raw_ops')
 fake_tf.Module = tf.Module
+fake_tf.__version__ = tf.__version__
 
 for name in sorted(dir(tf.raw_ops))[:5]:
   setattr(fake_tf.raw_ops, name, getattr(tf.raw_ops, name))
",0,train
4a09d23ea3346c449f4f42a447347c1f0f9a0cd0,tensorflow/tensorflow,changed from tf.cast to tf.dtypes.cast in example for tf.dtypes.cast,math_ops.py,"@@ -640,7 +640,7 @@ def cast(x, dtype, name=None):
 
   ```python
   x = tf.constant([1.8, 2.2], dtype=tf.float32)
-  tf.cast(x, tf.int32)  # [1, 2], dtype=tf.int32
+  tf.dtypes.cast(x, tf.int32)  # [1, 2], dtype=tf.int32
   ```
 
   The operation supports data types (for `x` and `dtype`) of
",0,train
f18d09553b2f26a07b0b5cd2ee96f68834fd3c10,tensorflow/tensorflow,"Add element tracing for tf.data.experimental.parallel_interleave.

PiperOrigin-RevId: 324696858
Change-Id: I099b9b8935a38e263bd24f008e123c0623432e40",parallel_interleave_dataset_op.cc,"@@ -31,6 +31,8 @@ limitations under the License.
 #include ""tensorflow/core/lib/random/random.h""
 #include ""tensorflow/core/platform/blocking_counter.h""
 #include ""tensorflow/core/platform/stringprintf.h""
+#include ""tensorflow/core/profiler/lib/traceme.h""
+#include ""tensorflow/core/profiler/lib/traceme_encode.h""
 
 namespace tensorflow {
 namespace data {
@@ -323,6 +325,11 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
             }
             *end_of_sequence = false;
             Status s = current_worker->outputs.front().status;
+            profiler::TraceMe traceme([&] {
+              return profiler::TraceMeEncode(
+                  ""ParallelInterleaveConsume"",
+                  {{""element_id"", current_worker->outputs.front().id}});
+            });
             current_worker->outputs.front().output.swap(*out_tensors);
             current_worker->outputs.pop_front();
             current_worker->cond_var.notify_one();
@@ -564,8 +571,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
       Status status;
       // The buffered data element.
       std::vector<Tensor> output;
+      int64 id = -1;
 
       explicit OutputElem(const Status& s) : status(s) {}
+      OutputElem(const Status& s, int64 id) : status(s), id(id) {}
     };
 
     // Worker threads operate on their relevant WorkerState structs.
@@ -813,6 +822,14 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
                   worker_thread_states_[thread_index]
                       .output_elem.output.empty() &&
                   !worker_thread_states_[thread_index].end_of_sequence) {
+                int64& id = worker_thread_states_[thread_index].output_elem.id;
+                profiler::TraceMe traceme(
+                    [&] {
+                      id = profiler::TraceMe::NewActivityId();
+                      return profiler::TraceMeEncode(
+                          ""ParallelInterleaveProduce"", {{""element_id"", id}});
+                    },
+                    profiler::kInfo);
                 worker_thread_states_[thread_index].output_elem.status =
                     worker_thread_states_[thread_index].iterator->GetNext(
                         ctx.get(),
@@ -856,7 +873,8 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
                 worker_thread_states_[thread_index].end_of_sequence = false;
               } else {
                 workers_[thread_index].outputs.emplace_back(
-                    worker_thread_states_[thread_index].output_elem.status);
+                    worker_thread_states_[thread_index].output_elem.status,
+                    worker_thread_states_[thread_index].output_elem.id);
                 workers_[thread_index].outputs.back().output.swap(
                     worker_thread_states_[thread_index].output_elem.output);
               }
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,export_simple_text_embedding.py,"@@ -87,7 +87,7 @@ class TextEmbeddingModel(tf.train.Checkpoint):
 
     return tf.nn.safe_embedding_lookup_sparse(
         embedding_weights=self.embeddings,
-        sparse_ids=tf.SparseTensor(token_ids, token_values, token_dense_shape),
+        sparse_ids=tf.sparse.SparseTensor(token_ids, token_values, token_dense_shape),
         sparse_weights=None,
         combiner=""sqrtn"")
 
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,session.py,"@@ -862,7 +862,7 @@ class BaseSession(SessionInterface):
     * A `tf.Tensor`.
       The corresponding fetched value will be a numpy ndarray containing the
       value of that tensor.
-    * A `tf.SparseTensor`.
+    * A `tf.sparse.SparseTensor`.
       The corresponding fetched value will be a
       `tf.compat.v1.SparseTensorValue`
       containing the value of that sparse tensor.
@@ -907,7 +907,7 @@ class BaseSession(SessionInterface):
       `tf.compat.v1.placeholder`, the shape of
       the value will be checked for compatibility with the placeholder.
     * If the key is a
-      `tf.SparseTensor`,
+      `tf.sparse.SparseTensor`,
       the value should be a
       `tf.compat.v1.SparseTensorValue`.
     * If the key is a nested tuple of `Tensor`s or `SparseTensor`s, the value
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,batching.py,"@@ -88,15 +88,15 @@ def dense_to_ragged_batch(batch_size,
 
 @tf_export(""data.experimental.dense_to_sparse_batch"")
 def dense_to_sparse_batch(batch_size, row_shape):
-  """"""A transformation that batches ragged elements into `tf.SparseTensor`s.
+  """"""A transformation that batches ragged elements into `tf.sparse.SparseTensor`s.
 
   Like `Dataset.padded_batch()`, this transformation combines multiple
   consecutive elements of the dataset, which might have different
   shapes, into a single element. The resulting element has three
   components (`indices`, `values`, and `dense_shape`), which
-  comprise a `tf.SparseTensor` that represents the same data. The
+  comprise a `tf.sparse.SparseTensor` that represents the same data. The
   `row_shape` represents the dense shape of each row in the
-  resulting `tf.SparseTensor`, to which the effective batch size is
+  resulting `tf.sparse.SparseTensor`, to which the effective batch size is
   prepended. For example:
 
   ```python
@@ -121,7 +121,7 @@ def dense_to_sparse_batch(batch_size, row_shape):
       consecutive elements of this dataset to combine in a single batch.
     row_shape: A `tf.TensorShape` or `tf.int64` vector tensor-like object
       representing the equivalent dense shape of a row in the resulting
-      `tf.SparseTensor`. Each element of this dataset must have the same rank as
+      `tf.sparse.SparseTensor`. Each element of this dataset must have the same rank as
       `row_shape`, and must have size less than or equal to `row_shape` in each
       dimension.
 
@@ -283,7 +283,7 @@ def unbatch():
 
 
 class _DenseToSparseBatchDataset(dataset_ops.UnaryDataset):
-  """"""A `Dataset` that batches ragged dense elements into `tf.SparseTensor`s.""""""
+  """"""A `Dataset` that batches ragged dense elements into `tf.sparse.SparseTensor`s.""""""
 
   def __init__(self, input_dataset, batch_size, row_shape):
     """"""See `Dataset.dense_to_sparse_batch()` for more details.""""""
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,grouping.py,"@@ -161,7 +161,7 @@ def bucket_by_sequence_length(element_length_func,
       bucket), and caller must ensure that the source `Dataset` does not contain
       any elements with length longer than `max(bucket_boundaries)`.
     no_padding: `bool`, indicates whether to pad the batch features (features
-      need to be either of type `tf.SparseTensor` or of same shape).
+      need to be either of type `tf.sparse.SparseTensor` or of same shape).
     drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
       whether the last batch should be dropped in the case it has fewer than
       `batch_size` elements; the default behavior is not to drop the smaller
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,from_sparse_tensor_slices_test.py,"@@ -37,7 +37,7 @@ class FromSparseTensorSlicesTest(test_base.DatasetTestBase,
   @combinations.generate(
       combinations.combine(tf_api_version=1, mode=[""graph""]))
   def testFromSparseTensorSlices(self):
-    """"""Test a dataset based on slices of a `tf.SparseTensor`.""""""
+    """"""Test a dataset based on slices of a `tf.sparse.SparseTensor`.""""""
     st = array_ops.sparse_placeholder(dtypes.float64)
     iterator = dataset_ops.make_initializable_iterator(
         dataset_ops.Dataset.from_sparse_tensor_slices(st))
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,dataset_ops.py,"@@ -158,7 +158,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor):
 
   Elements can be nested structures of tuples, named tuples, and dictionaries.
   Element components can be of any type representable by `tf.TypeSpec`,
-  including `tf.Tensor`, `tf.data.Dataset`, `tf.SparseTensor`,
+  including `tf.Tensor`, `tf.data.Dataset`, `tf.sparse.SparseTensor`,
   `tf.RaggedTensor`, and `tf.TensorArray`.
 
   >>> a = 1 # Integer element
@@ -1486,7 +1486,7 @@ class DatasetV2(tracking_base.Trackable, composite_tensor.CompositeTensor):
       array([[ 10, 100], [ 11,  12]], dtype=int32))]
 
     See also `tf.data.experimental.dense_to_sparse_batch`, which combines
-    elements that may have different shapes into a `tf.SparseTensor`.
+    elements that may have different shapes into a `tf.sparse.SparseTensor`.
 
     Args:
       batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
@@ -2296,10 +2296,10 @@ class DatasetV1(DatasetV2):
   @staticmethod
   @deprecation.deprecated(None, ""Use `tf.data.Dataset.from_tensor_slices()`."")
   def from_sparse_tensor_slices(sparse_tensor):
-    """"""Splits each rank-N `tf.SparseTensor` in this dataset row-wise.
+    """"""Splits each rank-N `tf.sparse.SparseTensor` in this dataset row-wise.
 
     Args:
-      sparse_tensor: A `tf.SparseTensor`.
+      sparse_tensor: A `tf.sparse.SparseTensor`.
 
     Returns:
       Dataset: A `Dataset` of rank-(N-1) sparse tensors.
@@ -2909,13 +2909,13 @@ class TensorSliceDataset(DatasetSource):
 
 
 class SparseTensorSliceDataset(DatasetSource):
-  """"""A `Dataset` that splits a rank-N `tf.SparseTensor` into its rows.""""""
+  """"""A `Dataset` that splits a rank-N `tf.sparse.SparseTensor` into its rows.""""""
 
   def __init__(self, sparse_tensor):
     """"""See `Dataset.from_sparse_tensor_slices()` for details.""""""
     if not isinstance(sparse_tensor, sparse_tensor_lib.SparseTensor):
       raise TypeError(
-          ""`sparse_tensor` must be a `tf.SparseTensor` object. Was {}."".format(
+          ""`sparse_tensor` must be a `tf.sparse.SparseTensor` object. Was {}."".format(
               sparse_tensor))
     self._sparse_tensor = sparse_tensor
 
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,iterator_ops.py,"@@ -448,7 +448,7 @@ class Iterator(trackable.Trackable):
   def output_classes(self):
     """"""Returns the class of each component of an element of this iterator.
 
-    The expected values are `tf.Tensor` and `tf.SparseTensor`.
+    The expected values are `tf.Tensor` and `tf.sparse.SparseTensor`.
 
     Returns:
       A nested structure of Python `type` objects corresponding to each
@@ -677,7 +677,7 @@ class OwnedIterator(trackable.Trackable, composite_tensor.CompositeTensor):
   def output_classes(self):
     """"""Returns the class of each component of an element of this iterator.
 
-    The expected values are `tf.Tensor` and `tf.SparseTensor`.
+    The expected values are `tf.Tensor` and `tf.sparse.SparseTensor`.
 
     Returns:
       A nested structure of Python `type` objects corresponding to each
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse.py,"@@ -47,7 +47,7 @@ def as_dense_shapes(shapes, classes):
   Returns:
     a structure matching the nested structure of `shapes`, containing
     `tensor_shape.unknown_shape()` at positions where `classes` contains
-    `tf.SparseTensor` and matching contents of `shapes` otherwise
+    `tf.sparse.SparseTensor` and matching contents of `shapes` otherwise
   """"""
   ret = nest.pack_sequence_as(shapes, [
       tensor_shape.unknown_shape() if c is sparse_tensor.SparseTensor else shape
@@ -65,7 +65,7 @@ def as_dense_types(types, classes):
 
   Returns:
     a structure matching the nested structure of `types`, containing
-    `dtypes.variant` at positions where `classes` contains `tf.SparseTensor` and
+    `dtypes.variant` at positions where `classes` contains `tf.sparse.SparseTensor` and
     matching contents of `types` otherwise
   """"""
   ret = nest.pack_sequence_as(types, [
@@ -106,7 +106,7 @@ def get_classes(tensors):
 
   Returns:
     a structure matching the nested structure of `tensors`, containing
-    `tf.SparseTensor` at positions where `tensors` contains a sparse tensor and
+    `tf.sparse.SparseTensor` at positions where `tensors` contains a sparse tensor and
     `tf.Tensor` otherwise
   """"""
   return nest.pack_sequence_as(tensors, [
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,feature_column.py,"@@ -1969,7 +1969,7 @@ class _CategoricalColumn(_FeatureColumn):
   WARNING: Do not subclass this layer unless you know what you are doing:
   the API is subject to future changes.
 
-  A categorical feature typically handled with a `tf.SparseTensor` of IDs.
+  A categorical feature typically handled with a `tf.sparse.SparseTensor` of IDs.
   """"""
 
   IdWeightPair = collections.namedtuple(  # pylint: disable=invalid-name
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,feature_column_v2.py,"@@ -2515,7 +2515,7 @@ def _create_dense_column_weighted_sum(column, transformation_cache,
 class CategoricalColumn(FeatureColumn):
   """"""Represents a categorical feature.
 
-  A categorical feature typically handled with a `tf.SparseTensor` of IDs.
+  A categorical feature typically handled with a `tf.sparse.SparseTensor` of IDs.
   """"""
 
   IdWeightPair = collections.namedtuple(  # pylint: disable=invalid-name
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,indexed_slices.py,"@@ -84,7 +84,7 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor):
   (e.g. `tf.gather`).
 
   Contrast this representation with
-  `tf.SparseTensor`,
+  `tf.sparse.SparseTensor`,
   which uses multi-dimensional indices and scalar values.
   """"""
 
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ops.py,"@@ -338,7 +338,7 @@ class Tensor(_TensorLike):
   shape of a tensor at execution time.
 
   There are specialized tensors; for these, see `tf.Variable`, `tf.constant`,
-  `tf.placeholder`, `tf.SparseTensor`, and `tf.RaggedTensor`.
+  `tf.placeholder`, `tf.sparse.SparseTensor`, and `tf.RaggedTensor`.
 
   For more on Tensors, see the [guide](https://tensorflow.org/guide/tensor`).
   """"""
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse_tensor.py,"@@ -298,14 +298,14 @@ _pywrap_utils.RegisterType(""SparseTensorValue"", SparseTensorValue)
 
 @tf_export(""SparseTensorSpec"")
 class SparseTensorSpec(type_spec.BatchableTypeSpec):
-  """"""Type specification for a `tf.SparseTensor`.""""""
+  """"""Type specification for a `tf.sparse.SparseTensor`.""""""
 
   __slots__ = [""_shape"", ""_dtype""]
 
   value_type = property(lambda self: SparseTensor)
 
   def __init__(self, shape=None, dtype=dtypes.float32):
-    """"""Constructs a type specification for a `tf.SparseTensor`.
+    """"""Constructs a type specification for a `tf.sparse.SparseTensor`.
 
     Args:
       shape: The dense shape of the `SparseTensor`, or `None` to allow
@@ -473,13 +473,13 @@ def convert_to_tensor_or_sparse_tensor(value, dtype=None, name=None):
 def is_sparse(x):
   """"""Check whether `x` is sparse.
 
-  Check whether an object is a `tf.SparseTensor` or
+  Check whether an object is a `tf.sparse.SparseTensor` or
   `tf.compat.v1.SparseTensorValue`.
 
   Args:
     x: A python object to check.
 
   Returns:
-    `True` iff `x` is a `tf.SparseTensor` or `tf.compat.v1.SparseTensorValue`.
+    `True` iff `x` is a `tf.sparse.SparseTensor` or `tf.compat.v1.SparseTensorValue`.
   """"""
   return isinstance(x, (SparseTensor, SparseTensorValue))
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,local.py,"@@ -782,7 +782,7 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape,
                              output_shape):
   """"""Apply N-D convolution with un-shared weights using a single sparse matmul.
 
-  This method outputs `inputs . tf.SparseTensor(indices=kernel_idxs,
+  This method outputs `inputs . tf.sparse.SparseTensor(indices=kernel_idxs,
   values=kernel, dense_shape=kernel_shape)`, with `.` standing for
   matrix-multiply. It also reshapes `inputs` to 2-D and `output` to (N+2)-D.
 
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,array_ops.py,"@@ -3069,7 +3069,7 @@ def sparse_placeholder(dtype, shape=None, name=None):
     print(sess.run(y, feed_dict={
       x: (indices, values, shape)}))  # Will succeed.
 
-    sp = tf.SparseTensor(indices=indices, values=values, dense_shape=shape)
+    sp = tf.sparse.SparseTensor(indices=indices, values=values, dense_shape=shape)
     sp_value = sp.eval(session=sess)
     print(sess.run(y, feed_dict={x: sp_value}))  # Will succeed.
   ```
@@ -3471,7 +3471,7 @@ def edit_distance(hypothesis, truth, normalize=True, name=""edit_distance""):
   # 'hypothesis' is a tensor of shape `[2, 1]` with variable-length values:
   #   (0,0) = [""a""]
   #   (1,0) = [""b""]
-  hypothesis = tf.SparseTensor(
+  hypothesis = tf.sparse.SparseTensor(
       [[0, 0, 0],
        [1, 0, 0]],
       [""a"", ""b""],
@@ -3482,7 +3482,7 @@ def edit_distance(hypothesis, truth, normalize=True, name=""edit_distance""):
   #   (0,1) = [""a""]
   #   (1,0) = [""b"", ""c""]
   #   (1,1) = [""a""]
-  truth = tf.SparseTensor(
+  truth = tf.sparse.SparseTensor(
       [[0, 1, 0],
        [1, 0, 0],
        [1, 0, 1],
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ctc_ops.py,"@@ -1126,7 +1126,7 @@ def dense_labels_to_sparse(dense, length):
     length: int tensor of shape [batch] The length of each sequence in dense.
 
   Returns:
-    tf.SparseTensor with values only for the valid elements of sequences.
+    tf.sparse.SparseTensor with values only for the valid elements of sequences.
   """"""
 
   flat_values = array_ops.reshape(dense, [-1])
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,map_fn.py,"@@ -106,7 +106,7 @@ def map_fn(fn,
 
     * A `tf.DType` or `tf.TensorSpec` (to describe a `tf.Tensor`)
     * A `tf.RaggedTensorSpec` (to describe a `tf.RaggedTensor`)
-    * A `tf.SparseTensorSpec` (to describe a `tf.SparseTensor`)
+    * A `tf.SparseTensorSpec` (to describe a `tf.sparse.SparseTensor`)
     * A (possibly nested) tuple, list, or dict containing the above types.
 
   #### RaggedTensors
@@ -159,11 +159,11 @@ def map_fn(fn,
 
   #### SparseTensors
 
-  `map_fn` supports `tf.SparseTensor` inputs and outputs.  In particular:
+  `map_fn` supports `tf.sparse.SparseTensor` inputs and outputs.  In particular:
 
     * If `elems` is a `SparseTensor`, then `fn` will be called with each row
       of that sparse tensor. In particular, the value passed to `fn` will be a
-      `tf.SparseTensor` with one fewer dimension than `elems`.
+      `tf.sparse.SparseTensor` with one fewer dimension than `elems`.
 
     * If the result of `map_fn` should be a `SparseTensor`, then use a
       `tf.SparseTensorSpec` to specify `fn_output_signature`.  The individual
@@ -171,7 +171,7 @@ def map_fn(fn,
       `SparseTensor` with one more dimension.
 
   >>> # Example: SparseTensor input
-  >>> st = tf.SparseTensor([[0, 0], [2, 0], [2, 1]], [2, 3, 4], [4, 4])
+  >>> st = tf.sparse.SparseTensor([[0, 0], [2, 0], [2, 1]], [2, 3, 4], [4, 4])
   >>> tf.map_fn(tf.sparse.reduce_sum, st, fn_output_signature=tf.int32)
   <tf.Tensor: shape=(4,), dtype=int32, numpy=array([2, 0, 7, 0], dtype=int32)>
 
@@ -191,9 +191,9 @@ def map_fn(fn,
   *rows* of a `SparseTensor`.  If you wish to map a function over the nonzero
   values, then you should use:
 
-    * `tf.SparseTensor(st.indices, fn(st.values), st.dense_shape)`
+    * `tf.sparse.SparseTensor(st.indices, fn(st.values), st.dense_shape)`
       (if the function is expressible as TensorFlow ops)
-    * `tf.SparseTensor(st.indices, tf.map_fn(fn, st.values), st.dense_shape)`
+    * `tf.sparse.SparseTensor(st.indices, tf.map_fn(fn, st.values), st.dense_shape)`
       (otherwise).
 
   #### `map_fn` vs. vectorized operations
@@ -276,7 +276,7 @@ def map_fn(fn,
 
       * A `tf.DType` or `tf.TensorSpec` (to describe a `tf.Tensor`)
       * A `tf.RaggedTensorSpec` (to describe a `tf.RaggedTensor`)
-      * A `tf.SparseTensorSpec` (to describe a `tf.SparseTensor`)
+      * A `tf.SparseTensorSpec` (to describe a `tf.sparse.SparseTensor`)
       * A (possibly nested) tuple, list, or dict containing the above types.
 
   Returns:
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,math_ops.py,"@@ -1432,8 +1432,8 @@ def equal(x, y, name=None):
   <tf.Tensor: shape=(2,), dtype=bool, numpy=array([ True,  True])>
 
   Args:
-    x: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`.
-    y: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`.
+    x: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`.
+    y: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`.
     name: A name for the operation (optional).
 
   Returns:
@@ -1468,8 +1468,8 @@ def not_equal(x, y, name=None):
   <tf.Tensor: shape=(2,), dtype=bool, numpy=array([False,  False])>
 
   Args:
-    x: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`.
-    y: A `tf.Tensor` or `tf.SparseTensor` or `tf.IndexedSlices`.
+    x: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`.
+    y: A `tf.Tensor` or `tf.sparse.SparseTensor` or `tf.IndexedSlices`.
     name: A name for the operation (optional).
 
   Returns:
@@ -2907,12 +2907,12 @@ def matmul(a,
       **does not support `tf.sparse.SparseTensor`**, it just makes optimizations
       that assume most values in `a` are zero.
       See `tf.sparse.sparse_dense_matmul`
-      for some support for `tf.SparseTensor` multiplication.
+      for some support for `tf.sparse.SparseTensor` multiplication.
     b_is_sparse: If `True`, `b` is treated as a sparse matrix. Notice, this
       **does not support `tf.sparse.SparseTensor`**, it just makes optimizations
       that assume most values in `a` are zero.
       See `tf.sparse.sparse_dense_matmul`
-      for some support for `tf.SparseTensor` multiplication.
+      for some support for `tf.sparse.SparseTensor` multiplication.
     name: Name for the operation (optional).
 
   Returns:
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,pfor.py,"@@ -1217,10 +1217,10 @@ class PFor(object):
     the new dense shape will be (N, max_i(x_i), max_i(y_i), max_i(z_i)).
 
     Args:
-      y: A tf.SparseTensor.
+      y: A tf.sparse.SparseTensor.
 
     Returns:
-      A tf.SparseTensor that is the converted value corresponding to y.
+      A tf.sparse.SparseTensor that is the converted value corresponding to y.
     """"""
     outputs = [
         self._convert_helper(t) for t in (y.indices, y.values, y.dense_shape)
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,ragged_tensor.py,"@@ -1629,7 +1629,7 @@ class RaggedTensor(composite_tensor.CompositeTensor):
 
   @classmethod
   def from_sparse(cls, st_input, name=None, row_splits_dtype=dtypes.int64):
-    """"""Converts a 2D `tf.SparseTensor` to a `RaggedTensor`.
+    """"""Converts a 2D `tf.sparse.SparseTensor` to a `RaggedTensor`.
 
     Each row of the `output` `RaggedTensor` will contain the explicit values
     from the same row in `st_input`.  `st_input` must be ragged-right.  If not
@@ -1637,7 +1637,7 @@ class RaggedTensor(composite_tensor.CompositeTensor):
 
     Example:
 
-    >>> st = tf.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
+    >>> st = tf.sparse.SparseTensor(indices=[[0, 0], [0, 1], [0, 2], [1, 0], [3, 0]],
     ...                      values=[1, 2, 3, 4, 5],
     ...                      dense_shape=[4, 3])
     >>> tf.RaggedTensor.from_sparse(st).to_list()
@@ -1690,7 +1690,7 @@ class RaggedTensor(composite_tensor.CompositeTensor):
             st_input.values, segment_ids, num_segments, validate=False)
 
   def to_sparse(self, name=None):
-    """"""Converts this `RaggedTensor` into a `tf.SparseTensor`.
+    """"""Converts this `RaggedTensor` into a `tf.sparse.SparseTensor`.
 
     Example:
 
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sets_impl.py,"@@ -156,7 +156,7 @@ def set_intersection(a, b, validate_indices=True):
         ((1, 1, 0), 5),
         ((1, 1, 1), 6),
     ])
-    a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2,2,2])
+    a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2,2,2])
 
     # b = np.array([[{1}, {}], [{4}, {5, 6, 7, 8}]])
     b = collections.OrderedDict([
@@ -167,7 +167,7 @@ def set_intersection(a, b, validate_indices=True):
         ((1, 1, 2), 7),
         ((1, 1, 3), 8),
     ])
-    b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
+    b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
 
     # `tf.sets.intersection` is applied to each aligned pair of sets.
     tf.sets.intersection(a, b)
@@ -224,7 +224,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True):
         ((1, 1, 0), 5),
         ((1, 1, 1), 6),
     ])
-    a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2])
+    a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2])
 
     # np.array([[{1, 3}, {2}], [{4, 5}, {5, 6, 7, 8}]])
     b = collections.OrderedDict([
@@ -238,7 +238,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True):
         ((1, 1, 2), 7),
         ((1, 1, 3), 8),
     ])
-    b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
+    b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
 
     # `set_difference` is applied to each aligned pair of sets.
     tf.sets.difference(a, b)
@@ -302,7 +302,7 @@ def set_union(a, b, validate_indices=True):
         ((1, 1, 0), 5),
         ((1, 1, 1), 6),
     ])
-    a = tf.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2])
+    a = tf.sparse.SparseTensor(list(a.keys()), list(a.values()), dense_shape=[2, 2, 2])
 
     # [[{1, 3}, {2}], [{4, 5}, {5, 6, 7, 8}]]
     b = collections.OrderedDict([
@@ -316,7 +316,7 @@ def set_union(a, b, validate_indices=True):
         ((1, 1, 2), 7),
         ((1, 1, 3), 8),
     ])
-    b = tf.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
+    b = tf.sparse.SparseTensor(list(b.keys()), list(b.values()), dense_shape=[2, 2, 4])
 
     # `set_union` is applied to each aligned pair of sets.
     tf.sets.union(a, b)
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,sparse_ops.py,"@@ -16,7 +16,7 @@
 # pylint: disable=g-short-docstring-punctuation
 """"""Sparse Tensor Representation.
 
-See also `tf.SparseTensor`.
+See also `tf.sparse.SparseTensor`.
 """"""
 
 from __future__ import absolute_import
@@ -2460,7 +2460,7 @@ def sparse_softmax(sp_input, name=None):
   values = np.asarray([[[0., np.e], [1., 0.]], [[np.e, 0.], [np.e, np.e]]])
   indices = np.vstack(np.where(values)).astype(np.int64).T
 
-  result = tf.sparse.softmax(tf.SparseTensor(indices, values, shape))
+  result = tf.sparse.softmax(tf.sparse.SparseTensor(indices, values, shape))
   # ...returning a 3-D SparseTensor, equivalent to:
   # [?   1.]     [1    ?]
   # [1.  ? ] and [.5  .5]
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,nest.py,"@@ -310,7 +310,7 @@ def flatten(structure, expand_composites=False):
   Args:
     structure: an arbitrarily nested structure. Note, numpy arrays are
       considered atoms and are not flattened.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
@@ -364,7 +364,7 @@ def assert_same_structure(nest1, nest2, check_types=True,
         considered the same if they are both list subtypes (which allows ""list""
         and ""_ListWrapper"" from trackable dependency tracking to compare
         equal).
-    expand_composites: If true, then composite tensors such as `tf.SparseTensor`
+    expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor`
         and `tf.RaggedTensor` are expanded into their component tensors.
 
   Raises:
@@ -537,7 +537,7 @@ def pack_sequence_as(structure, flat_sequence, expand_composites=False):
         tuples, and dicts. Note: numpy arrays and strings are considered
         scalars.
     flat_sequence: flat sequence to pack.
-    expand_composites: If true, then composite tensors such as `tf.SparseTensor`
+    expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor`
         and `tf.RaggedTensor` are expanded into their component tensors.
 
   Returns:
@@ -574,7 +574,7 @@ def map_structure(func, *structure, **kwargs):
         Note that namedtuples with identical name and fields are always
         considered to have the same shallow structure.
       * `expand_composites`: If set to `True`, then composite tensors such
-        as `tf.SparseTensor` and `tf.RaggedTensor` are expanded into their
+        as `tf.sparse.SparseTensor` and `tf.RaggedTensor` are expanded into their
         component tensors.  If `False` (the default), then composite tensors
         are not expanded.
 
@@ -762,7 +762,7 @@ def assert_shallow_structure(shallow_tree,
       `input_tree` have to be the same. Note that even with check_types==True,
       this function will consider two different namedtuple classes with the same
       name and _fields attribute to be the same class.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
   Raises:
     TypeError: If `shallow_tree` is a sequence but `input_tree` is not.
@@ -911,7 +911,7 @@ def flatten_up_to(shallow_tree, input_tree, check_types=True,
       Note, numpy arrays are considered scalars.
     check_types: bool. If True, check that each node in shallow_tree has the
       same type as the corresponding node in input_tree.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
@@ -1015,7 +1015,7 @@ def flatten_with_tuple_paths_up_to(shallow_tree,
       Note, numpy arrays are considered scalars.
     check_types: bool. If True, check that each node in shallow_tree has the
       same type as the corresponding node in input_tree.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
@@ -1233,7 +1233,7 @@ def get_traverse_shallow_structure(traverse_fn, structure,
       shallow structure of the same type, describing which parts of the
       substructure to traverse.
     structure: The structure to traverse.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
@@ -1313,7 +1313,7 @@ def yield_flat_paths(nest, expand_composites=False):
 
   Args:
     nest: the value to produce a flattened paths list for.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Yields:
@@ -1338,7 +1338,7 @@ def flatten_with_joined_string_paths(structure, separator=""/"",
     structure: the nested structure to flatten.
     separator: string to separate levels of hierarchy in the results, defaults
       to '/'.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
@@ -1362,7 +1362,7 @@ def flatten_with_tuple_paths(structure, expand_composites=False):
 
   Args:
     structure: the nested structure to flatten.
-    expand_composites: If true, then composite tensors such as tf.SparseTensor
+    expand_composites: If true, then composite tensors such as tf.sparse.SparseTensor
        and tf.RaggedTensor are expanded into their component tensors.
 
   Returns:
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,util.h,"@@ -234,7 +234,7 @@ PyObject* AssertSameStructure(PyObject* o1, PyObject* o2, bool check_types,
 //   nest: an arbitrarily nested structure or a scalar object. Note, numpy
 //       arrays are considered scalars.
 //   expand_composites: If true, then composite tensors (such as
-//       `tf.SparseTensor` and `tf.RaggedTensor` are flattened into their
+//       `tf.sparse.SparseTensor` and `tf.RaggedTensor` are flattened into their
 //       component tensors.
 //
 // Returns:
",0,train
652a4b64be70103ca2cd7cf24e8bc671a3898d6e,tensorflow/tensorflow,tf.SparseTensor to tf.sparse.SparseTensor,util_wrapper.cc,"@@ -244,7 +244,7 @@ PYBIND11_MODULE(_pywrap_utils, m) {
       Args:
         nest: an arbitrarily nested structure or a scalar object. Note, numpy
             arrays are considered scalars.
-        expand_composites: If true, then composite tensors such as `tf.SparseTensor`
+        expand_composites: If true, then composite tensors such as `tf.sparse.SparseTensor`
             and `tf.RaggedTensor` are expanded into their component tensors.
 
       Returns:
",0,train
e3ec33da39df95e24c8c22ad5dcf4c3b15707d6c,tensorflow/tensorflow,"Add a header to cover common math constants if not available on some platforms.

PiperOrigin-RevId: 415640862
Change-Id: I0d927f9b1b51429254981e733d4c3c07d1633975",constants.h,"@@ -0,0 +1,61 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_
+
+// Maths constants.
+// The following macros are not always available on all platforms.
+// E.g. MSVC requires additional compile flag to export those.
+#ifndef M_E
+#define M_E 2.7182818284590452354 /* e */
+#endif
+#ifndef M_LOG2E
+#define M_LOG2E 1.4426950408889634074 /* log_2 e */
+#endif
+#ifndef M_LOG10E
+#define M_LOG10E 0.43429448190325182765 /* log_10 e */
+#endif
+#ifndef M_LN2
+#define M_LN2 0.69314718055994530942 /* log_e 2 */
+#endif
+#ifndef M_LN10
+#define M_LN10 2.30258509299404568402 /* log_e 10 */
+#endif
+#ifndef M_PI
+#define M_PI 3.14159265358979323846 /* pi */
+#endif
+#ifndef M_PI_2
+#define M_PI_2 1.57079632679489661923 /* pi/2 */
+#endif
+#ifndef M_PI_4
+#define M_PI_4 0.78539816339744830962 /* pi/4 */
+#endif
+#ifndef M_1_PI
+#define M_1_PI 0.31830988618379067154 /* 1/pi */
+#endif
+#ifndef M_2_PI
+#define M_2_PI 0.63661977236758134308 /* 2/pi */
+#endif
+#ifndef M_2_SQRTPI
+#define M_2_SQRTPI 1.12837916709551257390 /* 2/sqrt(pi) */
+#endif
+#ifndef M_SQRT2
+#define M_SQRT2 1.41421356237309504880 /* sqrt(2) */
+#endif
+#ifndef M_SQRT1_2
+#define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
+#endif
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_CONSTANTS_H_
",0,train
86d29f8a72ad3e8042a0ff6abcb90e863b4c2d08,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-12-12

PiperOrigin-RevId: 285142785
Change-Id: Ie2e4c6f05f934fd6fea658e850d1967cc2268bba",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 11)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 12)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
3f9c3d05329aec8af0330db66b0481d53658ee2a,tensorflow/tensorflow,"Replaced get_shape() with shape.

This is the recommended method to use.",rnn.py,"@@ -217,10 +217,10 @@ def dynamic_rnn(cell,
     parallel_iterations = parallel_iterations or 32
     if sequence_length is not None:
       sequence_length = math_ops.cast(sequence_length, dtypes.int32)
-      if sequence_length.get_shape().rank not in (None, 1):
+      if sequence_length.shape.rank not in (None, 1):
         raise ValueError(
             ""sequence_length must be a vector of length batch_size, ""
-            ""but saw shape: %s"" % sequence_length.get_shape())
+            ""but saw shape: %s"" % sequence_length.shape)
       sequence_length = array_ops.identity(  # Just to find it in the graph.
           sequence_length,
           name=""sequence_length"")
",0,train
7605b750d32a471e234a90a8d056bad8d084fada,tensorflow/tensorflow,"Remove the usage of TF private API ops.uid from Keras.

PiperOrigin-RevId: 320420366
Change-Id: I2be3622a9cdeff207e2c112ff0c220e6ea15f729",hdf5_format_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function
 import os
 import shutil
 import tempfile
+import uuid
 
 from absl.testing import parameterized
 import numpy as np
@@ -1192,8 +1193,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase):
       m = DummySubclassModel()
       v = m.add_weight(name='v', shape=[])
       self.evaluate(v.assign(42.))
-      prefix = os.path.join(self.get_temp_dir(),
-                            '{}'.format(ops.uid()), 'ckpt/')
+      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'ckpt/')
       m.save_weights(prefix)
       self.evaluate(v.assign(2.))
       m.load_weights(prefix)
@@ -1236,8 +1236,7 @@ class TestWeightSavingAndLoadingTFFormat(test.TestCase, parameterized.TestCase):
       m = DummySubclassModel()
       v = m.add_weight(name='v', shape=[])
       self.evaluate(v.assign(42.))
-      prefix = os.path.join(self.get_temp_dir(),
-                            '{}'.format(ops.uid()), 'bckpt')
+      prefix = os.path.join(self.get_temp_dir(), str(uuid.uuid4()), 'bckpt')
       m.save_weights(prefix)
       self.evaluate(v.assign(2.))
       m.load_weights(prefix)
",0,test
1879b51356ecf0a0f4971f9b6ef61d518830c398,tensorflow/tensorflow,Example of text classification from characters using RNNs,text_classification.py,"@@ -68,9 +68,9 @@ def rnn_model(X, y):
     # EMBEDDING_SIZE].
     word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
         embedding_size=EMBEDDING_SIZE, name='words')
-    # Split sequence into list of embedding per word.
+    # Split into list of embedding per word, while removing doc length dim.
     # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
-    word_list = [tf.squeeze(w, [1]) for w in tf.split(1, MAX_DOCUMENT_LENGTH, word_vectors)]
+    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
     # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
     cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
     # Create an unrolled Recurrent Neural Networks to length of
",0,test
1879b51356ecf0a0f4971f9b6ef61d518830c398,tensorflow/tensorflow,Example of text classification from characters using RNNs,text_classification_character_rnn.py,"@@ -0,0 +1,81 @@
+#  Copyright 2015 Google Inc. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the ""License"");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an ""AS IS"" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+""""""
+This is an example of using recurrent neural networks over characters
+for DBpedia dataset to predict class from description of an entity.
+
+This model is similar to one described in this paper:
+   ""Character-level Convolutional Networks for Text Classification""
+   http://arxiv.org/abs/1509.01626
+
+and is somewhat alternative to the Lua code from here:
+   https://github.com/zhangxiangxiao/Crepe
+""""""
+
+import csv
+import numpy as np
+from sklearn import metrics
+
+import tensorflow as tf
+from tensorflow.models.rnn import rnn, rnn_cell
+import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+def load_dataset(filename):
+    target = []
+    data = []
+    reader = csv.reader(open(filename), delimiter=',')
+    for line in reader:
+        target.append(int(line[0]))
+        data.append(line[2])
+    return data, np.array(target, np.float32)
+
+X_train, y_train = load_dataset('dbpedia_csv/train.csv')
+X_test, y_test = load_dataset('dbpedia_csv/test.csv')
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 10
+
+char_processor = skflow.preprocessing.ByteProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(char_processor.fit_transform(X_train)))
+X_test = np.array(list(char_processor.transform(X_test)))
+
+### Models
+
+HIDDEN_SIZE = 20
+
+def char_rnn_model(X, y):
+    byte_list = skflow.ops.one_hot_matrix(X, 256)
+    byte_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, byte_list)
+    cell = rnn_cell.GRUCell(HIDDEN_SIZE)
+    _, encoding = rnn.rnn(cell, byte_list, dtype=tf.float32)
+    return skflow.models.logistic_regression(encoding[-1], y)
+
+classifier = skflow.TensorFlowEstimator(model_fn=char_rnn_model, n_classes=15,
+    steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True,
+    log_device_placement=False)
+
+# Continuously train for 1000 steps & predict on test set.
+while True:
+    classifier.fit(X_train, y_train)
+    score = metrics.accuracy_score(classifier.predict(X_test), y_test)
+    print(""Accuracy: %f"" % score)
+
",0,test
6bf5a24e8805c57a2d7e5741519a090228c76a89,tensorflow/tensorflow,Change: 112640197,padding_fifo_queue.cc,"@@ -155,7 +155,7 @@ void PaddingFIFOQueue::TryDequeueMany(int num_elements, OpKernelContext* ctx,
                       // Expand sizes to match.
                       int64 max_val = 0;
                       for (const Tuple& t : tuples) {
-                        max_val = max(max_val, t[i].shape().dim_size(j));
+                        max_val = std::max(max_val, t[i].shape().dim_size(j));
                       }
                       shape.AddDim(max_val);
                     }
",0,train
348eaa0d0af4106479ef4754277630cc4c4141c0,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-02-26

PiperOrigin-RevId: 235675690",compat.py,"@@ -27,7 +27,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 25)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 2, 26)
 
 
 @tf_export(""compat.forward_compatible"")
",0,train
520c69a59b171bb27c898f8fc72fe5cd99bd32e2,tensorflow/tensorflow,"PSv2: Apply strategy.run() change to parameter_server_training_test.

PiperOrigin-RevId: 327482183
Change-Id: I1e91e50905cb7011fe987a40a65688f2ef1d091c",parameter_server_training_test.py,"@@ -146,18 +146,22 @@ class KPLTest(test.TestCase):
 
       @def_function.function
       def worker_fn(iterator):
-        batch_data, labels = next(iterator)
-        with backprop.GradientTape() as tape:
-          pred = model(batch_data, training=True)
-          loss = nn.compute_average_loss(
-              keras.losses.BinaryCrossentropy(
-                  reduction=loss_reduction.ReductionV2.NONE)(labels, pred))
-          gradients = tape.gradient(loss, model.trainable_variables)
-
-        optimizer.apply_gradients(zip(gradients, model.trainable_variables))
-
-        actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64)
-        accuracy.update_state(labels, actual_pred)
+
+        def train_step(iterator):
+          batch_data, labels = next(iterator)
+          with backprop.GradientTape() as tape:
+            pred = model(batch_data, training=True)
+            loss = nn.compute_average_loss(
+                keras.losses.BinaryCrossentropy(
+                    reduction=loss_reduction.ReductionV2.NONE)(labels, pred))
+            gradients = tape.gradient(loss, model.trainable_variables)
+
+          optimizer.apply_gradients(zip(gradients, model.trainable_variables))
+
+          actual_pred = math_ops.cast(math_ops.greater(pred, 0.5), dtypes.int64)
+          accuracy.update_state(labels, actual_pred)
+
+        self.client._strategy.run(train_step, args=(iterator,))
 
     distributed_iterator = iter(distributed_dataset)
     for _ in range(10):
",0,train
ad5c47fe1f62599b26687911ea9749006aa51ebb,tensorflow/tensorflow,"Replaced get_shape() with shape.

This is the recommended method to use.",audio_microfrontend_op.py,"@@ -96,7 +96,7 @@ def audio_microfrontend(audio,
   Raises:
     ValueError: If the audio tensor is not explicitly a vector.
   """"""
-  audio_shape = audio.get_shape()
+  audio_shape = audio.shape
   if audio_shape.ndims is None:
     raise ValueError(""Input to `AudioMicrofrontend` should have known rank."")
   if len(audio_shape) > 1:
",0,test
fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+

In Python 3.10+, Abstract Base Classes are no longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py.

PiperOrigin-RevId: 415381625
Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",distribute_utils_test.py,"@@ -15,6 +15,7 @@
 """"""Tests for utility functions in distribute_utils.""""""
 
 import collections
+import collections.abc
 
 from absl.testing import parameterized
 import wrapt
@@ -82,8 +83,9 @@ class RegroupAndSelectDeviceTest(test.TestCase, parameterized.TestCase):
     self._is_per_replica(result[""b""], [""b1"", ""b2""])
 
   def testRegroupCollectionsMapping(self):
-    class CollectionsMappingBasedClass(collections.Mapping):
-      """"""Class inherited from collections.Mapping.""""""
+
+    class CollectionsMappingBasedClass(collections.abc.Mapping):
+      """"""Class inherited from collections.abc.Mapping.""""""
 
       def __init__(self, *args, **kwargs):
         self._d = dict(*args, **kwargs)
",0,train
fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+

In Python 3.10+, Abstract Base Classes are no longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py.

PiperOrigin-RevId: 415381625
Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",core_test.py,"@@ -14,7 +14,7 @@
 # ==============================================================================
 """"""Tests for core.""""""
 
-import collections
+import collections.abc
 import os
 import pickle
 import threading
@@ -81,11 +81,11 @@ class TFETest(test_util.TensorFlowTestCase):
 
   def _test_hashable(self, a, b, hashable):
     if hashable:
-      self.assertIsInstance(b, collections.Hashable)
+      self.assertIsInstance(b, collections.abc.Hashable)
       self.assertLen(set([a, b]), 2)
     else:
       # TODO(gjn): Figure out how to make this work for tf.Tensor
-      # self.assertNotIsInstance(b, collections.Hashable)
+      # self.assertNotIsInstance(b, collections.abc.Hashable)
       with self.assertRaisesRegex(TypeError, 'unhashable'):
         set([a, b])
 
",0,train
fcf57c7246472f6c9e47b9c6d804668b52848497,tensorflow/tensorflow,"Use collections.abc for Python 3.10+

In Python 3.10+, Abstract Base Classes are no longer in `collections`. `collections.abc` should be used instead. Update python/eager/core_test.py, python/util/nest_test.py, python/distribute/distribute_utils_test.py.

PiperOrigin-RevId: 415381625
Change-Id: I89b1da7af3254d34b8d2facb57ce2a7ff938f176",nest_test.py,"@@ -15,6 +15,7 @@
 """"""Tests for utilities working with arbitrarily nested structures.""""""
 
 import collections
+import collections.abc
 import time
 from typing import NamedTuple
 
@@ -30,7 +31,6 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.platform import test
 from tensorflow.python.util import nest
-from tensorflow.python.util.compat import collections_abc
 
 try:
   import attr  # pylint:disable=g-import-not-at-top
@@ -38,7 +38,7 @@ except ImportError:
   attr = None
 
 
-class _CustomMapping(collections_abc.Mapping):
+class _CustomMapping(collections.abc.Mapping):
 
   def __init__(self, *args, **kwargs):
     self._wrapped = dict(*args, **kwargs)
@@ -57,7 +57,7 @@ class _CustomList(list):
   pass
 
 
-class _CustomSequenceThatRaisesException(collections.Sequence):
+class _CustomSequenceThatRaisesException(collections.abc.Sequence):
 
   def __len__(self):
     return 1
",0,train
71dbfdbec550845def8db48ed31fb4f978407906,tensorflow/tensorflow,"Summary ops should run on only Chef not on all workers.
Change: 123163672",estimator.py,"@@ -393,6 +393,11 @@ class BaseEstimator(sklearn.BaseEstimator):
           summary_op=logging_ops.get_summary_op(),
           save_summary_steps=100)
 
+      is_chief = self._config.task == 0
+      if not is_chief:
+        # Run monitors only on chief.
+        monitors = []
+
       # Setup monitors.
       for monitor in monitors:
         monitor.set_estimator(self)
@@ -407,7 +412,7 @@ class BaseEstimator(sklearn.BaseEstimator):
           init_feed_dict=init_feed_fn() if init_feed_fn is not None else None,
           init_fn=init_fn,
           log_every_steps=log_every_steps,
-          supervisor_is_chief=(self._config.task == 0),
+          supervisor_is_chief=is_chief,
           supervisor_master=self._config.master,
           feed_fn=feed_fn,
           max_steps=steps,
",0,train
0a451b1aa0baaa3f7abbf8d90dfe58193cf1533e,tensorflow/tensorflow,"Speed up statistical_testing_test by consolidating sess.run calls.

PiperOrigin-RevId: 190721153",statistical_testing_test.py,"@@ -22,39 +22,75 @@ import numpy as np
 
 from tensorflow.contrib.distributions.python.ops import statistical_testing as st
 from tensorflow.python.framework import errors
-from tensorflow.python.ops import check_ops
 from tensorflow.python.platform import test
 
 
 class StatisticalTestingTest(test.TestCase):
 
   def test_dkwm_design_mean_one_sample_soundness(self):
-    numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
+    thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
     rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.]
-    with self.test_session() as sess:
-      for ff in rates:
-        for fp in rates:
-          sufficient_n = st.min_num_samples_for_dkwm_mean_test(
-              numbers, 0., 1., false_fail_rate=ff, false_pass_rate=fp)
-          detectable_d = st.min_discrepancy_of_true_means_detectable_by_dkwm(
-              sufficient_n, 0., 1., false_fail_rate=ff, false_pass_rate=fp)
-          sess.run(check_ops.assert_less_equal(detectable_d, numbers))
+    false_fail_rates, false_pass_rates = np.meshgrid(rates, rates)
+    false_fail_rates = false_fail_rates.flatten().astype(np.float32)
+    false_pass_rates = false_pass_rates.flatten().astype(np.float32)
+
+    detectable_discrepancies = []
+    for false_pass_rate, false_fail_rate in zip(
+        false_pass_rates, false_fail_rates):
+      sufficient_n = st.min_num_samples_for_dkwm_mean_test(
+          thresholds, low=0., high=1., false_fail_rate=false_fail_rate,
+          false_pass_rate=false_pass_rate)
+      detectable_discrepancies.append(
+          st.min_discrepancy_of_true_means_detectable_by_dkwm(
+              sufficient_n, low=0., high=1., false_fail_rate=false_fail_rate,
+              false_pass_rate=false_pass_rate))
+
+    detectable_discrepancies_ = self.evaluate(detectable_discrepancies)
+    for discrepancies, false_pass_rate, false_fail_rate in zip(
+        detectable_discrepancies_, false_pass_rates, false_fail_rates):
+      below_threshold = discrepancies <= thresholds
+      self.assertAllEqual(
+          np.ones_like(below_threshold, np.bool), below_threshold,
+          msg='false_pass_rate({}), false_fail_rate({})'.format(
+              false_pass_rate, false_fail_rate))
 
   def test_dkwm_design_mean_two_sample_soundness(self):
-    numbers = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
+    thresholds = [1e-5, 1e-2, 1.1e-1, 0.9, 1., 1.02, 2., 10., 1e2, 1e5, 1e10]
     rates = [1e-6, 1e-3, 1e-2, 1.1e-1, 0.2, 0.5, 0.7, 1.]
-    with self.test_session() as sess:
-      for ff in rates:
-        for fp in rates:
-          (sufficient_n1,
-           sufficient_n2) = st.min_num_samples_for_dkwm_mean_two_sample_test(
-               numbers, 0., 1., 0., 1.,
-               false_fail_rate=ff, false_pass_rate=fp)
-          d_fn = st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample
-          detectable_d = d_fn(
-              sufficient_n1, 0., 1., sufficient_n2, 0., 1.,
-              false_fail_rate=ff, false_pass_rate=fp)
-          sess.run(check_ops.assert_less_equal(detectable_d, numbers))
+    false_fail_rates, false_pass_rates = np.meshgrid(rates, rates)
+    false_fail_rates = false_fail_rates.flatten().astype(np.float32)
+    false_pass_rates = false_pass_rates.flatten().astype(np.float32)
+
+    detectable_discrepancies = []
+    for false_pass_rate, false_fail_rate in zip(
+        false_pass_rates, false_fail_rates):
+      [
+          sufficient_n1,
+          sufficient_n2
+      ] = st.min_num_samples_for_dkwm_mean_two_sample_test(
+          thresholds, low1=0., high1=1., low2=0., high2=1.,
+          false_fail_rate=false_fail_rate,
+          false_pass_rate=false_pass_rate)
+
+      detectable_discrepancies.append(
+          st.min_discrepancy_of_true_means_detectable_by_dkwm_two_sample(
+              n1=sufficient_n1,
+              low1=0.,
+              high1=1.,
+              n2=sufficient_n2,
+              low2=0.,
+              high2=1.,
+              false_fail_rate=false_fail_rate,
+              false_pass_rate=false_pass_rate))
+
+    detectable_discrepancies_ = self.evaluate(detectable_discrepancies)
+    for discrepancies, false_pass_rate, false_fail_rate in zip(
+        detectable_discrepancies_, false_pass_rates, false_fail_rates):
+      below_threshold = discrepancies <= thresholds
+      self.assertAllEqual(
+          np.ones_like(below_threshold, np.bool), below_threshold,
+          msg='false_pass_rate({}), false_fail_rate({})'.format(
+              false_pass_rate, false_fail_rate))
 
   def test_true_mean_confidence_interval_by_dkwm_one_sample(self):
     rng = np.random.RandomState(seed=0)
",0,train
9ae3853ccabef46d77a1ef16b6a8d7a28121a7bd,tensorflow/tensorflow,"TFLM: nit: Fix an obvious issue in the test.

PiperOrigin-RevId: 277166720
Change-Id: Iff23f196c61f837d65bbda30e7c125281bb12ce6",magic_wand_test.cc,"@@ -154,7 +154,7 @@ TF_LITE_MICRO_TEST(LoadModelAndPerformInference) {
   negative_score = output->data.f[kNegativeIndex];
   TF_LITE_MICRO_EXPECT_GT(slope_score, wing_score);
   TF_LITE_MICRO_EXPECT_GT(slope_score, ring_score);
-  // TF_LITE_MICRO_EXPECT_GT(slope_score, negative_score);
+  TF_LITE_MICRO_EXPECT_GT(slope_score, negative_score);
 }
 
 TF_LITE_MICRO_TESTS_END
",0,train
74137f994faad09593ae2daad6251a4ccf72f558,tensorflow/tensorflow,"Fix signed int overflow issue in tensor_id.cc

When a node name has a long numeric suffix, e.g.,
""foo/y_0/gradient_debug_09684b60f2184c67b744721915034528"" (as has happened with tfdbg GradientsDebugger),

the parsing algorithm in ParseTensorName() may experience signed int overflow. Replacing the types with ""unsigned int"" resolves the issue.

PiperOrigin-RevId: 168039195",tensor_id.cc,"@@ -34,8 +34,8 @@ TensorId ParseTensorName(StringPiece name) {
   // whole name string forms the first part of the tensor name.
   const char* base = name.data();
   const char* p = base + name.size() - 1;
-  int index = 0;
-  int mul = 1;
+  unsigned int index = 0;
+  unsigned int mul = 1;
   while (p > base && (*p >= '0' && *p <= '9')) {
     index += ((*p - '0') * mul);
     mul *= 10;
",0,test
3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero.

PiperOrigin-RevId: 191938267",optimized_ops.h,"@@ -5067,7 +5067,7 @@ template <typename T>
 inline void Pad(const T* input_data, const Dims<4>& input_dims,
                 const std::vector<int>& left_paddings,
                 const std::vector<int>& right_paddings, T* output_data,
-                const Dims<4>& output_dims) {
+                const Dims<4>& output_dims, const int32_t pad_value) {
   gemmlowp::ScopedProfilingLabel label(""Pad"");
   const int output_batch = ArraySize(output_dims, 3);
   const int output_height = ArraySize(output_dims, 2);
@@ -5087,27 +5087,27 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
   const int input_depth = ArraySize(input_dims, 0);
 
   if (left_b_padding != 0) {
-    memset(output_data, 0,
+    memset(output_data, pad_value,
            left_b_padding * output_height * output_width * output_depth *
                sizeof(T));
   }
   for (int out_b = left_b_padding; out_b < output_batch - right_b_padding;
        ++out_b) {
     if (left_h_padding != 0) {
-      memset(output_data + Offset(output_dims, 0, 0, 0, out_b), 0,
+      memset(output_data + Offset(output_dims, 0, 0, 0, out_b), pad_value,
              left_h_padding * output_width * output_depth * sizeof(T));
     }
     for (int out_h = left_h_padding; out_h < output_height - right_h_padding;
          ++out_h) {
       if (left_w_padding != 0) {
-        memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), 0,
+        memset(output_data + Offset(output_dims, 0, 0, out_h, out_b), pad_value,
                left_w_padding * output_depth * sizeof(T));
       }
       for (int out_w = left_w_padding; out_w < output_width - right_w_padding;
            ++out_w) {
         if (left_d_padding != 0) {
-          memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b), 0,
-                 left_d_padding * sizeof(T));
+          memset(output_data + Offset(output_dims, 0, out_w, out_h, out_b),
+                 pad_value, left_d_padding * sizeof(T));
         }
 
         T* out = output_data +
@@ -5121,20 +5121,21 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
           memset(
               output_data + Offset(output_dims, output_depth - right_d_padding,
                                    out_w, out_h, out_b),
-              0, right_d_padding * sizeof(T));
+              pad_value, right_d_padding * sizeof(T));
         }
       }
       if (right_w_padding != 0) {
         memset(
             output_data + Offset(output_dims, 0, output_width - right_w_padding,
                                  out_h, out_b),
-            0, right_w_padding * output_depth * sizeof(T));
+            pad_value, right_w_padding * output_depth * sizeof(T));
       }
     }
     if (right_h_padding != 0) {
       memset(output_data + Offset(output_dims, 0, 0,
                                   output_height - right_h_padding, out_b),
-             0, right_h_padding * output_width * output_depth * sizeof(T));
+             pad_value,
+             right_h_padding * output_width * output_depth * sizeof(T));
     }
   }
   if (right_b_padding != 0) {
@@ -5146,6 +5147,15 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
+template <typename T>
+inline void Pad(const T* input_data, const Dims<4>& input_dims,
+                const std::vector<int>& left_paddings,
+                const std::vector<int>& right_paddings, T* output_data,
+                const Dims<4>& output_dims) {
+  Pad(input_data, input_dims, left_paddings, right_paddings, output_data,
+      output_dims, 0);
+}
+
 template <typename T>
 inline void StridedSlice(const T* input_data, const Dims<4>& input_dims,
                          int begin_mask, int end_mask,
",0,train
3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero.

PiperOrigin-RevId: 191938267",reference_ops.h,"@@ -3086,7 +3086,7 @@ template <typename T>
 inline void Pad(const T* input_data, const Dims<4>& input_dims,
                 const std::vector<int>& left_paddings,
                 const std::vector<int>& right_paddings, T* output_data,
-                const Dims<4>& output_dims) {
+                const Dims<4>& output_dims, const int32_t pad_value) {
   const int output_batch = ArraySize(output_dims, 3);
   const int output_height = ArraySize(output_dims, 2);
   const int output_width = ArraySize(output_dims, 1);
@@ -3116,7 +3116,7 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
               out_w >= output_width - right_w_padding ||
               out_d < left_d_padding ||
               out_d >= output_depth - right_d_padding) {
-            *out_ptr++ = 0;
+            *out_ptr++ = static_cast<T>(pad_value);
           } else {
             *out_ptr++ = *in_ptr++;
           }
@@ -3126,6 +3126,15 @@ inline void Pad(const T* input_data, const Dims<4>& input_dims,
   }
 }
 
+template <typename T>
+inline void Pad(const T* input_data, const Dims<4>& input_dims,
+                const std::vector<int>& left_paddings,
+                const std::vector<int>& right_paddings, T* output_data,
+                const Dims<4>& output_dims) {
+  Pad(input_data, input_dims, left_paddings, right_paddings, output_data,
+      output_dims, 0);
+}
+
 inline bool LoopCondition(int index, int stop, int stride) {
   return stride > 0 ? index < stop : index > stop;
 }
",0,train
3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero.

PiperOrigin-RevId: 191938267",pad.cc,"@@ -119,39 +119,46 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     after_padding.push_back(paddings_data[idx * 2 + 1]);
   }
 
-#define TF_LITE_PAD(type, scalar)                                           \
+#define TF_LITE_PAD(type, scalar, pad_value)                                \
   type::Pad(GetTensorData<scalar>(op_context.input),                        \
             GetTensorDims(op_context.input), before_padding, after_padding, \
             GetTensorData<scalar>(op_context.output),                       \
-            GetTensorDims(op_context.output))
+            GetTensorDims(op_context.output), pad_value)
 
   switch (op_context.input->type) {
     case kTfLiteFloat32:
       if (kernel_type == kReference) {
-        TF_LITE_PAD(reference_ops, float);
+        TF_LITE_PAD(reference_ops, float, 0);
       } else if (kernel_type == kGenericOptimized) {
-        TF_LITE_PAD(optimized_ops, float);
+        TF_LITE_PAD(optimized_ops, float, 0);
       }
       break;
     case kTfLiteUInt8:
+      // Quantized Pad requires that 0 is represented in the quantized range.
+      TF_LITE_ENSURE(context, op_context.output->params.zero_point >=
+                                  std::numeric_limits<uint8_t>::min());
+      TF_LITE_ENSURE(context, op_context.output->params.zero_point <=
+                                  std::numeric_limits<uint8_t>::max());
       if (kernel_type == kReference) {
-        TF_LITE_PAD(reference_ops, uint8_t);
+        TF_LITE_PAD(reference_ops, uint8_t,
+                    op_context.output->params.zero_point);
       } else if (kernel_type == kGenericOptimized) {
-        TF_LITE_PAD(optimized_ops, uint8_t);
+        TF_LITE_PAD(optimized_ops, uint8_t,
+                    op_context.output->params.zero_point);
       }
       break;
     case kTfLiteInt32:
       if (kernel_type == kReference) {
-        TF_LITE_PAD(reference_ops, int32_t);
+        TF_LITE_PAD(reference_ops, int32_t, 0);
       } else if (kernel_type == kGenericOptimized) {
-        TF_LITE_PAD(optimized_ops, int32_t);
+        TF_LITE_PAD(optimized_ops, int32_t, 0);
       }
       break;
     case kTfLiteInt64:
       if (kernel_type == kReference) {
-        TF_LITE_PAD(reference_ops, int64_t);
+        TF_LITE_PAD(reference_ops, int64_t, 0);
       } else if (kernel_type == kGenericOptimized) {
-        TF_LITE_PAD(optimized_ops, int64_t);
+        TF_LITE_PAD(optimized_ops, int64_t, 0);
       }
       break;
     default:
",0,train
3745f2582daeae7a49a129e250cf0cc2d573924a,tensorflow/tensorflow,"Pad support for quantized zero.

PiperOrigin-RevId: 191938267",pad_test.cc,"@@ -22,6 +22,7 @@ namespace tflite {
 namespace {
 
 using ::testing::ElementsAreArray;
+using ::testing::Matcher;
 
 class PadOpModel : public SingleOpModel {
  public:
@@ -29,6 +30,10 @@ class PadOpModel : public SingleOpModel {
     PopulateTensor<float>(input_, data);
   }
 
+  void SetQuantizedInput(std::initializer_list<float> data) {
+    QuantizeAndPopulate<uint8_t>(input_, data);
+  }
+
   void SetPaddings(std::initializer_list<int> paddings) {
     PopulateTensor<int>(paddings_, paddings);
   }
@@ -36,6 +41,11 @@ class PadOpModel : public SingleOpModel {
   std::vector<float> GetOutput() { return ExtractVector<float>(output_); }
   std::vector<int> GetOutputShape() { return GetTensorShape(output_); }
 
+  std::vector<float> GetDequantizedOutput() {
+    return Dequantize<uint8_t>(ExtractVector<uint8_t>(output_),
+                               GetScale(output_), GetZeroPoint(output_));
+  }
+
  protected:
   int input_;
   int output_;
@@ -50,16 +60,17 @@ class PadOpModel : public SingleOpModel {
 //    m.Invoke();
 class PadOpConstModel : public PadOpModel {
  public:
-  PadOpConstModel(std::initializer_list<int> input_shape,
+  PadOpConstModel(const TensorData& input,
                   std::initializer_list<int> paddings_shape,
-                  std::initializer_list<int> paddings) {
-    input_ = AddInput(TensorType_FLOAT32);
+                  std::initializer_list<int> paddings,
+                  const TensorData& output) {
+    input_ = AddInput(input);
     paddings_ = AddConstInput(TensorType_INT32, paddings, paddings_shape);
-    output_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(output);
 
     SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions,
                  CreatePadOptions(builder_).Union());
-    BuildInterpreter({input_shape});
+    BuildInterpreter({input.shape});
   }
 };
 
@@ -72,40 +83,45 @@ class PadOpConstModel : public PadOpModel {
 //    m.Invoke();
 class PadOpDynamicModel : public PadOpModel {
  public:
-  PadOpDynamicModel(std::initializer_list<int> input_shape,
-                    std::initializer_list<int> paddings_shape) {
-    input_ = AddInput(TensorType_FLOAT32);
+  PadOpDynamicModel(const TensorData& input,
+                    std::initializer_list<int> paddings_shape,
+                    const TensorData& output) {
+    input_ = AddInput(input);
     paddings_ = AddInput(TensorType_INT32);
-    output_ = AddOutput(TensorType_FLOAT32);
+    output_ = AddOutput(output);
 
     SetBuiltinOp(BuiltinOperator_PAD, BuiltinOptions_PadOptions,
                  CreatePadOptions(builder_).Union());
-    BuildInterpreter({input_shape, paddings_shape});
+    BuildInterpreter({input.shape, paddings_shape});
   }
 };
 
 TEST(PadOpTest, TooManyDimensions) {
   EXPECT_DEATH(
-      PadOpConstModel({1, 2, 3, 4, 5, 6, 7, 8, 9}, {9, 2},
-                      {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9}),
+      PadOpConstModel({TensorType_FLOAT32, {1, 2, 3, 4, 5, 6, 7, 8, 9}}, {9, 2},
+                      {1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9},
+                      {TensorType_FLOAT32}),
       ""dims != 4"");
 }
 
 TEST(PadOpTest, UnequalDimensions) {
-  EXPECT_DEATH(PadOpConstModel({1, 1, 2, 1}, {3, 2}, {1, 1, 2, 2, 3, 3}),
+  EXPECT_DEATH(PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {3, 2},
+                               {1, 1, 2, 2, 3, 3}, {TensorType_FLOAT32}),
                ""3 != 4"");
 }
 
 TEST(PadOpTest, InvalidPadValue) {
   EXPECT_DEATH(
-      PadOpConstModel({1, 1, 2, 1}, {4, 2}, {0, 0, 1, -1, 2, -1, 0, 0}),
+      PadOpConstModel({TensorType_FLOAT32, {1, 1, 2, 1}}, {4, 2},
+                      {0, 0, 1, -1, 2, -1, 0, 0}, {TensorType_FLOAT32}),
       ""Pad value has to be greater than equal to 0."");
 }
 
 TEST(PadOpTest, SimpleConstTest) {
   // Padding is represented as four 2-D lists representing above padding and
   // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
-  PadOpConstModel m({1, 2, 2, 1}, {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0});
+  PadOpConstModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
+                    {0, 0, 1, 1, 1, 1, 0, 0}, {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(), ElementsAreArray({0, 0, 0, 0, 0, 1, 2, 0, 0, 3, 4,
@@ -114,7 +130,8 @@ TEST(PadOpTest, SimpleConstTest) {
 }
 
 TEST(PadOpTest, SimpleDynamicTest) {
-  PadOpDynamicModel m({1, 2, 2, 1}, {4, 2});
+  PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 2, 1}}, {4, 2},
+                      {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4});
   m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
   m.Invoke();
@@ -124,7 +141,8 @@ TEST(PadOpTest, SimpleDynamicTest) {
 }
 
 TEST(PadOpTest, AdvancedConstTest) {
-  PadOpConstModel m({1, 2, 3, 1}, {4, 2}, {0, 0, 0, 2, 1, 3, 0, 0});
+  PadOpConstModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2},
+                    {0, 0, 0, 2, 1, 3, 0, 0}, {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4, 5, 6});
   m.Invoke();
   EXPECT_THAT(m.GetOutput(),
@@ -134,7 +152,8 @@ TEST(PadOpTest, AdvancedConstTest) {
 }
 
 TEST(PadOpTest, AdvancedDynamicTest) {
-  PadOpDynamicModel m({1, 2, 3, 1}, {4, 2});
+  PadOpDynamicModel m({TensorType_FLOAT32, {1, 2, 3, 1}}, {4, 2},
+                      {TensorType_FLOAT32});
   m.SetInput({1, 2, 3, 4, 5, 6});
   m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0});
   m.Invoke();
@@ -144,6 +163,80 @@ TEST(PadOpTest, AdvancedDynamicTest) {
   EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1}));
 }
 
+class QuantizedPadOpTest : public ::testing::Test {
+ protected:
+  std::vector<Matcher<float>> DequantizedArrayNear(
+      const std::vector<float>& values, const float min, const float max) {
+    const float quantization_tolerance = (max - min) / 255.0;
+    return ArrayFloatNear(values, quantization_tolerance);
+  }
+};
+
+TEST_F(QuantizedPadOpTest, ZeroNotInQuantizationRange) {
+  // The test_util and actual quantization code currently ensure that the range
+  // must include zero, but if that ever changes, this test will catch it.
+  EXPECT_DEATH(PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, 1.0, 2.0},
+                                 {4, 2}, {0, 0, 1, 1, 1, 1, 0, 0},
+                                 {TensorType_UINT8, {}, 1.0, 2.0}),
+               "".*Check failed: f_min <= 0.*"");
+}
+
+TEST_F(QuantizedPadOpTest, SimpleConstTest) {
+  // Padding is represented as four 2-D lists representing above padding and
+  // below padding (i.e. {{0, 0}, {1, 1}, {1, 1}, {0, 0}}).
+  PadOpConstModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+                    {0, 0, 1, 1, 1, 1, 0, 0},
+                    {TensorType_UINT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0},
+                  -1.0, 1.0)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST_F(QuantizedPadOpTest, SimpleDynamicTest) {
+  PadOpDynamicModel m({TensorType_UINT8, {1, 2, 2, 1}, -1.0, 1.0}, {4, 2},
+                      {TensorType_UINT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7});
+  m.SetPaddings({0, 0, 1, 1, 1, 1, 0, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, 0, 0, 0, 0, -0.8, 0.2, 0, 0, 0.9, 0.7, 0, 0, 0, 0, 0},
+                  -1.0, 1.0)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 4, 1}));
+}
+
+TEST_F(QuantizedPadOpTest, AdvancedConstTest) {
+  PadOpConstModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+                    {0, 0, 0, 2, 1, 3, 0, 0},
+                    {TensorType_UINT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+                   0, 0,    0,   0,   0, 0, 0, 0, 0,   0,   0,    0, 0, 0},
+                  -1.0, 1.0)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1}));
+}
+
+TEST_F(QuantizedPadOpTest, AdvancedDynamicTest) {
+  PadOpDynamicModel m({TensorType_UINT8, {1, 2, 3, 1}, -1.0, 1.0}, {4, 2},
+                      {TensorType_UINT8, {}, -1.0, 1.0});
+  m.SetQuantizedInput({-0.8, 0.2, 0.9, 0.7, 0.1, -0.3});
+  m.SetPaddings({0, 0, 0, 2, 1, 3, 0, 0});
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput(),
+              ElementsAreArray(DequantizedArrayNear(
+                  {0, -0.8, 0.2, 0.9, 0, 0, 0, 0, 0.7, 0.1, -0.3, 0, 0, 0,
+                   0, 0,    0,   0,   0, 0, 0, 0, 0,   0,   0,    0, 0, 0},
+                  -1.0, 1.0)));
+  EXPECT_THAT(m.GetOutputShape(), ElementsAreArray({1, 4, 7, 1}));
+}
+
 }  // namespace
 }  // namespace tflite
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parallel_interleave_test.py,"@@ -760,7 +760,7 @@ class ParallelInterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
               interleave_fn, cycle_length=10, sloppy=sloppy))
 
       opts = options_lib.Options()
-      opts.experimental_deterministic = global_determinism
+      opts.deterministic = global_determinism
       dataset = dataset.with_options(opts)
       return dataset
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parse_example_dataset_test.py,"@@ -1140,7 +1140,7 @@ class ParseExampleDatasetTest(test_base.DatasetTestBase,
             deterministic=local_determinism))
 
     opts = options_lib.Options()
-    opts.experimental_deterministic = global_determinism
+    opts.deterministic = global_determinism
     dataset = dataset.with_options(opts)
 
     expected = list(range(num_elements))
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",data_service_ops_test.py,"@@ -437,7 +437,7 @@ class DataServiceOpsTest(data_service_test_base.TestBase,
       ds = dataset_ops.Dataset.from_tensor_slices(elements)
       ds = ds.interleave(interleave_fn, cycle_length=10, num_parallel_calls=10)
       opts = options_lib.Options()
-      opts.experimental_deterministic = False
+      opts.deterministic = False
       ds = ds.with_options(opts)
       ds = self.make_distributed_dataset(ds, cluster)
       return ds
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",interleave_ops.py,"@@ -38,7 +38,7 @@ from tensorflow.python.util.tf_export import tf_export
     None,
     ""Use `tf.data.Dataset.interleave(map_func, cycle_length, block_length, ""
     ""num_parallel_calls=tf.data.AUTOTUNE)` instead. If sloppy ""
-    ""execution is desired, use `tf.data.Options.experimental_deterministic`."")
+    ""execution is desired, use `tf.data.Options.deterministic`."")
 @tf_export(""data.experimental.parallel_interleave"")
 def parallel_interleave(map_func,
                         cycle_length,
@@ -78,9 +78,8 @@ def parallel_interleave(map_func,
       `Dataset` before advancing to the next input `Dataset`.
     sloppy: A boolean controlling whether determinism should be traded for
       performance by allowing elements to be produced out of order.  If `sloppy`
-      is `None`, the `tf.data.Options.experimental_deterministic` dataset option
-      (`True` by default) is used to decide whether to enforce a deterministic
-      order.
+      is `None`, the `tf.data.Options.deterministic` dataset option (`True` by
+      default) is used to decide whether to enforce a deterministic order.
     buffer_output_elements: The number of elements each iterator being
       interleaved should buffer (similar to the `.prefetch()` transformation for
       each interleaved iterator).
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",parsing_ops.py,"@@ -131,9 +131,8 @@ def parse_example_dataset(features, num_parallel_calls=1, deterministic=None):
       should be traded for performance by allowing elements to be produced out
       of order if some parsing calls complete faster than others. If
       `deterministic` is `None`, the
-      `tf.data.Options.experimental_deterministic` dataset option (`True` by
-      default) is used to decide whether to produce elements
-      deterministically.
+      `tf.data.Options.deterministic` dataset option (`True` by default) is used
+      to decide whether to produce elements deterministically.
 
   Returns:
     A dataset transformation function, which can be passed to
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",readers.py,"@@ -573,7 +573,7 @@ def make_csv_dataset_v2(
     dataset = dataset.interleave(
         filename_to_dataset, num_parallel_calls=num_parallel_reads)
     options = options_lib.Options()
-    options.experimental_deterministic = not sloppy
+    options.deterministic = not sloppy
     dataset = dataset.with_options(options)
   else:
     # Read files sequentially (if num_parallel_reads=1) or in parallel
@@ -1025,7 +1025,7 @@ def make_batched_features_dataset_v2(file_pattern,
         lambda filename: reader(filename, *reader_args),
         num_parallel_calls=reader_num_threads)
     options = options_lib.Options()
-    options.experimental_deterministic = not sloppy_ordering
+    options.deterministic = not sloppy_ordering
     dataset = dataset.with_options(options)
   else:
     # Read files sequentially (if reader_num_threads=1) or in parallel
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",batch_test.py,"@@ -269,7 +269,7 @@ class BatchTest(test_base.DatasetTestBase, parameterized.TestCase):
           batch_size=6, num_parallel_calls=2,
           deterministic=local_determinism).unbatch()
       opts = options_lib.Options()
-      opts.experimental_deterministic = global_determinism
+      opts.deterministic = global_determinism
       dataset = dataset.with_options(opts)
       return dataset
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",interleave_test.py,"@@ -285,7 +285,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
             lambda x: dataset_ops.Dataset.from_tensors(x).repeat(x),
             cycle_length, block_length, num_parallel_calls)
     options = options_lib.Options()
-    options.experimental_deterministic = False
+    options.deterministic = False
     dataset = dataset.with_options(options)
     expected_output = [
         element for element in _interleave(
@@ -351,7 +351,7 @@ class InterleaveTest(test_base.DatasetTestBase, parameterized.TestCase):
           num_parallel_calls=10,
           deterministic=local_determinism)
       opts = options_lib.Options()
-      opts.experimental_deterministic = global_determinism
+      opts.deterministic = global_determinism
       dataset = dataset.with_options(opts)
       return dataset
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",map_test.py,"@@ -1291,7 +1291,7 @@ class MapTest(test_base.DatasetTestBase, parameterized.TestCase):
           num_parallel_calls=2,
           deterministic=local_determinism)
       opts = options_lib.Options()
-      opts.experimental_deterministic = global_determinism
+      opts.deterministic = global_determinism
       dataset = dataset.with_options(opts)
       return dataset
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",options_test.py,"@@ -66,14 +66,14 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase):
     options1 = options_lib.Options()
     options1.experimental_optimization.autotune = True
     options2 = options_lib.Options()
-    options2.experimental_deterministic = False
+    options2.deterministic = False
     ds = dataset_ops.Dataset.range(0)
     ds = ds.with_options(options1)
     ds = ds.with_options(options2)
     options = self._get_options(ds)
     self.assertTrue(options.experimental_optimization.autotune)
     # Explicitly check that flag is False since assertFalse allows None
-    self.assertIs(options.experimental_deterministic, False)
+    self.assertIs(options.deterministic, False)
 
   @combinations.generate(test_base.default_test_combinations())
   def testOptionsTwiceSameOption(self):
@@ -94,13 +94,13 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase):
     options1 = options_lib.Options()
     options1.experimental_optimization.autotune = True
     options2 = options_lib.Options()
-    options2.experimental_deterministic = True
+    options2.deterministic = True
     ds1 = dataset_ops.Dataset.range(0).with_options(options1)
     ds2 = dataset_ops.Dataset.range(0).with_options(options2)
     ds = dataset_ops.Dataset.zip((ds1, ds2))
     options = self._get_options(ds)
     self.assertTrue(options.experimental_optimization.autotune)
-    self.assertTrue(options.experimental_deterministic)
+    self.assertTrue(options.deterministic)
 
   @combinations.generate(test_base.default_test_combinations())
   def testOptionsHaveDefaults(self):
@@ -125,7 +125,7 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase):
     ds = ds.with_options(options2)
     dataset_options = ds.options()
     with self.assertRaises(ValueError):
-      dataset_options.experimental_deterministic = True
+      dataset_options.deterministic = True
 
   @combinations.generate(test_base.eager_only_combinations())
   def testNestedDataset(self):
@@ -139,7 +139,7 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase):
   @combinations.generate(test_base.default_test_combinations())
   def testOptionsProtoRoundTrip(self):
     options = options_lib.Options()
-    options.experimental_deterministic = True
+    options.deterministic = True
     options.experimental_external_state_policy = (
         options_lib.ExternalStatePolicy.FAIL)
     options.experimental_distribute.auto_shard_policy = (
@@ -209,6 +209,20 @@ class OptionsTest(test_base.DatasetTestBase, parameterized.TestCase):
     self.assertEqual(result.experimental_threading.max_intra_op_parallelism,
                      result.threading.max_intra_op_parallelism)
 
+  @combinations.generate(test_base.default_test_combinations())
+  def testExperimentalDeterministicOverride(self):
+    options = options_lib.Options()
+    self.assertEqual(options.deterministic, options.experimental_deterministic)
+    options.experimental_deterministic = False
+    pb = options._to_proto()
+    result = options_lib.Options()
+    result._from_proto(pb)
+    self.assertFalse(result.deterministic)
+    self.assertEqual(result.deterministic, result.experimental_deterministic)
+    result.experimental_deterministic = True
+    self.assertTrue(result.deterministic)
+    self.assertEqual(result.deterministic, result.experimental_deterministic)
+
   @combinations.generate(test_base.default_test_combinations())
   def testPersistenceOptionsSetOutsideFunction(self):
 
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",dataset_ops.py,"@@ -1561,8 +1561,8 @@ class DatasetV2(collections_abc.Iterable, tracking_base.Trackable,
         the transformation produces elements. If set to `False`, the
         transformation is allowed to yield elements out of order to trade
         determinism for performance. If not specified, the
-        `tf.data.Options.experimental_deterministic` option
-        (`True` by default) controls the behavior.
+        `tf.data.Options.deterministic` option (`True` by default) controls the
+        behavior.
 
     Returns:
       Dataset: A `Dataset`.
@@ -1844,8 +1844,8 @@ name=None))
         the transformation produces elements. If set to `False`, the
         transformation is allowed to yield elements out of order to trade
         determinism for performance. If not specified, the
-        `tf.data.Options.experimental_deterministic` option
-        (`True` by default) controls the behavior.
+        `tf.data.Options.deterministic` option (`True` by default) controls the
+        behavior.
 
     Returns:
       Dataset: A `Dataset`.
@@ -1998,8 +1998,8 @@ name=None))
         the transformation produces elements. If set to `False`, the
         transformation is allowed to yield elements out of order to trade
         determinism for performance. If not specified, the
-        `tf.data.Options.experimental_deterministic` option
-        (`True` by default) controls the behavior.
+        `tf.data.Options.deterministic` option (`True` by default) controls the
+        behavior.
 
     Returns:
       Dataset: A `Dataset`.
@@ -2495,7 +2495,7 @@ name=None))
     ...                    num_parallel_calls=3)
     >>> options = tf.data.Options()
     >>> # This will make the interleave order non-deterministic.
-    >>> options.experimental_deterministic = False
+    >>> options.deterministic = False
     >>> ds = ds.with_options(options)
 
     Args:
@@ -3327,8 +3327,8 @@ class DatasetV1(DatasetV2):
         boolean controls the order in which the transformation produces
         elements. If set to `False`, the transformation is allowed to yield
         elements out of order to trade determinism for performance. If not
-        specified, the `tf.data.Options.experimental_deterministic` option
-        (`True` by default) controls the behavior.
+        specified, the `tf.data.Options.deterministic` option (`True` by
+        default) controls the behavior.
 
     Returns:
       Dataset: A `Dataset`.
",0,train
2c5a876362952f21984effdedfe402d71c45effe,tensorflow/tensorflow,"[tf.data] Graduating `tf.data.Options.experimental_deterministic` to `tf.data.Options.deterministic`.

PiperOrigin-RevId: 387606044
Change-Id: Iea0e3832de11b1dd15ca568cf1bbd8108cca6c5f",options.py,"@@ -412,9 +412,9 @@ class Options(options_lib.OptionsBase):
 
   >>> dataset = tf.data.Dataset.range(42)
   >>> options = tf.data.Options()
-  >>> options.experimental_deterministic = False
+  >>> options.deterministic = False
   >>> dataset = dataset.with_options(options)
-  >>> print(dataset.options().experimental_deterministic)
+  >>> print(dataset.options().deterministic)
   False
 
   Note: A known limitation of the `tf.data.Options` implementation is that the
@@ -423,13 +423,18 @@ class Options(options_lib.OptionsBase):
   need to be set within the same tf.function.
   """"""
 
-  experimental_deterministic = options_lib.create_option(
-      name=""experimental_deterministic"",
+  deterministic = options_lib.create_option(
+      name=""deterministic"",
       ty=bool,
       docstring=
       ""Whether the outputs need to be produced in deterministic order. If None,""
       "" defaults to True."")
 
+  experimental_deterministic = options_lib.create_option(
+      name=""experimental_deterministic"",
+      ty=bool,
+      docstring=""DEPRECATED. Use `deterministic` instead."")
+
   experimental_distribute = options_lib.create_option(
       name=""experimental_distribute"",
       ty=DistributeOptions,
@@ -438,6 +443,16 @@ class Options(options_lib.OptionsBase):
       ""`tf.data.experimental.DistributeOptions` for more details."",
       default_factory=DistributeOptions)
 
+  experimental_external_state_policy = options_lib.create_option(
+      name=""experimental_external_state_policy"",
+      ty=ExternalStatePolicy,
+      docstring=""This option can be used to override the default policy for ""
+      ""how to handle external state when serializing a dataset or ""
+      ""checkpointing its iterator. There are three settings available - ""
+      ""IGNORE: External state is ignored without a warning; WARN: External ""
+      ""state is ignored and a warning is logged; FAIL: External state results ""
+      ""in an error."")
+
   experimental_optimization = options_lib.create_option(
       name=""experimental_optimization"",
       ty=OptimizationOptions,
@@ -455,15 +470,10 @@ class Options(options_lib.OptionsBase):
       ""frequency is determined by the number of devices attached to this ""
       ""input pipeline. If None, defaults to False."")
 
-  experimental_external_state_policy = options_lib.create_option(
-      name=""experimental_external_state_policy"",
-      ty=ExternalStatePolicy,
-      docstring=""This option can be used to override the default policy for ""
-      ""how to handle external state when serializing a dataset or ""
-      ""checkpointing its iterator. There are three settings available - ""
-      ""IGNORE: External state is ignored without a warning; WARN: External ""
-      ""state is ignored and a warning is logged; FAIL: External state results ""
-      ""in an error."")
+  experimental_threading = options_lib.create_option(
+      name=""experimental_threading"",
+      ty=ThreadingOptions,
+      docstring=""DEPRECATED. Use `threading` instead."")
 
   threading = options_lib.create_option(
       name=""threading"",
@@ -472,26 +482,35 @@ class Options(options_lib.OptionsBase):
       ""`tf.data.ThreadingOptions` for more details."",
       default_factory=ThreadingOptions)
 
-  def __getattr__(self, name):
+  def __getattribute__(self, name):
     if name == ""experimental_threading"":
       logging.warning(""options.experimental_threading is deprecated. ""
                       ""Use options.threading instead."")
       return getattr(self, ""threading"")
-    else:
-      raise AttributeError(""Attribute %s not found."" % name)
+    if name == ""experimental_deterministic"":
+      # TODO(aaudibert): Uncomment after internal uses have been updated.
+      # logging.warning(""options.experimental_deterministic is deprecated. ""
+      #                 ""Use options.deterministic instead."")
+      return getattr(self, ""deterministic"")
+    return super(Options, self).__getattribute__(name)
 
   def __setattr__(self, name, value):
     if name == ""experimental_threading"":
       logging.warning(""options.experimental_threading is deprecated. ""
                       ""Use options.threading instead."")
       super(Options, self).__setattr__(""threading"", value)
+    elif name == ""experimental_deterministic"":
+      # TODO(aaudibert): Uncomment after internal uses have been updated.
+      # logging.warning(""options.experimental_deterministic is deprecated. ""
+      #                 ""Use options.deterministic instead."")
+      super(Options, self).__setattr__(""deterministic"", value)
     else:
       super(Options, self).__setattr__(name, value)
 
   def _to_proto(self):
     pb = dataset_options_pb2.Options()
-    if self.experimental_deterministic is not None:
-      pb.deterministic = self.experimental_deterministic
+    if self.deterministic is not None:
+      pb.deterministic = self.deterministic
     pb.distribute_options.CopyFrom(self.experimental_distribute._to_proto())  # pylint: disable=protected-access
     if self.experimental_external_state_policy is not None:
       pb.external_state_policy = (
@@ -505,7 +524,7 @@ class Options(options_lib.OptionsBase):
 
   def _from_proto(self, pb):
     if pb.WhichOneof(""optional_deterministic"") is not None:
-      self.experimental_deterministic = pb.deterministic
+      self.deterministic = pb.deterministic
     self.experimental_distribute._from_proto(pb.distribute_options)  # pylint: disable=protected-access
     if pb.WhichOneof(""optional_external_state_policy"") is not None:
       self.experimental_external_state_policy = (
",0,train
968725656bf5a01a2e3622eb4533166f2f592b88,tensorflow/tensorflow,Modified reduce_variance for extra optimization,math_ops.py,"@@ -2110,9 +2110,10 @@ def reduce_variance(input_tensor, axis=None, keepdims=False, name=None):
   name = name if name else ""reduce_variance""
   with ops.name_scope(name):
     square_of_input = gen_math_ops.square(input_tensor)
-    mean_of_square = reduce_mean(squares_of_input,
-    	                         axis=axis,
-    	                         keepdims=keepdims)
+    mean_of_square = reduce_mean(
+    	square_of_input,
+    	axis=axis,
+    	keepdims=keepdims)
     mean = reduce_mean(input_tensor, axis=axis, keepdims=keepdims)
     square_of_mean = gen_math_ops.square(mean)
     return mean_of_square - square_of_mean
",0,test
d3f6604c622c7a6cf38bd266ff6626bfc960299c,tensorflow/tensorflow,"Including cuda_kernel_helper.h, deleting some (now) duplicated code.",maxpooling_op_gpu.cu.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/tensor_types.h""
 #include ""tensorflow/core/kernels/maxpooling_op.h""
 #include ""tensorflow/core/kernels/maxpooling_op_gpu.h""
+#include ""tensorflow/core/util/cuda_kernel_helper.h""
 
 namespace tensorflow {
 namespace {
@@ -43,10 +44,7 @@ namespace {
 //         int form, keeping track of the flattened index of the input item that
 //         produces the max output. If a nullptr is passed in for mask, no mask
 //         will be produced.
-#define CUDA_1D_KERNEL_LOOP(i, n)                              \
-  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
-       i += blockDim.x * gridDim.x)
-
+//
 // To call the forward and backward functions, use e.g.:
 // const int kThreadsPerBlock = 1024
 // const int output_size = batch * channels * pooled_height * pooled_width;
@@ -201,11 +199,6 @@ __global__ void MaxPoolBackward(const int nthreads, const dtype* top_diff,
   }
 }
 
-template <typename dtype>
-__global__ void SetZero(const int nthreads, dtype* bottom_diff) {
-  CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = dtype(0); }
-}
-
 #undef CUDA_1D_KERNEL_LOOP
 }  // namespace
 
",0,train
d3f6604c622c7a6cf38bd266ff6626bfc960299c,tensorflow/tensorflow,"Including cuda_kernel_helper.h, deleting some (now) duplicated code.",cuda_kernel_helper.h,"@@ -108,6 +108,7 @@ template <typename T>
 __global__ void SetZero(const int nthreads, T* bottom_diff) {
   CUDA_1D_KERNEL_LOOP(index, nthreads) { *(bottom_diff + index) = T(0); }
 }
+
 // For atomicSub.
 
 // Custom implementation for sub by just negating the value.
",0,train
9ed22473db5e3b5d555e951c2dfc92a75ab235ca,tensorflow/tensorflow,"Capture the distribute.Strategy scope from the outer graph when entering
the FuncGraph.as_default scope instead of __init__. Fixes issues with the
global Keras FuncGraph keeping state between tests.

PiperOrigin-RevId: 225257506",mirrored_strategy.py,"@@ -50,12 +50,17 @@ from tensorflow.python.util.tf_export import tf_export
 
 
 @contextlib.contextmanager
-def _enter_graph(g, eager):
+def _enter_graph(g, eager, creator_stack=None):
+  """"""Context manager for selecting a graph and maybe eager mode.""""""
   if eager:
     with g.as_default(), context.eager_mode():
+      if creator_stack is not None:
+        g._variable_creator_stack = creator_stack  # pylint: disable=protected-access
       yield
   else:
     with g.as_default():
+      if creator_stack is not None:
+        g._variable_creator_stack = creator_stack  # pylint: disable=protected-access
       yield
 
 
@@ -865,7 +870,6 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
 
     def run(self):
       # pylint: disable=protected-access
-      self.graph._variable_creator_stack = self._variable_creator_stack
       self.should_run.wait()
       self.should_run.clear()
       try:
@@ -873,7 +877,8 @@ class MirroredExtended(distribute_lib.DistributionStrategyExtended):
           return
         with self.coord.stop_on_exception(), \
             _enter_graph(self._init_graph, self._init_in_eager), \
-            _enter_graph(self.graph, self.in_eager), \
+            _enter_graph(self.graph, self.in_eager,
+                         self._variable_creator_stack), \
             context.context().device_policy(self.context_device_policy), \
             MirroredReplicaContext(self.distribution, constant_op.constant(
                 self.replica_id, dtypes.int32)), \
",0,train
9ed22473db5e3b5d555e951c2dfc92a75ab235ca,tensorflow/tensorflow,"Capture the distribute.Strategy scope from the outer graph when entering
the FuncGraph.as_default scope instead of __init__. Fixes issues with the
global Keras FuncGraph keeping state between tests.

PiperOrigin-RevId: 225257506",func_graph.py,"@@ -36,6 +36,7 @@ from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import compat
 from tensorflow.python.util import nest
+from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util import tf_decorator
 from tensorflow.python.util.lazy_loader import LazyLoader
 
@@ -108,38 +109,20 @@ class FuncGraph(ops.Graph):
 
     graph = self.outer_graph
 
-    # pylint: disable=protected-access
-    # TODO(b/112906995, nareshmodi): distribution strategy depends on inheriting
-    # this stack from the default graph even in eager mode. Maybe it should be
-    # part of the eager context? This would also allow us to remove a
-    # get_default_graph() call from the function cache lookup.
-    self._distribution_strategy_stack = list(graph._distribution_strategy_stack)
-    # We ignore device placements from any outer scopes while tracing the
-    # function when possible, to avoid hard-coding them in the function
-    # graph. ""Default"" placements come from the PartitionedCallOp's placement,
-    # so that the same trace of the Python function may be placed on several
-    # different devices and saved functions may be placed on new devices when
-    # restored.
     if context.executing_eagerly():
       self.seed = context.global_seed()
       device_type = context.context().device_spec.device_type
       self._xla_compile = (device_type == ""TPU"" or device_type == ""XLA_GPU""
                            or device_type == ""XLA_CPU"")
-      if self._distribution_strategy_stack or self._xla_compile:
-        self._add_device_to_stack(context.context().device_name)
     else:
       self.seed = graph.seed
       self._xla_compile = getattr(graph, ""_xla_compile"", False)
       # TODO(allenl): Figure out if we can remove colocation stack
       # specialization (currently used in cond_v2), here and in the cache key.
-      self._colocation_stack = graph._colocation_stack.copy()
-      if (self._distribution_strategy_stack
-          or self._xla_compile
-          or device_stack_has_callable(graph._device_function_stack)):
-        # Hard-code devices from device functions in the function body
-        self._device_function_stack = graph._device_function_stack.copy()
+      self._colocation_stack = graph._colocation_stack.copy()  # pylint: disable=protected-access
+
     if not self._read_only_collections:
-      self._collections = graph._collections
+      self._collections = graph._collections  # pylint: disable=protected-access
     else:
       for collection_name in graph.get_all_collection_keys():
         if collection_name not in WHITELIST_COLLECTIONS:
@@ -149,11 +132,55 @@ class FuncGraph(ops.Graph):
         self._collections[collection_name] = graph.get_collection_ref(
             collection_name)
 
-    self._variable_creator_stack = graph._variable_creator_stack
-    # Inherit the graph key, since this is used for matching variables in
-    # optimizers.
-    self._graph_key = graph._graph_key
-    # pylint: enable=protected-access
+  def as_default(self):
+    outer_cm = super(FuncGraph, self).as_default()
+
+    @tf_contextlib.contextmanager
+    def inner_cm():
+      """"""Context manager for copying distribute.Strategy scope information.""""""
+      graph = ops.get_default_graph()
+      # pylint: disable=protected-access
+      # TODO(b/112906995, nareshmodi): distribution strategy depends on
+      # inheriting this stack from the default graph even in eager mode. Maybe
+      # it should be part of the eager context? This would also allow us to
+      # remove a get_default_graph() call from the function cache lookup.
+      old_strategy_stack = self._distribution_strategy_stack
+      self._distribution_strategy_stack = list(
+          graph._distribution_strategy_stack)
+      # We ignore device placements from any outer scopes while tracing the
+      # function when possible, to avoid hard-coding them in the function
+      # graph. ""Default"" placements come from the PartitionedCallOp's placement,
+      # so that the same trace of the Python function may be placed on several
+      # different devices and saved functions may be placed on new devices when
+      # restored.
+      old_device_stack = self._device_function_stack
+      if context.executing_eagerly():
+        if self._distribution_strategy_stack or self._xla_compile:
+          self._add_device_to_stack(context.context().device_name)
+      else:
+        if (self._distribution_strategy_stack
+            or self._xla_compile
+            or device_stack_has_callable(graph._device_function_stack)):
+          # Hard-code devices from device functions in the function body
+          self._device_function_stack = graph._device_function_stack.copy()
+
+      old_creator_stack = self._variable_creator_stack
+      self._variable_creator_stack = graph._variable_creator_stack
+      # Inherit the graph key, since this is used for matching variables in
+      # optimizers.
+      old_graph_key = self._graph_key
+      self._graph_key = graph._graph_key
+      # pylint: enable=protected-access
+
+      with outer_cm as g:
+        try:
+          yield g
+        finally:
+          self._distribution_strategy_stack = old_strategy_stack
+          self._device_function_stack = old_device_stack
+          self._variable_creator_stack = old_creator_stack
+          self._graph_key = old_graph_key
+    return inner_cm()
 
   @property
   def output_types(self):
",0,train
53c25e3db9876ea6bbf23cf10c15854511cf6ec8,tensorflow/tensorflow,"Add a TF1.X path to generate2.py

It's version 2 of the docs generator, not the docs generator for TensorFlow 2.

PiperOrigin-RevId: 254407102",generate2.py,"@@ -51,22 +51,6 @@ parser.tf_inspect = tf_inspect
 # So patch `tf.__all__` to list everything.
 tf.__all__ = [item_name for item_name, value in tf_inspect.getmembers(tf)]
 
-tf.__doc__ = """"""
-## TensorFlow 2.0 Beta
-
-Caution:  This is a developer preview.  You will likely find some bugs,
-performance issues, and more, and we encourage you to tell us about them.
-We value your feedback!
-
-These docs were generated from the beta build of TensorFlow 2.0.
-
-You can install the exact version that was used to generate these docs
-with:
-
-```
-pip install tensorflow==2.0.0-beta1
-```
-""""""
 
 FLAGS = flags.FLAGS
 
@@ -87,6 +71,75 @@ flags.DEFINE_string(""site_path"", """",
                     ""`_toc.yaml` and `_redirects.yaml` files"")
 
 
+if tf.__version__.startswith('1'):
+  PRIVATE_MAP = {
+      'tf.contrib.autograph': ['utils', 'operators'],
+      'tf.test': ['mock'],
+      'tf.contrib.estimator': ['python'],
+  }
+
+  DO_NOT_DESCEND_MAP = {
+      'tf': ['cli', 'lib', 'wrappers'],
+      'tf.contrib': [
+          'compiler',
+          'grid_rnn',
+          # Block contrib.keras to de-clutter the docs
+          'keras',
+          'labeled_tensor',
+          'quantization',
+          'session_bundle',
+          'slim',
+          'solvers',
+          'specs',
+          'tensor_forest',
+          'tensorboard',
+          'testing',
+          'tfprof',
+      ],
+      'tf.contrib.bayesflow': [
+          'special_math', 'stochastic_gradient_estimators',
+          'stochastic_variables'
+      ],
+      'tf.contrib.ffmpeg': ['ffmpeg_ops'],
+      'tf.contrib.graph_editor': [
+          'edit', 'match', 'reroute', 'subgraph', 'transform', 'select', 'util'
+      ],
+      'tf.contrib.keras': ['api', 'python'],
+      'tf.contrib.layers': ['feature_column', 'summaries'],
+      'tf.contrib.learn': [
+          'datasets',
+          'head',
+          'graph_actions',
+          'io',
+          'models',
+          'monitors',
+          'ops',
+          'preprocessing',
+          'utils',
+      ],
+      'tf.contrib.util': ['loader'],
+  }
+else:
+  PRIVATE_MAP = {}
+  DO_NOT_DESCEND_MAP = {}
+  tf.__doc__ = """"""
+    ## TensorFlow 2.0 Beta
+
+    Caution:  This is a developer preview.  You will likely find some bugs,
+    performance issues, and more, and we encourage you to tell us about them.
+    We value your feedback!
+
+    These docs were generated from the beta build of TensorFlow 2.0.
+
+    You can install the exact version that was used to generate these docs
+    with:
+
+    ```
+    pip install tensorflow==2.0.0-beta1
+    ```
+    """"""
+
+
 # The doc generator isn't aware of tf_export.
 # So prefix the score tuples with -1 when this is the canonical name, +1
 # otherwise. The generator chooses the name with the lowest score.
@@ -126,7 +179,6 @@ def _hide_layer_and_module_methods():
     except AttributeError:
       pass
 
-
 def build_docs(output_dir, code_url_prefix, search_hints=True):
   """"""Build api docs for tensorflow v2.
 
@@ -142,6 +194,11 @@ def build_docs(output_dir, code_url_prefix, search_hints=True):
   except AttributeError:
     pass
 
+  try:
+    doc_controls.do_not_generate_docs(tf.compat.v1.pywrap_tensorflow)
+  except AttributeError:
+    pass
+
   base_dir = path.dirname(tf.__file__)
 
   base_dirs = (
@@ -165,7 +222,9 @@ def build_docs(output_dir, code_url_prefix, search_hints=True):
       search_hints=search_hints,
       code_url_prefix=code_url_prefixes,
       site_path=FLAGS.site_path,
-      visitor_cls=TfExportAwareDocGeneratorVisitor)
+      visitor_cls=TfExportAwareDocGeneratorVisitor,
+      private_map=PRIVATE_MAP,
+      do_not_descend_map=DO_NOT_DESCEND_MAP)
 
   doc_generator.build(output_dir)
 
",0,train
de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist.

We also provide tests to make sure all API requirements are satisfied.

Just a small part of the work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md

PiperOrigin-RevId: 281109338
Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",filesystem_interface.h,"@@ -527,11 +527,13 @@ typedef struct TF_FilesystemOps {
   ///     of type `TF_Status*`.
   ///
   /// If `statuses` is not null, plugins must fill each element with detailed
-  /// status for each file, as if calling `path_exists` on each one.
+  /// status for each file, as if calling `path_exists` on each one. Core
+  /// TensorFlow initializes the `statuses` array and plugins must use
+  /// `TF_SetStatus` to set each element instead of directly assigning.
   ///
   /// DEFAULT IMPLEMENTATION: Checks existence of every file. Needs
   /// `path_exists`.
-  bool (*paths_exist)(const TF_Filesystem* filesystem, const char** paths,
+  bool (*paths_exist)(const TF_Filesystem* filesystem, char** paths,
                       int num_files, TF_Status** statuses);
 
   /// Obtains statistics for the given `path`.
",0,train
de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist.

We also provide tests to make sure all API requirements are satisfied.

Just a small part of the work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md

PiperOrigin-RevId: 281109338
Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",modular_filesystem.cc,"@@ -14,6 +14,7 @@ limitations under the License.
 ==============================================================================*/
 #include ""tensorflow/c/experimental/filesystem/modular_filesystem.h""
 
+#include <algorithm>
 #include <string>
 #include <utility>
 
@@ -111,15 +112,47 @@ Status ModularFileSystem::NewReadOnlyMemoryRegionFromFile(
 }
 
 Status ModularFileSystem::FileExists(const std::string& fname) {
-  // TODO(mihaimaruseac): Implementation to come in a new change
-  return Status(error::UNIMPLEMENTED,
-                ""Modular filesystem stub not implemented yet"");
+  if (ops_->path_exists == nullptr)
+    return errors::Unimplemented(tensorflow::strings::StrCat(
+        ""Filesystem for "", fname, "" does not support FileExists()""));
+
+  UniquePtrTo_TF_Status plugin_status(TF_NewStatus(), TF_DeleteStatus);
+  const std::string translated_name = TranslateName(fname);
+  ops_->path_exists(filesystem_.get(), translated_name.c_str(),
+                    plugin_status.get());
+  return StatusFromTF_Status(plugin_status.get());
 }
 
 bool ModularFileSystem::FilesExist(const std::vector<std::string>& files,
                                    std::vector<Status>* status) {
-  // TODO(mihaimaruseac): Implementation to come in a new change
-  return true;
+  if (ops_->paths_exist == nullptr)
+    return FileSystem::FilesExist(files, status);
+
+  std::vector<char*> translated_names;
+  translated_names.reserve(files.size());
+  for (int i = 0; i < files.size(); i++)
+    translated_names.push_back(strdup(TranslateName(files[i]).c_str()));
+
+  bool result;
+  if (status == nullptr) {
+    result = ops_->paths_exist(filesystem_.get(), translated_names.data(),
+                               files.size(), nullptr);
+  } else {
+    std::vector<TF_Status*> plugin_status;
+    plugin_status.reserve(files.size());
+    for (int i = 0; i < files.size(); i++)
+      plugin_status.push_back(TF_NewStatus());
+    result = ops_->paths_exist(filesystem_.get(), translated_names.data(),
+                               files.size(), plugin_status.data());
+    for (int i = 0; i < files.size(); i++) {
+      status->push_back(StatusFromTF_Status(plugin_status[i]));
+      TF_DeleteStatus(plugin_status[i]);
+    }
+  }
+
+  for (int i = 0; i < files.size(); i++) free(translated_names[i]);
+
+  return result;
 }
 
 Status ModularFileSystem::GetChildren(const std::string& dir,
",0,train
de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist.

We also provide tests to make sure all API requirements are satisfied.

Just a small part of the work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md

PiperOrigin-RevId: 281109338
Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",modular_filesystem_test.cc,"@@ -539,6 +539,95 @@ TEST_P(ModularFileSystemTest, TestDeleteDirectoryPathIsInvalid) {
   EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::FAILED_PRECONDITION);
 }
 
+TEST_P(ModularFileSystemTest, TestFileExists) {
+  const std::string filepath = GetURIForPath(""a_file"");
+  std::unique_ptr<WritableFile> file;
+  Status status = env_->NewWritableFile(filepath, &file);
+  if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported"";
+
+  status = env_->FileExists(filepath);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK);
+}
+
+TEST_P(ModularFileSystemTest, TestFileExistsButIsDirectory) {
+  const std::string filepath = GetURIForPath(""a_file"");
+  Status status = env_->CreateDir(filepath);
+  if (!status.ok()) GTEST_SKIP() << ""CreateDir() not supported"";
+
+  status = env_->FileExists(filepath);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK);
+}
+
+TEST_P(ModularFileSystemTest, TestFileExistsNotFound) {
+  const std::string filepath = GetURIForPath(""a_file"");
+  Status status = env_->FileExists(filepath);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::NOT_FOUND);
+}
+
+TEST_P(ModularFileSystemTest, TestFileExistsPathIsInvalid) {
+  const std::string filepath = GetURIForPath(""a_file"");
+  std::unique_ptr<WritableFile> file;
+  Status status = env_->NewWritableFile(filepath, &file);
+  if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported"";
+
+  const std::string target_path = GetURIForPath(""a_file/a_new_file"");
+  status = env_->FileExists(target_path);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::FAILED_PRECONDITION);
+}
+
+TEST_P(ModularFileSystemTest, TestFilesExist) {
+  const std::vector<std::string> filenames = {GetURIForPath(""a""),
+                                              GetURIForPath(""b"")};
+  for (const auto& filename : filenames) {
+    std::unique_ptr<WritableFile> file;
+    Status status = env_->NewWritableFile(filename, &file);
+    if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported"";
+  }
+
+  EXPECT_TRUE(env_->FilesExist(filenames, /*status=*/nullptr));
+
+  std::vector<Status> statuses;
+  EXPECT_TRUE(env_->FilesExist(filenames, &statuses));
+  EXPECT_EQ(statuses.size(), filenames.size());
+  for (const auto& status : statuses)
+    EXPECT_PRED2(UninmplementedOrReturnsCode, status, Code::OK);
+}
+
+TEST_P(ModularFileSystemTest, TestFilesExistAllFailureModes) {
+  // if reordering these, make sure to reorder checks at the end
+  const std::vector<std::string> filenames = {
+      GetURIForPath(""a_dir""),
+      GetURIForPath(""a_file""),
+      GetURIForPath(""a_file/a_new_file""),
+      GetURIForPath(""file_not_found""),
+  };
+
+  Status status = env_->CreateDir(filenames[0]);
+  if (!status.ok()) GTEST_SKIP() << ""CreateDir() not supported"";
+
+  std::unique_ptr<WritableFile> file;
+  status = env_->NewWritableFile(filenames[1], &file);
+  if (!status.ok()) GTEST_SKIP() << ""NewWritableFile() not supported"";
+
+  std::vector<Status> statuses;
+  EXPECT_FALSE(env_->FilesExist(filenames, &statuses));
+  EXPECT_EQ(statuses.size(), filenames.size());
+  EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[0], Code::OK);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[1], Code::OK);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[2],
+               Code::FAILED_PRECONDITION);
+  EXPECT_PRED2(UninmplementedOrReturnsCode, statuses[3], Code::NOT_FOUND);
+}
+
+TEST_P(ModularFileSystemTest, TestFilesExistsNoFiles) {
+  const std::vector<std::string> filenames = {};
+  EXPECT_TRUE(env_->FilesExist(filenames, /*status=*/nullptr));
+
+  std::vector<Status> statuses;
+  EXPECT_TRUE(env_->FilesExist(filenames, &statuses));
+  EXPECT_TRUE(statuses.empty());
+}
+
 TEST_P(ModularFileSystemTest, TestAppendAndTell) {
   const std::string filename = GetURIForPath(""a_file"");
   std::unique_ptr<WritableFile> file;
",0,train
de2127833462b2d2b2915c72565601d1ceb798ff,tensorflow/tensorflow,"Implement modular POSIX filesystem support for testing if paths exist.

We also provide tests to make sure all API requirements are satisfied.

Just a small part of the work for modular filesystem plugins. For more details, consult the RFC at https://github.com/tensorflow/community/blob/master/rfcs/20190506-filesystem-plugin-modular-tensorflow.md

PiperOrigin-RevId: 281109338
Change-Id: I060ff60ce6502770d43798910a10b2d0d0a2a601",posix_filesystem.cc,"@@ -287,7 +287,13 @@ static void DeleteDir(const TF_Filesystem* filesystem, const char* path,
     TF_SetStatus(status, TF_OK, """");
 }
 
-// TODO(mihaimaruseac): More implementations to follow in subsequent changes.
+static void PathExists(const TF_Filesystem* filesystem, const char* path,
+                       TF_Status* status) {
+  if (access(path, F_OK) != 0)
+    TF_SetStatusFromIOError(status, errno, path);
+  else
+    TF_SetStatus(status, TF_OK, """");
+}
 
 }  // namespace tf_posix_filesystem
 
@@ -317,6 +323,11 @@ void TF_InitPlugin(TF_Status* status) {
       /*recursively_create_dir=*/nullptr,
       tf_posix_filesystem::DeleteFile,
       tf_posix_filesystem::DeleteDir,
+      /*delete_recursively=*/nullptr,
+      /*rename_file=*/nullptr,
+      /*copy_file=*/nullptr,
+      tf_posix_filesystem::PathExists,
+      /*paths_exist=*/nullptr,
       nullptr,
   };
 
",0,train
977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids
to/from strings and used these in the rendezvous code.  Improves
performance for ptb_word_lm slightly (saves several allocations and an
sscanf per CPU <-> GPU transfer).
Change: 115852277",rendezvous.cc,"@@ -41,9 +41,10 @@ string Rendezvous::CreateKey(const string& src_device, uint64 src_incarnation,
   //
   // ""src_incarnation"" is used to distinguish a worker when it
   // restarts.
-  return strings::StrCat(src_device, "";"", strings::FpToString(src_incarnation),
-                         "";"", dst_device, "";"", name, "";"", frame_iter.frame_id,
-                         "":"", frame_iter.iter_id);
+  char buf[strings::kFastToBufferSize];
+  return strings::StrCat(
+      src_device, "";"", strings::Uint64ToHexString(src_incarnation, buf), "";"",
+      dst_device, "";"", name, "";"", frame_iter.frame_id, "":"", frame_iter.iter_id);
 }
 
 // Return the prefix of ""*s"" up to the next occurrence of ""delim"", or
@@ -73,7 +74,7 @@ Status Rendezvous::ParseKey(const string& key, ParsedKey* out) {
   if (s.empty() &&          // Consumed the whole string
       !parts[4].empty() &&  // Exactly five parts
       DeviceNameUtils::ParseFullName(parts[0], &out->src) &&
-      strings::StringToFp(parts[1].ToString(), &out->src_incarnation) &&
+      strings::HexStringToUint64(parts[1], &out->src_incarnation) &&
       DeviceNameUtils::ParseFullName(parts[2], &out->dst) &&
       !parts[3].empty()) {
     out->src_device.assign(parts[0].data(), parts[0].size());
",0,test
977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids
to/from strings and used these in the rendezvous code.  Improves
performance for ptb_word_lm slightly (saves several allocations and an
sscanf per CPU <-> GPU transfer).
Change: 115852277",numbers.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 #include ""tensorflow/core/lib/strings/numbers.h""
 
+#include <ctype.h>
 #include <float.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -237,6 +238,38 @@ bool StringToFp(const string& s, Fprint* fp) {
   }
 }
 
+StringPiece Uint64ToHexString(uint64 v, char* buf) {
+  static const char* hexdigits = ""0123456789abcdef"";
+  const int num_byte = 16;
+  buf[num_byte] = '\0';
+  for (int i = num_byte - 1; i >= 0; i--) {
+    buf[i] = hexdigits[v & 0xf];
+    v >>= 4;
+  }
+  return StringPiece(buf, num_byte);
+}
+
+bool HexStringToUint64(const StringPiece& s, uint64* result) {
+  uint64 v = 0;
+  if (s.empty()) {
+    return false;
+  }
+  for (int i = 0; i < s.size(); i++) {
+    char c = s[i];
+    if (c >= '0' && c <= '9') {
+      v = (v << 4) + (c - '0');
+    } else if (c >= 'a' && c <= 'f') {
+      v = (v << 4) + 10 + (c - 'a');
+    } else if (c >= 'A' && c <= 'F') {
+      v = (v << 4) + 10 + (c - 'A');
+    } else {
+      return false;
+    }
+  }
+  *result = v;
+  return true;
+}
+
 string HumanReadableNumBytes(int64 num_bytes) {
   if (num_bytes == kint64min) {
     // Special case for number with not representable negation.
",0,test
977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids
to/from strings and used these in the rendezvous code.  Improves
performance for ptb_word_lm slightly (saves several allocations and an
sscanf per CPU <-> GPU transfer).
Change: 115852277",numbers.h,"@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <string>
 
+#include ""tensorflow/core/lib/core/stringpiece.h""
 #include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
@@ -81,6 +82,16 @@ string FpToString(Fprint fp);
 // returns false.
 bool StringToFp(const string& s, Fprint* fp);
 
+// Convert a 64-bit fingerprint value to an ASCII representation that
+// is terminated by a '\0'.
+// Buf must point to an array of at least kFastToBufferSize characters
+StringPiece Uint64ToHexString(uint64 v, char* buf);
+
+// Attempt to parse a uint64 in the form encoded by Uint64ToHexString.  If
+// successful, stores the value in *v and returns true.  Otherwise,
+// returns false.
+bool HexStringToUint64(const StringPiece& s, uint64* v);
+
 // Convert strings to 32bit integer values.
 // Leading and trailing spaces are allowed.
 // Return false with overflow or invalid input.
",0,test
977ea14361f7bceeeda2046f3073ea888806be02,tensorflow/tensorflow,"Added more streamlined interfaces for converting rendezvous ids
to/from strings and used these in the rendezvous code.  Improves
performance for ptb_word_lm slightly (saves several allocations and an
sscanf per CPU <-> GPU transfer).
Change: 115852277",numbers_test.cc,"@@ -41,6 +41,23 @@ TEST(FpToString, Ints) {
   EXPECT_FALSE(StringToFp(""0000000000000000xyz"", &dummy));
 }
 
+TEST(Uint64ToHexString, Ints) {
+  for (int s = 0; s < 64; s++) {
+    for (int delta = -1; delta <= 1; delta++) {
+      uint64 fp = (1ull << s) + delta;
+      char buf[kFastToBufferSize];
+      StringPiece s = Uint64ToHexString(fp, buf);
+      uint64 fp2;
+      EXPECT_TRUE(HexStringToUint64(s, &fp2));
+      EXPECT_EQ(fp, fp2) << s;
+    }
+  }
+  uint64 dummy;
+  EXPECT_FALSE(HexStringToUint64("""", &dummy));
+  EXPECT_FALSE(HexStringToUint64(""xyz"", &dummy));
+  EXPECT_FALSE(HexStringToUint64(""0000000000000000xyz"", &dummy));
+}
+
 TEST(HumanReadableNumBytes, Bytes) {
   EXPECT_EQ(""0B"", HumanReadableNumBytes(0));
   EXPECT_EQ(""4B"", HumanReadableNumBytes(4));
",0,test
220c8954d3856648c3deb2013df9fd383ac6ecbe,tensorflow/tensorflow,"add back-ticks for parameter formatting

add back-ticks for parameter formatting",mel_ops.py,"@@ -141,9 +141,9 @@ def linear_to_mel_weight_matrix(num_mel_bins=20,
     A `Tensor` of shape `[num_spectrogram_bins, num_mel_bins]`.
 
   Raises:
-    ValueError: If num_mel_bins/num_spectrogram_bins/sample_rate are not
-      positive, lower_edge_hertz is negative, frequency edges are incorrectly
-      ordered, or upper_edge_hertz is larger than the Nyquist frequency.
+    ValueError: If `num_mel_bins`/`num_spectrogram_bins`/`sample_rate` are not
+      positive, `lower_edge_hertz` is negative, frequency edges are incorrectly
+      ordered, or `upper_edge_hertz` is larger than the Nyquist frequency.
 
   [mel]: https://en.wikipedia.org/wiki/Mel_scale
   """"""
",0,train
657c503c4eca8919267863548cd1f516fb774944,tensorflow/tensorflow,"Update microbenchmarks after cl/271457940

PiperOrigin-RevId: 271586404",benchmarks_test.py,"@@ -679,8 +679,7 @@ class MicroBenchmarks(test.Benchmark):
       tangent = random_ops.random_uniform(shape).cpu()
 
       def func():
-        with forwardprop.ForwardGradientAccumulator() as acc:
-          acc.watch(m, tangent)
+        with forwardprop.ForwardGradientAccumulator(m, tangent) as acc:
           result = math_ops.matmul(m, m, transpose_b=True)
         return result, acc.jvp(result)
 
@@ -693,8 +692,7 @@ class MicroBenchmarks(test.Benchmark):
     with ops.device(CPU):
       @def_function.function
       def compiled_function(x, tangent):
-        with forwardprop.ForwardGradientAccumulator() as acc:
-          acc.watch(x, tangent)
+        with forwardprop.ForwardGradientAccumulator(x, tangent) as acc:
           result = math_ops.matmul(x, x, transpose_b=True)
         return result, acc.jvp(result)
 
@@ -713,8 +711,7 @@ class MicroBenchmarks(test.Benchmark):
 
       @def_function.function()
       def compiled_function(x, tangent):
-        with forwardprop.ForwardGradientAccumulator() as acc:
-          acc.watch(x, tangent)
+        with forwardprop.ForwardGradientAccumulator(x, tangent) as acc:
           result = matmul(x, x, transpose_b=True)
         return result, acc.jvp(result)
 
@@ -734,8 +731,7 @@ class MicroBenchmarks(test.Benchmark):
       matmul = def_function.function(math_ops.matmul)
 
       def func():
-        with forwardprop.ForwardGradientAccumulator() as acc:
-          acc.watch(m, tangent)
+        with forwardprop.ForwardGradientAccumulator(m, tangent) as acc:
           result = matmul(m, m, transpose_b=True)
         return result, acc.jvp(result)
 
",0,train
75d68bbef7c7a3bbed8b6d0844f66549f1913b34,tensorflow/tensorflow,"Catch some more cases in reduction_ops_test.py.

In particular, I ran into a case where `tf.reduce_mean(.., None)`
was not properly covered, and that surprised me during some other
related change.
Change: 123775966",reduction_ops_test.py,"@@ -158,11 +158,13 @@ class SumReductionTest(tf.test.TestCase):
   # Simple tests for various types.
   def testDoubleReduce1D(self):
     np_arr = np.arange(1, 6).reshape([5]).astype(np.float64)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
 
   def testInt32Reduce1D(self):
     np_arr = np.arange(1, 6).reshape([5]).astype(np.int32)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
 
@@ -247,14 +249,17 @@ class SumReductionTest(tf.test.TestCase):
 class MeanReductionTest(tf.test.TestCase):
 
   def _compare(self, x, reduction_axes, keep_dims, use_gpu=False):
-    np_sum = x
-    count = 1
-    for ra in reduction_axes[::-1]:
-      np_sum = np.sum(np_sum, axis=ra, keepdims=keep_dims)
-      count *= x.shape[ra]
-    np_ans = np_sum / count
-    with self.test_session(use_gpu=use_gpu):
+    np_ans = x
+    if reduction_axes is None:
+      np_ans = np.mean(np_ans, keepdims=keep_dims)
+    else:
       reduction_axes = np.array(reduction_axes).astype(np.int32)
+      count = 1
+      for ra in reduction_axes.ravel()[::-1]:
+        np_ans = np.sum(np_ans, axis=ra, keepdims=keep_dims)
+        count *= x.shape[ra]
+      np_ans /= count
+    with self.test_session(use_gpu=use_gpu):
       tf_ans = tf.reduce_mean(x, reduction_axes, keep_dims)
       out = tf_ans.eval()
     self.assertAllClose(np_ans, out)
@@ -270,6 +275,7 @@ class MeanReductionTest(tf.test.TestCase):
     # Create a 3D array of floats and reduce across all possible
     # dimensions
     np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
     self._compareAll(np_arr, [1])
@@ -283,6 +289,7 @@ class MeanReductionTest(tf.test.TestCase):
     # Create a 3D array of doubles and reduce across all possible
     # dimensions
     np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
     self._compareAll(np_arr, [1])
@@ -450,6 +457,7 @@ class MinReductionTest(tf.test.TestCase):
     # Create a 3D array of floats and reduce across all possible
     # dimensions
     np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float32)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
     self._compareAll(np_arr, [1])
@@ -463,6 +471,7 @@ class MinReductionTest(tf.test.TestCase):
     # Create a 3D array of doubles and reduce across all possible
     # dimensions
     np_arr = np.arange(0, 30).reshape([2, 3, 5]).astype(np.float64)
+    self._compareAll(np_arr, None)
     self._compareAll(np_arr, [])
     self._compareAll(np_arr, [0])
     self._compareAll(np_arr, [1])
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,diag_op.cc,"@@ -115,7 +115,9 @@ REGISTER_XLA_OP(Name(""Diag""), DiagOp);
 
 class DiagPartOp : public XlaOpKernel {
  public:
-  explicit DiagPartOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {}
+  explicit DiagPartOp(OpKernelConstruction* ctx)
+      : XlaOpKernel(ctx),
+        is_gpu_(ctx->device_type().type_string() == DEVICE_GPU_XLA_JIT) {}
 
   void Compile(XlaOpKernelContext* ctx) override {
     const TensorShape input_shape = ctx->InputShape(0);
@@ -145,12 +147,17 @@ class DiagPartOp : public XlaOpKernel {
 
     xla::XlaOp input = ctx->Input(0);
 
+    xla::XlaOp reshape_input = xla::Reshape(input, {new_size, new_size});
     xla::XlaOp output = xla::Reshape(
-        xla::GetMatrixDiagonal(xla::Reshape(input, {new_size, new_size})),
+        is_gpu_ ? xla::GetMatrixDiagonalViaGather(reshape_input)
+                : xla::GetMatrixDiagonal(reshape_input),
         new_dims);
 
     ctx->SetOutput(0, output);
   }
+
+ private:
+  const bool is_gpu_;
 };
 
 REGISTER_XLA_OP(Name(""DiagPart""), DiagPartOp);
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix_diag_ops.cc,"@@ -267,7 +267,8 @@ REGISTER_XLA_OP(Name(""MatrixDiagV2"")
 class MatrixDiagPartOp : public XlaOpKernel {
  public:
   explicit MatrixDiagPartOp(OpKernelConstruction* context)
-      : XlaOpKernel(context) {}
+      : XlaOpKernel(context),
+        is_gpu_(context->device_type().type_string() == DEVICE_GPU_XLA_JIT) {}
 
   void Compile(XlaOpKernelContext* context) override {
     const TensorShape input_shape = context->InputShape(0);
@@ -315,13 +316,17 @@ class MatrixDiagPartOp : public XlaOpKernel {
     std::vector<xla::XlaOp> diag_list;
     xla::PaddingConfig padding_config;
     if (num_diags == 1) {
-      context->SetOutput(0, xla::GetMatrixDiagonal(input, upper_diag_index));
+      context->SetOutput(0,
+        is_gpu_ ? xla::GetMatrixDiagonalViaGather(input, upper_diag_index)
+                : xla::GetMatrixDiagonal(input, upper_diag_index));
       return;
     }
     padding_config = xla::MakeNoPaddingConfig(input_rank - 1);
     for (int diag_index = upper_diag_index; diag_index >= lower_diag_index;
          --diag_index) {
-      auto single_diag = xla::GetMatrixDiagonal(input, diag_index);
+      xla::XlaOp single_diag =
+          is_gpu_ ? xla::GetMatrixDiagonalViaGather(input, diag_index)
+                  : xla::GetMatrixDiagonal(input, diag_index);
       const int64 diag_length =
           (diag_index >= 0) ? (num_cols - diag_index) : (num_rows + diag_index);
       const int64 padding_length = max_diag_len - diag_length;
@@ -336,6 +341,9 @@ class MatrixDiagPartOp : public XlaOpKernel {
         xla::ConcatInDim(context->builder(), diag_list, input_rank - 2);
     context->SetOutput(0, xla::Reshape(concat, output_shape.dim_sizes()));
   }
+
+ private:
+  const bool is_gpu_;
 };
 
 REGISTER_XLA_OP(Name(""MatrixDiagPart""), MatrixDiagPartOp);
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix.cc,"@@ -19,6 +19,9 @@ limitations under the License.
 #include <limits>
 #include <numeric>
 #include <vector>
+#include <string>
+#include <utility>
+#include <algorithm>
 
 #include ""absl/algorithm/container.h""
 #include ""absl/container/flat_hash_set.h""
@@ -102,6 +105,70 @@ XlaOp GetMatrixDiagonal(XlaOp x, int k) {
   });
 }
 
+XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k) {
+  XlaBuilder* builder = x.builder();
+  return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
+    TF_ASSIGN_OR_RETURN(Shape shape, builder->GetShape(x));
+    auto n_dims = static_cast<int32>(shape.rank());
+    TF_RET_CHECK(n_dims >= 2);
+    const int64 m = shape.dimensions(n_dims - 2);
+    const int64 n = shape.dimensions(n_dims - 1);
+
+    // start_indices has a shape of {diag_len, 2}, and each pair of values in
+    // its dimension 1 represents the (row, col) of the diagonal. We set
+    // index_vector_dim to 1 and make start_index_map and collapsed_slice_dims
+    // contain the same two dimension indices. This makes sure that the (row,
+    // col) pairs in start_indices are propagated to the indices for the two
+    // collapsed dimensions in the operand indices through start_index_map.
+    const int64 num_index_dims = 2;
+    const int64 axis = n_dims - num_index_dims;
+
+    // Calculate the indices of diagonal part with offset k.
+    const int64 diag_len = std::max(std::min(m + std::min(k, 0),
+                                             n - std::max(k, 0)),
+                                    0LL);
+    XlaOp diag_base_indices = BroadcastInDim(
+        Iota(builder, S32, diag_len), { diag_len, num_index_dims }, { 0 });
+    XlaOp diag_offset = Broadcast(
+        ConstantR1<int>(builder, { std::max(-k, 0), std::max(k, 0) }),
+        { diag_len });
+    XlaOp start_indices = Add(diag_base_indices, diag_offset);
+
+    // Example of a 3D diag-part extracting diagonal part with offset=1 out of a
+    // tensor of shape [2,5,4].
+    //
+    //  operand = s32[2,5,4] parameter(0)
+    //  indices = s32[3,2] parameter(1)
+    //  gather = s32[2,3] gather(operand, indices),
+    //       offset_dims={0},
+    //       collapsed_slice_dims={1,2},
+    //       start_index_map={1,2},
+    //       index_vector_dim=1,
+    //       slice_sizes={2, 1, 1}
+
+    xla::GatherDimensionNumbers dim_numbers;
+    std::vector<int64> slice_sizes;
+    slice_sizes.reserve(n_dims);
+    for (int64 i = 0; i < n_dims; i++) {
+      int64 window_bound;
+      if (axis <= i) {
+        dim_numbers.add_collapsed_slice_dims(i);
+        dim_numbers.add_start_index_map(i);
+        window_bound = (shape.dimensions(i) != 0) ? 1 : 0;
+      } else {
+        dim_numbers.add_offset_dims(i);
+        window_bound = shape.dimensions(i);
+      }
+      slice_sizes.push_back(window_bound);
+    }
+
+    dim_numbers.set_index_vector_dim(1);
+
+    return Gather(x, start_indices, dim_numbers, slice_sizes,
+                  /*indices_are_sorted=*/true);
+  });
+}
+
 XlaOp SetMatrixDiagonal(XlaOp matrix, XlaOp diag, int k) {
   XlaBuilder* builder = matrix.builder();
   return builder->ReportErrorOrReturn([&]() -> StatusOr<XlaOp> {
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix.h,"@@ -17,6 +17,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_CLIENT_LIB_MATRIX_H_
 
 #include <array>
+#include <vector>
 
 #include ""absl/strings/string_view.h""
 #include ""absl/types/span.h""
@@ -44,6 +45,7 @@ XlaOp GetDiagonalMask(XlaOp x, int diagonal = 0);
 //  If k < 0: then the output has shape [..., min(M + k, N)], containing the
 //            diagonal elements (i.e., with indices [..., i - k, i]).
 XlaOp GetMatrixDiagonal(XlaOp x, int k = 0);
+XlaOp GetMatrixDiagonalViaGather(XlaOp x, int k = 0);
 
 // Places diag along the kth diagonal of target.
 XlaOp SetMatrixDiagonal(XlaOp matrix, XlaOp diag, int k = 0);
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,matrix_test.cc,"@@ -16,6 +16,9 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/client/lib/matrix.h""
 
 #include <limits>
+#include <string>
+#include <map>
+#include <vector>
 
 #include ""absl/strings/string_view.h""
 #include ""tensorflow/compiler/xla/client/lib/constants.h""
@@ -36,6 +39,8 @@ class MatrixTest : public ClientLibraryTestBase {
   template <typename T>
   void TestMatrixDiagonal();
   template <typename T>
+  void TestMatrixDiagonal4D();
+  template <typename T>
   void TestSetMatrixDiagonal();
 
   template <typename T>
@@ -118,6 +123,43 @@ XLA_TEST_F(MatrixTest, GetMatrixDiagonal_S64) { TestMatrixDiagonal<int64>(); }
 
 XLA_TEST_F(MatrixTest, GetMatrixDiagonal_F32) { TestMatrixDiagonal<float>(); }
 
+template <typename T>
+void MatrixTest::TestMatrixDiagonal4D() {
+  XlaBuilder builder(""GetMatrixDiagonal"");
+  Array4D<T> input(2, 2, 4, 3);
+  input.FillIota(0);
+  std::map<int, Array3D<T>> k_and_expected = {
+      {0, {{{0, 4, 8}, {12, 16, 20}}, {{24, 28, 32}, {36, 40, 44}}}},
+      {1, {{{1, 5}, {13, 17}}, {{25, 29}, {37, 41}}}},
+      {2, {{{2}, {14}}, {{26}, {38}}}},
+      {3, {{{}, {}}, {{}, {}}}},
+      {4, {{{}, {}}, {{}, {}}}},
+      {-1, {{{3, 7, 11}, {15, 19, 23}}, {{27, 31, 35}, {39, 43, 47}}}},
+      {-2, {{{6, 10}, {18, 22}}, {{30, 34}, {42, 46}}}},
+      {-3, {{{9}, {21}}, {{33}, {45}}}},
+      {-4, {{{}, {}}, {{}, {}}}},
+  };
+  for (const auto& kv : k_and_expected) {
+    XlaOp a;
+    auto a_data = CreateR4Parameter<T>(input, 0, ""a"", &builder, &a);
+    GetMatrixDiagonal(a, kv.first);
+
+    ComputeAndCompareR3<T>(&builder, kv.second, {a_data.get()});
+  }
+}
+
+XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_S32) {
+  TestMatrixDiagonal4D<int32>();
+}
+
+XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_S64) {
+  TestMatrixDiagonal4D<int64>();
+}
+
+XLA_TEST_F(MatrixTest, GetMatrixDiagonal4D_F32) {
+  TestMatrixDiagonal4D<float>();
+}
+
 Array3D<float> BatchedAValsFull() {
   return {{
               {2, 0, 1, 2},
",0,train
c250a34f8a30566c55f46eb58487b211f13e7df2,tensorflow/tensorflow,[XLA] diag-part is implemented as xla gather operation,client_library_test_base.h,"@@ -344,8 +344,8 @@ class ClientLibraryTestBase : public ::testing::Test {
       const string& name, XlaBuilder* builder, XlaOp* data_handle);
 
   // Creates a parameter instruction that wraps the given constant array
-  // ""array_2d"" and then stores to ""data_handle"" the global handle for that
-  // parameter.
+  // ""array_2d"" and then stores it to the global handle for that parameter
+  // ""data_handle"".
   //
   // ""parameter_number"" is the parameter number.
   // ""name"" is the name of the parameter instruction.
@@ -358,8 +358,8 @@ class ClientLibraryTestBase : public ::testing::Test {
       const string& name, XlaBuilder* builder, XlaOp* data_handle);
 
   // Creates a parameter instruction that wraps the given constant array
-  // ""array_3d"" and then stores to ""data_handle"" the global handle for that
-  // parameter.
+  // ""array_3d"" and then stores it to the global handle for that parameter
+  // ""data_handle"".
   //
   // ""parameter_number"" is the parameter number.
   // ""name"" is the name of the parameter instruction.
@@ -371,6 +371,20 @@ class ClientLibraryTestBase : public ::testing::Test {
       const Array3D<NativeT>& array_3d, int64 parameter_number,
       const string& name, XlaBuilder* builder, XlaOp* data_handle);
 
+  // Creates a parameter instruction that wraps the given constant array
+  // ""array_4d"" and then stores it to the global handle for that parameter
+  // ""data_handle"".
+  //
+  // ""parameter_number"" is the parameter number.
+  // ""name"" is the name of the parameter instruction.
+  //
+  // When the use_bfloat16 flag is set but NativeT is float, the data will be
+  // converted to bfloat16.
+  template <typename NativeT>
+  std::unique_ptr<GlobalData> CreateR4Parameter(
+      const Array4D<NativeT>& array_4d, int64 parameter_number,
+      const string& name, XlaBuilder* builder, XlaOp* data_handle);
+
   // Getter and setter for the use_bfloat16 flag, which indicates whether to run
   // tests with all float-type input/output converted to bfloat16.
   bool use_bfloat16() const { return use_bfloat16_; }
@@ -603,6 +617,20 @@ std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR3Parameter(
   return data;
 }
 
+template <typename NativeT>
+std::unique_ptr<GlobalData> ClientLibraryTestBase::CreateR4Parameter(
+    const Array4D<NativeT>& array_4d, int64 parameter_number,
+    const string& name, XlaBuilder* builder, XlaOp* data_handle) {
+  Literal literal = LiteralUtil::CreateR4FromArray4D(array_4d);
+  if (use_bfloat16_ && literal.shape().element_type() == F32) {
+    literal = LiteralUtil::ConvertF32ToBF16(literal);
+  }
+  std::unique_ptr<GlobalData> data =
+      client_->TransferToServer(literal).ConsumeValueOrDie();
+  *data_handle = Parameter(builder, parameter_number, literal.shape(), name);
+  return data;
+}
+
 template <typename NativeT>
 std::vector<NativeT> ClientLibraryTestBase::CreatePseudorandomR1(
     const int width, NativeT min_value, NativeT max_value, uint32 seed) {
",0,train
39ae74c84bfde629d1bd2bed2f88f3e32f5417c3,tensorflow/tensorflow,"allocate_persistent comment update in op_kernel.h

PiperOrigin-RevId: 373557953
Change-Id: I99efb23a1ee27e2941120132ddb8e4d582386c0a",op_kernel.h,"@@ -272,13 +272,8 @@ class OpKernelConstruction {
   // Op kernel construction. Scratch tensors should be allocated using
   // allocate_temp below. Some kernels need to keep tensors in between
   // invocations. If such a Tensor is allocated during kernel
-  // construction this must be done using allocate_persistent, and the
-  // Op may only store the returned PersistentTensor object. When the
-  // Tensor is needed in a subsequent invocation, it can be retrieved
-  // from the PersistentTensor using the AccessTensor method. This
-  // ensures that the system is made aware of any use of the tensor's
-  // allocated memory, which is needed for correctness on asynchronous
-  // devices such as GPUs.
+  // construction this also must be done using allocate_temp, and the
+  // Op may only store the returned Tensor object.
 
   // Allocates a temporary Tensor of the specified type and shape. The
   // Tensor must not be used after kernel construction is
@@ -288,6 +283,9 @@ class OpKernelConstruction {
   Status allocate_temp(DataType type, const TensorShape& shape,
                        Tensor* out_temp, AllocatorAttributes allocator_attr);
 
+  // The following call is obsolete per b/185257650 and kept in place until
+  // it is fully removed from the code base.
+  // Please use Tensor class and allocate_temp instead.
   // Allocates a Tensor of the specified type and shape which the Op
   // plans to maintain as persistent state. out_persistent holds the
   // PersistentTensor which is the object the caller should store. For
",0,train
3c80efb7c9db7ff859cb748ea743c6693296f211,tensorflow/tensorflow,"Remove <code> tags visible in browser tabs.
Change: 147622442",parser.py,"@@ -583,7 +583,7 @@ def _generate_markdown_for_function(full_name, duplicate_names,
   else:
     aliases = ''
 
-  return '# `%s%s`\n\n%s%s%s' % (
+  return '# %s%s\n\n%s%s%s' % (
       full_name, signature, aliases, guides, docstring)
 
 
@@ -628,7 +628,7 @@ def _generate_markdown_for_class(full_name, duplicate_names, py_class,
   else:
     aliases = ''
 
-  docs = '# `%s`\n\n%s%s%s\n\n' % (full_name, aliases, guides, docstring)
+  docs = '# %s\n\n%s%s%s\n\n' % (full_name, aliases, guides, docstring)
 
   field_names = []
   properties = []
@@ -750,7 +750,7 @@ def _generate_markdown_for_module(full_name, duplicate_names, module,
 
   # TODO(deannarubin): Make this list into a table.
 
-  return '# Module `%s`\n\n%s%s\n\n## Members\n\n%s' % (
+  return '# Module: %s\n\n%s%s\n\n## Members\n\n%s' % (
       full_name, aliases, docstring, '\n\n'.join(member_links))
 
 
",0,train
598f13dc7cd495a4d5be1ecddbc34a3780ef6e8d,tensorflow/tensorflow,Refactor RepeatDatasetOpTest,repeat_dataset_op_test.cc,"@@ -41,10 +41,10 @@ class RepeatDatasetOpTest : public DatasetOpsTestBase {
       const DataTypeVector &output_types,
       const std::vector<PartialTensorShape> &output_shapes,
       std::unique_ptr<OpKernel> *op_kernel) {
-    node_def_ = test::function::NDef(
+    NodeDef node_def = test::function::NDef(
         kNodeName, kOpName, {""input_dataset"", ""count""},
         {{""output_types"", output_types}, {""output_shapes"", output_shapes}});
-    TF_RETURN_IF_ERROR(CreateOpKernel(node_def_, op_kernel));
+    TF_RETURN_IF_ERROR(CreateOpKernel(node_def, op_kernel));
     return Status::OK();
   }
 
@@ -56,9 +56,6 @@ class RepeatDatasetOpTest : public DatasetOpsTestBase {
     TF_RETURN_IF_ERROR(CreateOpKernelContext(op_kernel, inputs, context));
     return Status::OK();
   }
-
- private:
-  NodeDef node_def_;
 };
 
 struct TestCase {
@@ -123,11 +120,11 @@ TestCase ForeverRepeatTestCase() {
           /*breakpoints*/ {0, 1, 3}};
 }
 
-class ParameterizedDatasetTest
+class ParameterizedDatasetOpTest
     : public RepeatDatasetOpTest,
       public ::testing::WithParamInterface<TestCase> {};
 
-TEST_P(ParameterizedDatasetTest, GetNext) {
+TEST_P(ParameterizedDatasetOpTest, GetNext) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -198,7 +195,38 @@ TEST_P(ParameterizedDatasetTest, GetNext) {
   }
 }
 
-TEST_F(RepeatDatasetOpTest, DatasetName) {
+TEST_F(RepeatDatasetOpTest, DatasetNodeName) {
+  int thread_num = 2, cpu_num = 2;
+  TF_ASSERT_OK(InitThreadPool(thread_num));
+  TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
+
+  const TestCase &test_case = FiniteRepeatTestCase();
+  Tensor tensor_slice_dataset_tensor(DT_VARIANT, TensorShape({}));
+  std::vector<Tensor> inputs_for_tensor_slice_dataset = test_case.input_tensors;
+  TF_ASSERT_OK(CreateTensorSliceDatasetTensor(&inputs_for_tensor_slice_dataset,
+                                              &tensor_slice_dataset_tensor));
+  Tensor count = CreateTensor<int64>(TensorShape{}, {test_case.count});
+  gtl::InlinedVector<TensorValue, 4> inputs_for_repeat_dataset;
+  inputs_for_repeat_dataset.emplace_back(&tensor_slice_dataset_tensor);
+  inputs_for_repeat_dataset.emplace_back(&count);
+
+  std::unique_ptr<OpKernel> repeat_dataset_kernel;
+  TF_ASSERT_OK(CreateRepeatDatasetKernel(test_case.expected_output_dtypes,
+                                         test_case.expected_output_shapes,
+                                         &repeat_dataset_kernel));
+  std::unique_ptr<OpKernelContext> repeat_dataset_context;
+  TF_ASSERT_OK(CreateRepeatDatasetContext(repeat_dataset_kernel.get(),
+                                          &inputs_for_repeat_dataset,
+                                          &repeat_dataset_context));
+  DatasetBase *repeat_dataset;
+  TF_ASSERT_OK(CreateDataset(repeat_dataset_kernel.get(),
+                             repeat_dataset_context.get(), &repeat_dataset));
+  core::ScopedUnref scoped_unref(repeat_dataset);
+
+  EXPECT_EQ(repeat_dataset->node_name(), kNodeName);
+}
+
+TEST_F(RepeatDatasetOpTest, DatasetTypeString) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -229,7 +257,7 @@ TEST_F(RepeatDatasetOpTest, DatasetName) {
   EXPECT_EQ(repeat_dataset->type_string(), kOpName);
 }
 
-TEST_P(ParameterizedDatasetTest, DatasetOutputDtypes) {
+TEST_P(ParameterizedDatasetOpTest, DatasetOutputDtypes) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -259,7 +287,7 @@ TEST_P(ParameterizedDatasetTest, DatasetOutputDtypes) {
                                 test_case.expected_output_dtypes));
 }
 
-TEST_P(ParameterizedDatasetTest, DatasetOutputShapes) {
+TEST_P(ParameterizedDatasetOpTest, DatasetOutputShapes) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -289,7 +317,7 @@ TEST_P(ParameterizedDatasetTest, DatasetOutputShapes) {
                                       test_case.expected_output_shapes));
 }
 
-TEST_P(ParameterizedDatasetTest, Cardinality) {
+TEST_P(ParameterizedDatasetOpTest, Cardinality) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -354,7 +382,7 @@ TEST_F(RepeatDatasetOpTest, DatasetSave) {
   TF_ASSERT_OK(writer.Flush());
 }
 
-TEST_P(ParameterizedDatasetTest, IteratorOutputDtypes) {
+TEST_P(ParameterizedDatasetOpTest, IteratorOutputDtypes) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -391,7 +419,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputDtypes) {
                                 test_case.expected_output_dtypes));
 }
 
-TEST_P(ParameterizedDatasetTest, IteratorOutputShapes) {
+TEST_P(ParameterizedDatasetOpTest, IteratorOutputShapes) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -428,7 +456,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputShapes) {
                                       test_case.expected_output_shapes));
 }
 
-TEST_P(ParameterizedDatasetTest, IteratorOutputPrefix) {
+TEST_P(ParameterizedDatasetOpTest, IteratorOutputPrefix) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -470,7 +498,7 @@ TEST_P(ParameterizedDatasetTest, IteratorOutputPrefix) {
   }
 }
 
-TEST_P(ParameterizedDatasetTest, Roundtrip) {
+TEST_P(ParameterizedDatasetOpTest, Roundtrip) {
   int thread_num = 2, cpu_num = 2;
   TF_ASSERT_OK(InitThreadPool(thread_num));
   TF_ASSERT_OK(InitFunctionLibraryRuntime({}, cpu_num));
@@ -550,7 +578,7 @@ TEST_P(ParameterizedDatasetTest, Roundtrip) {
   }
 }
 
-INSTANTIATE_TEST_SUITE_P(RepeatDatasetOpTest, ParameterizedDatasetTest,
+INSTANTIATE_TEST_SUITE_P(RepeatDatasetOpTest, ParameterizedDatasetOpTest,
                          ::testing::ValuesIn(std::vector<TestCase>(
                              {FiniteRepeatTestCase(), EmptyRepeatTestCase(),
                               ForeverRepeatTestCase()})));
",0,train
9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate.

PiperOrigin-RevId: 310835481
Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",acceleration_test_list.cc,"@@ -300,13 +300,15 @@ VariedShapeSpec/ReshapeOpTest/RegularShapes/1
 VariedShapeSpec/ReshapeOpTest/WithStretchDimension/1
 
 # resize_bilinear_test
+// align_corners & half_pixel_centers are not implemented in NNAPI before API 30
+ResizeBilinearOpTest/ResizeBilinearOpTest.+HalfPixelCenters.*,30
 // Only models with constant size tensor are accelerated
 ResizeBilinearOpTest/ResizeBilinearOpTest/.+/0,29
 
 # resize_nearest_neighbor_test
-// align_corners & half_pixel_centers are not implemented in NNAPI.
--ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,29
--ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,29
+// align_corners & half_pixel_centers are not implemented in NNAPI before API 30
+ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+AlignCorners.*,30
+ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest.+HalfPixelCenters.*,30
 // Only models with constant size tensor are accelerated
 ResizeNearestNeighborOpTest/ResizeNearestNeighborOpTest/.+/0,29
 
",0,train
9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate.

PiperOrigin-RevId: 310835481
Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",nnapi_delegate.cc,"@@ -1648,13 +1648,14 @@ bool NNAPIDelegateKernel::Validate(
       }
       auto builtin =
           reinterpret_cast<TfLiteResizeBilinearParams*>(node->builtin_data);
-      Expect(!builtin->align_corners,
-             NNAPIValidationFailureType::kUnsupportedOperandValue,
-             ""NNAPI does not support align_corners == true."", &val_ctx);
-      // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior.
-      Expect(!builtin->half_pixel_centers,
-             NNAPIValidationFailureType::kUnsupportedOperandValue,
-             ""NNAPI does not support half_pixel_centers == true."", &val_ctx);
+      if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
+        Expect(!builtin->align_corners,
+               NNAPIValidationFailureType::kUnsupportedOperandValue,
+               ""NNAPI does not support align_corners == true."", &val_ctx);
+        Expect(!builtin->half_pixel_centers,
+               NNAPIValidationFailureType::kUnsupportedOperandValue,
+               ""NNAPI does not support half_pixel_centers == true."", &val_ctx);
+      }
       if (android_sdk_version < kMinSdkVersionForNNAPI12) {
         Expect(input.type == kTfLiteFloat32,
                NNAPIValidationFailureType::kUnsupportedInputType,
@@ -1668,14 +1669,14 @@ bool NNAPIDelegateKernel::Validate(
       ExpectIsFloatOrQuant8Operator(context, node, &val_ctx);
       auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
           node->builtin_data);
-      // TODO(b/149823713): Update when NNAPI delegate can support align_corners
-      // & half_pixel_centers.
-      Expect(!builtin->align_corners,
-             NNAPIValidationFailureType::kUnsupportedOperandValue,
-             ""NNAPI does not support align_corners == true."", &val_ctx);
-      Expect(!builtin->half_pixel_centers,
-             NNAPIValidationFailureType::kUnsupportedOperandValue,
-             ""NNAPI does not support half_pixel_centers == true."", &val_ctx);
+      if (android_sdk_version <= kMinSdkVersionForNNAPI12) {
+        Expect(!builtin->align_corners,
+               NNAPIValidationFailureType::kUnsupportedOperandValue,
+               ""NNAPI does not support align_corners == true."", &val_ctx);
+        Expect(!builtin->half_pixel_centers,
+               NNAPIValidationFailureType::kUnsupportedOperandValue,
+               ""NNAPI does not support half_pixel_centers == true."", &val_ctx);
+      }
     } break;
     case kTfLiteBuiltinSqueeze: {
       ExpectOpVersion(version, 1, &val_ctx);
@@ -2436,6 +2437,14 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       const int output_width = output.dims->data[2];
       mapping_args.builder->AddScalarInt32Operand(output_width);
       mapping_args.builder->AddScalarInt32Operand(output_height);
+      auto builtin = reinterpret_cast<TfLiteResizeBilinearParams*>(
+          mapping_args.node->builtin_data);
+      if (builtin->align_corners == true ||
+          builtin->half_pixel_centers == true) {
+        mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
+        mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
+        mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
+      }
       *nn_op_type = ANEURALNETWORKS_RESIZE_BILINEAR;
     } break;
     case kTfLiteBuiltinResizeNearestNeighbor: {
@@ -2445,7 +2454,13 @@ TfLiteStatus NNAPIDelegateKernel::Map(
       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[1]);
       mapping_args.builder->AddScalarInt32Operand(new_shape.data.i32[0]);
       mapping_args.builder->AddScalarBoolOperand(false);  // Use NHWC format
-
+      auto builtin = reinterpret_cast<TfLiteResizeNearestNeighborParams*>(
+          mapping_args.node->builtin_data);
+      if (builtin->align_corners == true ||
+          builtin->half_pixel_centers == true) {
+        mapping_args.builder->AddScalarBoolOperand(builtin->align_corners);
+        mapping_args.builder->AddScalarBoolOperand(builtin->half_pixel_centers);
+      }
       *nn_op_type = ANEURALNETWORKS_RESIZE_NEAREST_NEIGHBOR;
     } break;
     case kTfLiteBuiltinSqueeze: {
",0,train
9f320410a2cc0de6e3cdc8d372bfa79676f85058,tensorflow/tensorflow,"Support align_corners and half_pixel_centers for resize ops in NNAPI delegate.

PiperOrigin-RevId: 310835481
Change-Id: I6538e64b453bc3b633a5656a8130ed2139781a94",resize_bilinear_test.cc,"@@ -190,10 +190,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatches) {
 
 TEST_P(ResizeBilinearOpTest,
        TwoDimensionalResizeWithTwoBatches_HalfPixelCenters) {
-  // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    return;
-  }
   ResizeBilinearOpModel m({TensorType_FLOAT32, {2, 2, 2, 1}}, {3, 3},
                           GetParam(), /**half_pixel_centers**/ true);
   m.SetInput<float>({
@@ -253,10 +249,6 @@ TEST_P(ResizeBilinearOpTest, TwoDimensionalResizeWithTwoBatchesUInt8) {
 
 TEST_P(ResizeBilinearOpTest,
        TwoDimensionalResizeWithTwoBatchesUInt8_HalfPixelCenters) {
-  // TODO(b/147696142): Update when NNAPI delegate can support TF2 behavior.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    return;
-  }
   ResizeBilinearOpModel m({TensorType_UINT8, {2, 2, 2, 1}}, {3, 3}, GetParam(),
                           /**half_pixel_centers**/ true);
   m.SetInput<uint8>({
",0,train
6161367d8942561f2eeb640b3798cfd8397ea35c,tensorflow/tensorflow,"Blacklist XRT ops in TF Eager's small Tensor pin to host optimization.

PiperOrigin-RevId: 231513080",execute.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <vector>
 
+#include ""absl/strings/match.h""
 #include ""tensorflow/core/common_runtime/device.h""
 #include ""tensorflow/core/common_runtime/device_set.h""
 #include ""tensorflow/core/common_runtime/eager/context.h""
@@ -750,7 +751,10 @@ bool IsPinnableOp(const string& op_type) {
       ""StatelessRandomNormal"",
   });
 
-  return unpinnable_ops->find(op_type) == unpinnable_ops->end();
+  // XRT ops refer to per-device handles that are not safe to move between
+  // devices.
+  return unpinnable_ops->find(op_type) == unpinnable_ops->end() &&
+         !absl::StartsWith(op_type, ""XRT"");
 }
 
 // The Op device may be updated if:
",0,train
c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes.

Fixes #14164.

PiperOrigin-RevId: 174934769",network.py,"@@ -244,6 +244,12 @@ class Network(base.Layer):
     self._owned_layers = {}
     # The scope to use if we end up without a parent.
     self._default_parent_variable_scope = variable_scope.get_variable_scope()
+    # Hold on to the variable scope counts from init to check whether a scope
+    # with the name we want was ever created in our parent scope. Without this
+    # check we might have name collisions if the parent scope on init gets
+    # closed before build is called.
+    self._variable_scope_counts_on_init = (
+        variable_scope._get_default_variable_store().variable_scopes_count)
     self._custom_getter, self._deferred_restorations = (
         _make_custom_getter_for_deferred_restorations())
 
@@ -261,18 +267,29 @@ class Network(base.Layer):
 
   def _finalize_name(self, parent_network):
     if not self._name:
-      if not parent_network:
-        name_uid_map = base._get_default_graph_uid_map()
-      else:
-        name_uid_map = parent_network._sub_layer_name_uids
       # We were not passed a name explicitly (or it was blank), so this is an
       # anonymous Network. We make up a unique name.
       if parent_network:
         avoid_names = parent_network._owned_layers
+        name_uid_map = parent_network._sub_layer_name_uids
       else:
-        avoid_names = None
+        name_uid_map = base._get_default_graph_uid_map()
+        # Figure out which names we have to avoid based on which variable scope
+        # we're nested in.
+        strip_name = self._default_parent_variable_scope.name
+        if strip_name:
+          strip_name += ""/""
+        def _strip_on_init_scope(name):
+          if name.startswith(strip_name):
+            return name[len(strip_name):]
+          else:
+            return None
+        avoid_names = set(
+            _strip_on_init_scope(name)
+            for name in self._variable_scope_counts_on_init.keys() if name)
       self._name, self._base_name = self._make_unique_name(
-          name_uid_map=name_uid_map, avoid_names=avoid_names)
+          name_uid_map=name_uid_map, avoid_names=avoid_names,
+          namespace=self._default_parent_variable_scope.name)
     if self._first_parent is None or (self._first_parent  # False = no parent
                                       and self._first_parent() is None):
       # Save a pointer to the parent Network so that we can later check that the
@@ -302,7 +319,13 @@ class Network(base.Layer):
         parent_scope = first_parent._scope
       else:
         parent_scope = self._default_parent_variable_scope
-      with variable_scope.variable_scope(parent_scope):
+      with variable_scope.variable_scope(parent_scope) as parent_vs:
+        expected_scope_name = parent_vs.name + ""/"" + self._name
+        if expected_scope_name in self._variable_scope_counts_on_init:
+          raise ValueError(
+              (""A Network named '%s' already exists (or a variable_scope was ""
+               ""created with this name). Names must be unique."") % (
+                   self._name,))
         # Make sure variables with this prefix will be unique.
         with variable_scope.variable_scope(
             None, use_resource=True, default_name=self._name) as scope:
@@ -319,25 +342,22 @@ class Network(base.Layer):
                  ""created with this name). Names must be unique."") % (
                      self._name,))
           if (first_parent
-              and scope_prefix[:-1] != first_parent._scope.name):
+              and scope_prefix[:-1] != first_parent.scope_name):
             raise ValueError(
                 (""Network variable names must match a nesting of sub-Network ""
                  ""names. Expected prefix '%s' from parent network, but got ""
                  ""'%s' when attempting to create a variable_scope for Network ""
                  ""'%s'. Likely an explicit variable_scope was inserted into ""
                  ""the nesting."") % (
-                     first_parent._scope.name,
+                     first_parent.scope_name,
                      scope_prefix[:-1],
                      self._name))
           elif not first_parent and scope_prefix:
             # For the case when this Network is not nested inside any other
-            # Network, but is in a variable_scope. This is an error for now.
-            raise ValueError(
-                ""Creating Networks inside named variable_scopes is currently ""
-                ""not supported (to ensure that variable names match the names ""
-                ""of Networks in which they were first created). To set ""
-                ""options, try `with tf.variable_scope(''):`. If this ""
-                ""limitation bothers you, please file a feature request."")
+            # Network, but is in a variable_scope. This Network's name takes on
+            # the full variable scope prefix.
+            self._name = scope_name
+
       for non_network_sublayer in self._non_network_sublayers:
         self._set_scope_for_nonnetwork_sublayer(non_network_sublayer)
 
@@ -355,8 +375,7 @@ class Network(base.Layer):
         raise ValueError(
             (""The parent of a Layer added to Network %s was garbage collected ""
              ""before the Layer was built. If this limitation bothers you ""
-             ""please, comment on ""
-             ""https://github.com/tensorflow/tensorflow/issues/14164."") %
+             ""please file a feature request."") %
             (self.name,))
       with variable_scope.variable_scope(parent_scope):
         # Horrid hack to make Layer variable names which are direct
@@ -420,7 +439,9 @@ class Network(base.Layer):
             # name, and we should respect it (subject to error checking).
             layer._name, layer._base_name = layer._make_unique_name(
                 name_uid_map=self._sub_layer_name_uids,
-                avoid_names=self._owned_layers)
+                avoid_names=self._owned_layers
+                # No namespace required, since we've specified our own UID map.
+            )
           layer._first_parent = weakref.ref(self)
         self._non_network_sublayers.append(layer)
     if (not layer.built
@@ -556,7 +577,7 @@ class Network(base.Layer):
     if os.path.isdir(save_path):
       # If we were passed a directory, default to naming based on the Network
       # name.
-      save_path = os.path.join(save_path, self.name)
+      save_path = os.path.join(save_path, self.name.replace(""/"", ""_""))
     user_map_func = map_func
     if map_func is None:
       map_func = _make_prefix_stripping_map_fn(self.scope_name)
@@ -750,7 +771,7 @@ class Network(base.Layer):
     self._set_scope()  # scope_name should be available to map_funcs
     if os.path.isdir(save_path):
       # If we don't have a name yet, set no parent.
-      save_path = os.path.join(save_path, self.name)
+      save_path = os.path.join(save_path, self.name.replace(""/"", ""_""))
     user_map_func = map_func
     if map_func is None:
       map_func = _make_prefix_stripping_map_fn(self.scope_name)
",0,train
c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes.

Fixes #14164.

PiperOrigin-RevId: 174934769",network_test.py,"@@ -410,19 +410,103 @@ class NetworkTest(test.TestCase):
 
   @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
   def testWrappingInVariableScope(self):
+    one = constant_op.constant([[1.]])
+    # Naming happens in the order of first build rather than the order of
+    # construction, but for clarity they're the same here and construction is
+    # annotated.
+    outside_net_before = MyNetwork()  # name=my_network_1
+    outside_net_before(one)
+    captured_scope = variable_scope.get_variable_scope()
     with variable_scope.variable_scope(""outside_scope""):
-      net = MyNetwork()
-      one = constant_op.constant([[1.]])
-      with self.assertRaisesRegexp(
-          ValueError,
-          (""Creating Networks inside named variable_scopes is currently not ""
-           ""supported"")):
-        net(one)
-      # Alternatively, we could re-name the Network to match the variable_scope:
-      # self.assertEqual(""outside_scope/my_network_1"", net.name)
-      # self.assertStartsWith(
-      #     expected_start=""outside_scope/my_network_1/dense/"",
-      #     actual=net.trainable_weights[0].name)
+      net1 = MyNetwork()  # name=outside_scope/my_network_1
+      net1(one)
+      name_conflict1 = MyNetwork(name=""name_conflict"")  # fine, unique so far
+      name_conflict2 = MyNetwork(name=""name_conflict"")  # error on build
+      with variable_scope.variable_scope(""inside_scope""):
+        # No issue here since the name is unique within its scope.
+        name_conflict3 = MyNetwork(name=""name_conflict"")
+      net2 = MyNetwork()  # name=outside_scope/my_network_3 to avoid the
+                          # variable_scope my_network_2 below.
+      vs_name_conflict = MyNetwork(name=""vs_name_conflict"")  # conflict below
+    with variable_scope.variable_scope(""intervening_scope""):
+      with variable_scope.variable_scope(captured_scope):
+        with variable_scope.variable_scope(""outside_scope""):
+          name_conflict4 = MyNetwork(name=""name_conflict"")  # error on build
+          with variable_scope.variable_scope(""my_network_2""):
+            pass
+          with variable_scope.variable_scope(""vs_name_conflict""):
+            pass
+          net3 = MyNetwork()  # name=outside_scope/my_network_4
+    name_conflict1(one)
+    with self.assertRaisesRegexp(
+        ValueError, ""named 'name_conflict' already exists""):
+      name_conflict2(one)
+    name_conflict3(one)
+    net2(one)
+    with self.assertRaisesRegexp(
+        ValueError, ""or a variable_scope was created with this name""):
+      vs_name_conflict(one)
+    with self.assertRaisesRegexp(
+        ValueError, ""named 'name_conflict' already exists""):
+      name_conflict4(one)
+    self.assertEqual(""outside_scope/name_conflict"",
+                     name_conflict1.name)
+    self.assertStartsWith(
+        expected_start=""outside_scope/name_conflict/dense_1/"",
+        actual=name_conflict1.variables[0].name)
+    self.assertEqual(""outside_scope/inside_scope/name_conflict"",
+                     name_conflict3.name)
+    self.assertStartsWith(
+        expected_start=""outside_scope/inside_scope/name_conflict/dense_1/"",
+        actual=name_conflict3.variables[0].name)
+    self.assertEqual(""outside_scope/my_network_1"", net1.name)
+    self.assertStartsWith(
+        expected_start=""outside_scope/my_network_1/dense_1/"",
+        actual=net1.trainable_weights[0].name)
+    self.assertEqual(""outside_scope/my_network_3"", net2.name)
+    self.assertStartsWith(
+        expected_start=""outside_scope/my_network_3/dense_1/"",
+        actual=net2.trainable_weights[0].name)
+    net3(one)
+    self.assertEqual(""outside_scope/my_network_4"", net3.name)
+    self.assertStartsWith(
+        expected_start=""outside_scope/my_network_4/dense_1/"",
+        actual=net3.trainable_weights[0].name)
+    outside_net_after = MyNetwork()
+    outside_net_after(one)
+    self.assertEqual(""my_network_1"", outside_net_before.name)
+    self.assertStartsWith(
+        expected_start=""my_network_1/dense_1/"",
+        actual=outside_net_before.trainable_weights[0].name)
+    self.assertEqual(""my_network_2"", outside_net_after.name)
+    self.assertStartsWith(
+        expected_start=""my_network_2/dense_1/"",
+        actual=outside_net_after.trainable_weights[0].name)
+
+  @test_util.run_in_graph_and_eager_modes()
+  def testVariableScopeStripping(self):
+    with variable_scope.variable_scope(""scope1""):
+      with variable_scope.variable_scope(""scope2""):
+        net = MyNetwork()
+    net(constant_op.constant([[2.0]]))
+    self.evaluate(net.variables[0].assign([[42.]]))
+    self.assertEqual(net.name, ""scope1/scope2/my_network_1"")
+    self.assertStartsWith(
+        expected_start=""scope1/scope2/my_network_1/dense_1/"",
+        actual=net.trainable_weights[0].name)
+    save_path = net.save(self.get_temp_dir())
+    self.assertIn(""scope1_scope2_my_network_1"", save_path)
+    restore_net = MyNetwork()
+    # Delayed restoration
+    restore_net.restore(save_path)
+    restore_net(constant_op.constant([[1.0]]))
+    self.assertAllEqual([[42.]],
+                        self.evaluate(restore_net.variables[0]))
+    self.evaluate(restore_net.variables[0].assign([[-1.]]))
+    # Immediate restoration
+    restore_net.restore(save_path)
+    self.assertAllEqual([[42.]],
+                        self.evaluate(restore_net.variables[0]))
 
   @test_util.run_in_graph_and_eager_modes()
   def testLayerNamesRespected(self):
",0,train
c8530b907a686b92c94d13f854dc504fa10901db,tensorflow/tensorflow,"tfe.Network naming under variable scopes. Networks take on the full prefix of their parent variable scopes.

Fixes #14164.

PiperOrigin-RevId: 174934769",base.py,"@@ -401,10 +401,11 @@ class Layer(object):
     """"""
     return input_shape
 
-  def _make_unique_name(self, name_uid_map=None, avoid_names=None):
+  def _make_unique_name(self, name_uid_map=None, avoid_names=None,
+                        namespace=''):
     base_name = _to_snake_case(self.__class__.__name__)
     name = _unique_layer_name(base_name, name_uid_map=name_uid_map,
-                              avoid_names=avoid_names)
+                              avoid_names=avoid_names, namespace=namespace)
     return (name, base_name)
 
   def _set_scope(self, scope=None):
@@ -2370,7 +2371,7 @@ def _get_default_graph_uid_map():
   return name_uid_map
 
 
-def _unique_layer_name(name, name_uid_map=None, avoid_names=None):
+def _unique_layer_name(name, name_uid_map=None, avoid_names=None, namespace=''):
   """"""Makes a layer name (or arbitrary string) unique within a TensorFlow graph.
 
   Arguments:
@@ -2379,6 +2380,9 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None):
       names. If None (default), uses a per-Graph dictionary.
     avoid_names: An optional set or dict with names which should not be used. If
       None (default) does not avoid any names.
+    namespace: Gets a name which is unique within the (graph, namespace). Layers
+      which are not Networks use a blank namespace and so get graph-global
+      names.
 
   Returns:
     Unique string name.
@@ -2396,6 +2400,7 @@ def _unique_layer_name(name, name_uid_map=None, avoid_names=None):
     avoid_names = set()
   proposed_name = None
   while proposed_name is None or proposed_name in avoid_names:
-    name_uid_map[name] += 1
-    proposed_name = name + '_' + str(name_uid_map[name])
+    name_key = (namespace, name)
+    name_uid_map[name_key] += 1
+    proposed_name = name + '_' + str(name_uid_map[name_key])
   return proposed_name
",0,train
9bea7a8aa991b63f7349514a5a2dc0d04d261f8f,tensorflow/tensorflow,"Add support for Softmax of 3D tensors

PiperOrigin-RevId: 211524810",activations.cc,"@@ -200,7 +200,7 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_EQ(context, input->type, output->type);
 
   const int num_dims = NumDimensions(input);
-  TF_LITE_ENSURE(context, num_dims == 1 || num_dims == 2 || num_dims == 4);
+  TF_LITE_ENSURE(context, num_dims >= 1 && num_dims <= 4);
 
   if (input->type == kTfLiteUInt8) {
     TF_LITE_ENSURE_EQ(context, output->params.zero_point, 0);
@@ -453,6 +453,19 @@ void Softmax2DFloat(const TfLiteTensor* input, TfLiteTensor* output,
   Softmax(input->data.f, input_size, batch_size, params->beta, output->data.f);
 }
 
+// Takes a 3D tensor and performs softmax along the last dimension.
+void Softmax3DFloat(const TfLiteTensor* input, TfLiteTensor* output,
+                    TfLiteSoftmaxParams* params) {
+  const int batch_size = input->dims->data[0];
+  const int intermediate_size = input->dims->data[1];
+  const int input_size = input->dims->data[2];
+  optimized_ops::Softmax(
+      GetTensorData<float>(input),
+      GetTensorShape({batch_size, intermediate_size, 1, input_size}),
+      params->beta, GetTensorData<float>(output),
+      GetTensorShape({batch_size, intermediate_size, 1, input_size}));
+}
+
 void Softmax1DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                         TfLiteSoftmaxParams* params, OpData* data) {
   // TODO(ahentz): this is arguably a dirty trick. Since the implementation
@@ -480,6 +493,19 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
                          GetTensorShape({batch_size, 1, 1, input_size}));
 }
 
+void Softmax3DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
+                        TfLiteSoftmaxParams* params, OpData* data) {
+  const int batch_size = input->dims->data[0];
+  const int intermediate_size = input->dims->data[1];
+  const int input_size = input->dims->data[2];
+  optimized_ops::Softmax(
+      GetTensorData<uint8_t>(input),
+      GetTensorShape({batch_size, intermediate_size, 1, input_size}),
+      data->input_multiplier, data->input_left_shift, data->diff_min,
+      GetTensorData<uint8_t>(output),
+      GetTensorShape({batch_size, intermediate_size, 1, input_size}));
+}
+
 // Takes a 4D tensor and performs softmax along the fourth dimension.
 void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
                     TfLiteSoftmaxParams* params) {
@@ -515,6 +541,10 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
         Softmax2DFloat(input, output, params);
         return kTfLiteOk;
       }
+      if (NumDimensions(input) == 3) {
+        Softmax3DFloat(input, output, params);
+        return kTfLiteOk;
+      }
       if (NumDimensions(input) == 4) {
         Softmax4DFloat(input, output, params);
         return kTfLiteOk;
@@ -533,6 +563,10 @@ TfLiteStatus SoftmaxEval(TfLiteContext* context, TfLiteNode* node) {
         Softmax2DQuantized(input, output, params, data);
         return kTfLiteOk;
       }
+      if (NumDimensions(input) == 3) {
+        Softmax3DQuantized(input, output, params, data);
+        return kTfLiteOk;
+      }
       if (NumDimensions(input) == 4) {
         Softmax4DQuantized(input, output, params, data);
         return kTfLiteOk;
",0,test
9bea7a8aa991b63f7349514a5a2dc0d04d261f8f,tensorflow/tensorflow,"Add support for Softmax of 3D tensors

PiperOrigin-RevId: 211524810",activations_test.cc,"@@ -339,6 +339,76 @@ TEST(QuantizedActivationsOpTest, Softmax4D) {
                   kQuantizedTolerance)));
 }
 
+TEST(FloatActivationsOpTest, Softmax3D) {
+  FloatActivationsOpModel m(0.1,
+                            /*input=*/{TensorType_FLOAT32, {1, 2, 4}});
+  m.SetInput({
+      0, -6, 2, 4,   // depth = 0
+      3, -2, 10, 1,  // depth = 1
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetOutput(), ElementsAreArray(ArrayFloatNear({
+                                 .23463, .12877, .28658, .35003,  //
+                                 .22528, .13664, .45365, .18443,  //
+                             })));
+
+  // Same input, but a different shape.
+  FloatActivationsOpModel m2(0.1,
+                             /*input=*/{TensorType_FLOAT32, {4, 1, 2}});
+  m2.SetInput({
+      0, -6,  //
+      2, 4,   //
+      3, -2,  //
+      10, 1,  //
+  });
+  m2.Invoke();
+  EXPECT_THAT(m2.GetOutput(), ElementsAreArray(ArrayFloatNear({
+                                  0.645656, 0.354344,  //
+                                  0.450166, 0.549834,  //
+                                  0.622459, 0.377541,  //
+                                  0.710949, 0.28905,   //
+                              })));
+}
+
+TEST(QuantizedActivationsOpTest, Softmax3D) {
+  QuantizedActivationsOpModel m(
+      0.1,
+      /*input=*/{TensorType_UINT8, {1, 2, 4}, -10, 10});
+  m.SetInput<uint8_t>({
+      0, -6, 2, 4,   // depth = 0
+      3, -2, 10, 1,  // depth = 1
+  });
+  m.Invoke();
+  EXPECT_THAT(m.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      .23463, .12877, .28658, .35003,  //
+                      .22528, .13664, .45365, .18443,  //
+                  },
+                  kQuantizedTolerance)));
+
+  // Same input, but a different shape.
+  QuantizedActivationsOpModel m2(
+      0.1,
+      /*input=*/{TensorType_UINT8, {4, 1, 2}, -10, 10});
+  m2.SetInput<uint8_t>({
+      0, -6,  //
+      2, 4,   //
+      3, -2,  //
+      10, 1,  //
+  });
+  m2.Invoke();
+  EXPECT_THAT(m2.GetDequantizedOutput<uint8_t>(),
+              ElementsAreArray(ArrayFloatNear(
+                  {
+                      0.645656, 0.354344,  //
+                      0.450166, 0.549834,  //
+                      0.622459, 0.377541,  //
+                      0.710949, 0.28905,   //
+                  },
+                  kQuantizedTolerance)));
+}
+
 TEST(FloatActivationsOpTest, Softmax1D) {
   FloatActivationsOpModel m(0.1,
                             /*input=*/{TensorType_FLOAT32, {8}});
",0,test
7e54bf3113361e36f1a49e71a6ecbcd10ddf7015,tensorflow/tensorflow,"[XLA] Add pattern matcher for select and scatter.

PiperOrigin-RevId: 337122945
Change-Id: I9aaa3cdcb3cb3379492e4697ada514124995a8ce",pattern_matcher.h,"@@ -2125,6 +2125,7 @@ XLA_BINOP_PATTERN(ShiftRightLogical)
 XLA_TERNOP_PATTERN(Clamp);
 XLA_TERNOP_PATTERN(Scatter);
 XLA_TERNOP_PATTERN(Select);
+XLA_TERNOP_PATTERN(SelectAndScatter);
 #undef XLA_TERNOP_PATTERN
 
 namespace detail {
",0,train
c6156d4c7bf79250626b8f13f752777b24967455,tensorflow/tensorflow,"Minor refactor: move unused_min / unused_max variables to the smallest scope possible.

PiperOrigin-RevId: 289475255
Change-Id: I16d718482e91d51def3d2eb3a52f444763382ee0",lstm_eval.cc,"@@ -500,9 +500,9 @@ inline void LstmStepHybrid(
   // For each batch and cell: compute input_weight * input.
   // Skip if input is all zeros.
   if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) {
-    float unused_min, unused_max;
     for (int b = 0; b < n_batch; ++b) {
       const int offset = b * n_input;
+      float unused_min, unused_max;
       tensor_utils::SymmetricQuantizeFloats(
           input_ptr + offset, n_input, quantized_input_ptr + offset,
           &unused_min, &unused_max, &scaling_factors[b]);
@@ -549,9 +549,9 @@ inline void LstmStepHybrid(
   // Skip if auxiliary input is not available or all zeros.
   if (aux_input_ptr != nullptr &&
       !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) {
-    float unused_min, unused_max;
     for (int b = 0; b < n_batch; ++b) {
       const int offset = b * n_aux_input;
+      float unused_min, unused_max;
       tensor_utils::SymmetricQuantizeFloats(
           aux_input_ptr + offset, n_aux_input, quantized_aux_input_ptr + offset,
           &unused_min, &unused_max, &scaling_factors[b]);
@@ -597,9 +597,9 @@ inline void LstmStepHybrid(
 
   if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
     // Save quantization and matmul computation for all zero input.
-    float unused_min, unused_max;
     for (int b = 0; b < n_batch; ++b) {
       const int offset = b * n_output;
+      float unused_min, unused_max;
       tensor_utils::SymmetricQuantizeFloats(output_state_ptr + offset, n_output,
                                             quantized_output_state_ptr + offset,
                                             &unused_min, &unused_max,
@@ -761,9 +761,9 @@ inline void LstmStepHybrid(
     }
     if (!tensor_utils::IsZeroVector(output_gate_scratch, n_batch * n_cell)) {
       // Save quantization and matmul computation for all zero input.
-      float unused_min, unused_max;
       for (int b = 0; b < n_batch; ++b) {
         const int offset = b * n_cell;
+        float unused_min, unused_max;
         tensor_utils::SymmetricQuantizeFloats(
             output_gate_scratch + offset, n_cell,
             quantized_cell_state_ptr + offset, &unused_min, &unused_max,
",0,train
1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor.

PiperOrigin-RevId: 241662817",analytical_cost_estimator.cc,"@@ -123,7 +123,22 @@ AnalyticalCostEstimator::AnalyticalCostEstimator(
       use_aggressive_shape_inference_(use_aggressive_shape_inference) {
   scheduler_ = absl::make_unique<VirtualScheduler>(
       use_static_shapes_, use_aggressive_shape_inference_, cluster,
-      node_manager_.get());
+      node_manager_.get(),
+      absl::make_unique<VirtualPlacer>(cluster->GetDevices()));
+}
+
+AnalyticalCostEstimator::AnalyticalCostEstimator(
+    Cluster* cluster, std::unique_ptr<OpLevelCostEstimator> node_estimator,
+    std::unique_ptr<ReadyNodeManager> node_manager,
+    std::unique_ptr<VirtualPlacer> placer, bool use_static_shapes,
+    bool use_aggressive_shape_inference)
+    : node_estimator_(std::move(node_estimator)),
+      node_manager_(std::move(node_manager)),
+      use_static_shapes_(use_static_shapes),
+      use_aggressive_shape_inference_(use_aggressive_shape_inference) {
+  scheduler_ = absl::make_unique<VirtualScheduler>(
+      use_static_shapes_, use_aggressive_shape_inference_, cluster,
+      node_manager_.get(), std::move(placer));
 }
 
 Status AnalyticalCostEstimator::Initialize(const GrapplerItem& item) {
",0,train
1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor.

PiperOrigin-RevId: 241662817",analytical_cost_estimator.h,"@@ -47,6 +47,12 @@ class AnalyticalCostEstimator : public CostEstimator {
                           std::unique_ptr<ReadyNodeManager> node_manager,
                           bool use_static_shapes,
                           bool use_aggressive_shape_inference);
+  AnalyticalCostEstimator(Cluster* cluster,
+                          std::unique_ptr<OpLevelCostEstimator> node_estimator,
+                          std::unique_ptr<ReadyNodeManager> node_manager,
+                          std::unique_ptr<VirtualPlacer> placer,
+                          bool use_static_shapes,
+                          bool use_aggressive_shape_inference);
   ~AnalyticalCostEstimator() override {}
 
   // Initializes the estimator for the specified grappler item.
",0,train
1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor.

PiperOrigin-RevId: 241662817",virtual_scheduler.cc,"@@ -259,13 +259,15 @@ std::unique_ptr<ReadyNodeManager> ReadyNodeManagerFactory(
 VirtualScheduler::VirtualScheduler(const bool use_static_shapes,
                                    const bool use_aggressive_shape_inference,
                                    Cluster* cluster,
-                                   ReadyNodeManager* ready_nodes)
+                                   ReadyNodeManager* ready_nodes,
+                                   std::unique_ptr<VirtualPlacer> placer)
     : ready_nodes_(ready_nodes),
       graph_costs_(Costs::ZeroCosts()),
       cluster_(cluster),
       use_static_shapes_(use_static_shapes),
       use_aggressive_shape_inference_(use_aggressive_shape_inference),
-      placer_(cluster->GetDevices()) {
+      placer_(std::move(placer)) {
+  DCHECK(placer_);  // check if the pointer is valid.
   graph_costs_.num_ops_total = 0;
   initialized_ = false;
   track_mem_usage_snapshot_ = VLOG_IS_ON(1);
@@ -524,13 +526,13 @@ bool VirtualScheduler::IsPersistentNode(const NodeDef* node) const {
 }
 
 string VirtualScheduler::DeviceName(const NodeDef* node) const {
-  return placer_.get_canonical_device_name(*node);
+  return placer_->get_canonical_device_name(*node);
 }
 
 string VirtualScheduler::SanitizedDeviceName(const NodeDef* node) const {
   // Replace the "":"" characters that may be present in the device name with ""_"".
   // This makes it possible to then use the resulting string in a node name.
-  return str_util::StringReplace(placer_.get_canonical_device_name(*node), "":"",
+  return str_util::StringReplace(placer_->get_canonical_device_name(*node), "":"",
                                  ""_"", true);
 }
 
@@ -620,7 +622,7 @@ OpContext VirtualScheduler::GetCurrNode() const {
 
   // Get the device from the placer.
   DeviceProperties device;
-  device = placer_.get_device(*node);
+  device = placer_->get_device(*node);
 
   // Special case for _Send op.
   if (IsSend(*node)) {
",0,train
1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor.

PiperOrigin-RevId: 241662817",virtual_scheduler.h,"@@ -263,7 +263,9 @@ class VirtualScheduler {
   // Does not take ownership of cluster or ready_nodes.
   VirtualScheduler(const bool use_static_shapes,
                    const bool use_aggressive_shape_inference, Cluster* cluster,
-                   ReadyNodeManager* ready_nodes);
+                   ReadyNodeManager* ready_nodes,
+                   std::unique_ptr<VirtualPlacer> placer);
+
   // Initializes the scheduler for the specific grappler item.
   // Should be called immediately after the c'tor or when the scheduler will be
   // reused for a new grappler item. All internal states of the scheduler
@@ -356,7 +358,7 @@ class VirtualScheduler {
   bool track_mem_usage_snapshot_;
   const bool use_aggressive_shape_inference_;
 
-  VirtualPlacer placer_;  // owned.
+  std::unique_ptr<VirtualPlacer> placer_;
 };
 
 }  // namespace grappler
",0,train
1597edbeede9d7376626f0026cd94bd8eb7e50b3,tensorflow/tensorflow,"Update VirtualScheduler constructor.

PiperOrigin-RevId: 241662817",virtual_scheduler_test.cc,"@@ -33,8 +33,10 @@ class TestVirtualScheduler : public VirtualScheduler {
   TestVirtualScheduler(const bool use_static_shapes,
                        const bool use_aggressive_shape_inference,
                        Cluster* cluster)
-      : VirtualScheduler(use_static_shapes, use_aggressive_shape_inference,
-                         cluster, &ready_node_manager_) {
+      : VirtualScheduler(
+            use_static_shapes, use_aggressive_shape_inference, cluster,
+            &ready_node_manager_,
+            absl::make_unique<VirtualPlacer>(cluster->GetDevices())) {
     enable_mem_usage_tracking();
   }
 
",0,train
74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime.

PiperOrigin-RevId: 390182690
Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",control_flow.py,"@@ -76,11 +76,13 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import func_graph
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.types import distribute
@@ -971,14 +973,39 @@ LEGAL_LOOP_TYPES = 'Tensor, int, float, bool or a list, tuple or dict thereof'
 
 
 def _placeholder_value(like, original=None):
+  """"""Constructs a (dummy) placeholder value for a loop-initialized variable.""""""
   if isinstance(like, (variables.Undefined, variables.UndefinedReturnValue)):
     return original
-  if isinstance(like, (int, float, bool)):
+
+  elif isinstance(like, (int, float, bool)):
     return type(like)(0)
-  if tensor_util.is_tf_type(like):
-    return array_ops.zeros(like.shape, like.dtype)
+
+  elif tensor_util.is_tf_type(like):
+
+    # To avoid while_loop complaining about shape invariants, the placeholder's
+    # shape must be identical to the corresponding loop var's shape. This means
+    # dynamic dimensions where the like value had dynamic dimensions. We
+    # simulate that by passing a tensor that is deterministically 0, but is
+    # obtained by means which most constant folders can't see through.
+    # TODO(mdan): Just use 0 once while_loop is smarter about shape invariants.
+    dynamic_zero = random_ops.random_uniform(minval=0, maxval=1, shape=())
+    placeholder_shape = []
+    for s in like.shape:
+      if s is None:
+        placeholder_shape.append(dynamic_zero)
+      elif isinstance(s, tensor_shape.Dimension):
+        if s.value is None:
+          placeholder_shape.append(dynamic_zero)
+        else:
+          placeholder_shape.append(s.value)
+      else:
+        placeholder_shape.append(s)
+
+    return array_ops.zeros(placeholder_shape, like.dtype)
+
   elif isinstance(like, (list, tuple, dict)):
     return nest.map_structure(_placeholder_value, like)
+
   return original
 
 
",0,test
74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime.

PiperOrigin-RevId: 390182690
Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",control_flow_test.py,"@@ -669,6 +669,32 @@ class WhileLoopTest(testing.AutoGraphTestCase):
     # Node naming is inconsistent between V1 and V2.
     self.assertGraphContains(r'(while/)?pow$', 1)
 
+  def test_tensor_creating_variable_of_dynamic_shape(self):
+
+    def body():
+      nonlocal i, s
+      i = array_ops.ones(
+          [random_ops.random_uniform(minval=1, maxval=4, shape=()), 7])
+      s = math_ops.reduce_sum(i)
+
+    def set_state(loop_vars):
+      nonlocal i, s
+      i, s = loop_vars
+
+    i = variable_operators.Undefined('i')
+    s = constant_op.constant(0.0)
+    control_flow.while_stmt(
+        test=lambda: math_ops.equal(s, 0),
+        body=body,
+        get_state=lambda: (i, s),
+        set_state=set_state,
+        symbol_names=('i', 's'),
+        opts={})
+
+    self.assertEqual(i[0][0], 1)
+    self.assertGreaterEqual(s, 7)
+    self.assertOpCreated('While')  # Not stateless because of the random op.
+
   def test_tensor_with_side_effecting_condition(self):
     v = self.variable('v', 0, dtypes.int32)
 
",0,test
74a99ac8ee77e724163912bbc3e6b45b0a455ec7,tensorflow/tensorflow,"Fix bug causing errors when loops create dynamically-shaped variables. For such variables, their initial dummy value will have a dynamic shape that is always zero at runtime.

PiperOrigin-RevId: 390182690
Change-Id: I9640fd7a52c3a14edb9b679e1804d58a5552789a",testing.py,"@@ -156,6 +156,9 @@ class AutoGraphTestCase(test.TestCase):
   def assertEqual(self, *args):
     self.assertions.append((super().assertEqual, list(args)))
 
+  def assertGreaterEqual(self, *args):
+    self.assertions.append((super().assertGreaterEqual, list(args)))
+
   def assertDictEqual(self, *args):
     self.assertions.append((super().assertDictEqual, list(args)))
 
",0,test
f88bcfc6bd02b7065c4bfc3b401dd5b0a682922f,tensorflow/tensorflow,"Invoke export strategies when train_and_evaluate runs locally.

Previous changes export the model in accordance with the known export strategies when train_and_evaluate runs in the distributed mode.  This change adds a similar support for the local mode.

PiperOrigin-RevId: 170546015",training.py,"@@ -105,21 +105,6 @@ def _is_google_env():
   return tf_config.get(_ENVIRONMENT_KEY) == _ENVIRONMENT_GOOGLE_VALUE
 
 
-def _export_eval_result(eval_result, checkpoint_path, estimator, eval_spec):
-  """"""Export `eval_result` according to strategies in `EvalSpec`.""""""
-  export_dir_base = os.path.join(
-      compat.as_str_any(estimator.model_dir), compat.as_str_any('export'))
-
-  for strategy in eval_spec.export_strategies:
-    strategy.export(
-        estimator,
-        os.path.join(
-            compat.as_str_any(export_dir_base), compat.as_str_any(
-                strategy.name)),
-        checkpoint_path=checkpoint_path,
-        eval_result=eval_result)
-
-
 class TrainSpec(
     collections.namedtuple('TrainSpec', ['input_fn', 'max_steps', 'hooks'])):
   """"""Objects passed to `train_and_evaluate`.
@@ -384,18 +369,16 @@ class _TrainingExecutor(object):
     logging.info('Start train and evaluate loop. The evaluate will happen '
                  'after {} secs (eval_spec.throttle_secs) or training is '
                  'finished.'.format(self._eval_spec.throttle_secs))
+
+    evaluator = _TrainingExecutor._Evaluator(self._estimator, self._eval_spec)
+
     while True:
       self._estimator.train(
           input_fn=self._train_spec.input_fn,
           max_steps=self._train_spec.max_steps,
           hooks=train_hooks)
-      metrics = self._estimator.evaluate(
-          input_fn=self._eval_spec.input_fn,
-          steps=self._eval_spec.steps,
-          hooks=self._eval_spec.hooks,
-          name=self._eval_spec.name)
 
-      # TODO(b/65169058): Adds export once export strategies are moved.
+      metrics = evaluator.evaluate_and_export()
 
       if _should_stop_local_train(metrics[ops.GraphKeys.GLOBAL_STEP]):
         break
@@ -503,7 +486,6 @@ class _TrainingExecutor(object):
             'evaluation pass as evaluation results are expected to be same '
             'for the same checkpoint.')
         return None
-
       eval_result = self._estimator.evaluate(
           input_fn=self._eval_spec.input_fn,
           steps=self._eval_spec.steps,
@@ -515,8 +497,7 @@ class _TrainingExecutor(object):
         self._log_err_msg('Estimator evaluate returns empty result.')
         return None
 
-      _export_eval_result(eval_result, latest_ckpt_path, self._estimator,
-                          self._eval_spec)
+      self._export_eval_result(eval_result, latest_ckpt_path)
 
       self._last_warning_time = 0
       self._previous_ckpt_path = latest_ckpt_path
@@ -528,3 +509,18 @@ class _TrainingExecutor(object):
       if current_time - self._last_warning_time > 600:
         logging.warning(message)
         self._last_warning_time = current_time
+
+    def _export_eval_result(self, eval_result, checkpoint_path):
+      """"""Export `eval_result` according to strategies in `EvalSpec`.""""""
+      export_dir_base = os.path.join(
+          compat.as_str_any(self._estimator.model_dir),
+          compat.as_str_any('export'))
+
+      for strategy in self._eval_spec.export_strategies:
+        strategy.export(
+            self._estimator,
+            os.path.join(
+                compat.as_str_any(export_dir_base),
+                compat.as_str_any(strategy.name)),
+            checkpoint_path=checkpoint_path,
+            eval_result=eval_result)
",0,train
f88bcfc6bd02b7065c4bfc3b401dd5b0a682922f,tensorflow/tensorflow,"Invoke export strategies when train_and_evaluate runs locally.

Previous changes export the model in accordance with the known export strategies when train_and_evaluate runs in the distributed mode.  This change adds a similar support for the local mode.

PiperOrigin-RevId: 170546015",training_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function
 
 
 import json
+import random
 import time
 
 from tensorflow.python.estimator import estimator as estimator_lib
@@ -32,7 +33,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.platform import test
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import monitored_session
-from tensorflow.python.training import saver
 from tensorflow.python.training import server_lib
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.util import compat
@@ -747,8 +747,7 @@ class TrainingExecutorRunEvaluatorTest(test.TestCase):
     mock_sleep.assert_called_with(throttle_secs - operation_secs)
     self.assertTrue(mock_est.evaluate.called)
 
-  @test.mock.patch.object(saver, 'latest_checkpoint')
-  def test_that_export_fn_is_called(self, mock_latest_ckpt):
+  def test_that_export_fn_is_called(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
     mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
     self._set_up_mock_est_to_train_and_evaluate_once(mock_est, mock_train_spec)
@@ -895,8 +894,12 @@ class StopAtSecsHookTest(test.TestCase):
 class TrainingExecutorRunLocalTest(test.TestCase):
   """"""Tests run_local of _TrainingExecutor.""""""
 
+  def unique_checkpoint_every_time_fn(self):
+    return 'checkpoint_path_%s/' % random.random()
+
   def test_send_stop_at_secs_to_train(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+    mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn
     train_spec = training.TrainSpec(
         input_fn=lambda: 1, max_steps=2, hooks=[_FakeHook()])
     eval_spec = training.EvalSpec(
@@ -911,11 +914,24 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     self.assertEqual(eval_spec.throttle_secs, stop_hook._stop_after_secs)
 
   def test_runs_in_a_loop_until_max_steps(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+    mock_est.latest_checkpoint = self.unique_checkpoint_every_time_fn
+
+    mock_est.times_export_fn_was_called = 0
+    def export_fn(estimator, *args, **kwargs):
+      del args, kwargs
+      estimator.times_export_fn_was_called += 1
+
+    export_strategy = export_strategy_lib.ExportStrategy(
+        name='see_whether_export_fn_is_called', export_fn=export_fn)
+
     train_spec = training.TrainSpec(
         input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
     eval_spec = training.EvalSpec(
-        input_fn=lambda: 1, hooks=[_FakeHook()], throttle_secs=100)
+        input_fn=lambda: 1,
+        hooks=[_FakeHook()],
+        throttle_secs=100,
+        export_strategies=export_strategy)
     # should be called 3 times.
     mock_est.evaluate.side_effect = [{
         _GLOBAL_STEP_KEY: train_spec.max_steps - 100
@@ -930,9 +946,11 @@ class TrainingExecutorRunLocalTest(test.TestCase):
 
     self.assertEqual(3, mock_est.train.call_count)
     self.assertEqual(3, mock_est.evaluate.call_count)
+    self.assertEqual(3, mock_est.times_export_fn_was_called)
 
   def test_train_and_evaluate_args(self):
-    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator, model_dir='path/')
+    mock_est.latest_checkpoint.return_value = 'checkpoint_path/'
     train_spec = training.TrainSpec(
         input_fn=lambda: 1, max_steps=300, hooks=[_FakeHook()])
     eval_spec = training.EvalSpec(
@@ -946,6 +964,7 @@ class TrainingExecutorRunLocalTest(test.TestCase):
         name=eval_spec.name,
         input_fn=eval_spec.input_fn,
         steps=eval_spec.steps,
+        checkpoint_path='checkpoint_path/',
         hooks=eval_spec.hooks)
 
     train_args = mock_est.train.call_args[1]
@@ -962,6 +981,36 @@ class TrainingExecutorRunLocalTest(test.TestCase):
     with self.assertRaisesRegexp(ValueError, 'throttle_secs'):
       executor.run_local()
 
+  def test_that_export_fn_is_called_with_run_local(self):
+    mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
+    mock_train_spec = test.mock.Mock(spec=training.TrainSpec)
+    mock_train_spec.max_steps = 200
+    mock_est.evaluate.return_value = {
+        _GLOBAL_STEP_KEY: mock_train_spec.max_steps
+    }
+    # _validate_hooks would have made sure that train_spec.hooks is [], when
+    # None were passed.
+    mock_train_spec.hooks = []
+
+    def export_fn(estimator, *args, **kwargs):
+      del args, kwargs
+      estimator.export_fn_was_called = True
+
+    export_strategy = export_strategy_lib.ExportStrategy(
+        name='see_whether_export_fn_is_called', export_fn=export_fn)
+
+    eval_spec = training.EvalSpec(
+        input_fn=lambda: 1,
+        steps=2,
+        delay_secs=0,
+        throttle_secs=213,
+        export_strategies=export_strategy)
+
+    executor = training._TrainingExecutor(mock_est, mock_train_spec, eval_spec)
+    executor.run_local()
+
+    self.assertTrue(mock_est.export_fn_was_called)
+
 
 if __name__ == '__main__':
   test.main()
",0,train
60e499b4b608a52f5cf2a117c006ce8eac0941e0,tensorflow/tensorflow,"[XLA:GPU] Enable async all-reduce by default.

PiperOrigin-RevId: 434787265",debug_options_flags.cc,"@@ -76,6 +76,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_allow_excess_precision(true);
   opts.set_xla_force_host_platform_device_count(1);
   opts.set_xla_gpu_all_reduce_combine_threshold_bytes(30 * 1024 * 1024);
+  opts.set_xla_gpu_enable_async_all_reduce(true);
   opts.set_xla_cpu_enable_xprof_traceme(false);
   opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
   opts.set_xla_multiheap_size_constraint_per_heap(-1);
",0,test
f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script.

PiperOrigin-RevId: 219352016",ast_edits.py,"@@ -184,6 +184,17 @@ class _ASTCallVisitor(ast.NodeVisitor):
     except KeyError:
       pass
 
+  def _print_warning_for_function(self, node, full_name):
+    function_warnings = self._api_change_spec.function_warnings
+    try:
+      warning_message = function_warnings[full_name]
+      warning_message = warning_message.replace(""<function name>"", full_name)
+      self._file_edit.add(warning_message,
+                          node.lineno, node.col_offset, full_name, full_name,
+                          error=""%s requires manual check."" % full_name)
+    except KeyError:
+      pass
+
   def _get_attribute_full_path(self, node):
     """"""Traverse an attribute to generate a full name e.g. tf.foo.bar.
 
@@ -350,6 +361,7 @@ class _ASTCallVisitor(ast.NodeVisitor):
     full_name = self._get_attribute_full_path(node)
     if full_name:
       self._rename_functions(node, full_name)
+      self._print_warning_for_function(node, full_name)
     if full_name in self._api_change_spec.change_to_function:
       if not hasattr(node, ""is_function_for_call""):
         new_text = full_name + ""()""
",0,train
f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script.

PiperOrigin-RevId: 219352016",tf_upgrade.py,"@@ -178,6 +178,9 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
     # Specially handled functions.
     self.function_handle = {""tf.reverse"": self._reverse_handler}
 
+    # Warnings that should be printed if corresponding functions are used.
+    self.function_warnings = {}
+
   @staticmethod
   def _reverse_handler(file_edit_recorder, node):
     # TODO(aselle): Could check for a literal list of bools and try to convert
",0,train
f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script.

PiperOrigin-RevId: 219352016",tf_upgrade_v2.py,"@@ -19,7 +19,6 @@ from __future__ import division
 from __future__ import print_function
 
 import argparse
-import functools
 
 from tensorflow.tools.compatibility import ast_edits
 from tensorflow.tools.compatibility import renames_v2
@@ -46,29 +45,28 @@ class TFAPIChangeSpec(ast_edits.APIChangeSpec):
 
     # Specially handled functions.
     self.function_handle = {}
-    for decay in [""tf.train.exponential_decay"", ""tf.train.piecewise_constant"",
-                  ""tf.train.polynomial_decay"", ""tf.train.natural_exp_decay"",
-                  ""tf.train.inverse_time_decay"", ""tf.train.cosine_decay"",
-                  ""tf.train.cosine_decay_restarts"",
-                  ""tf.train.linear_cosine_decay"",
-                  ""tf.train.noisy_linear_cosine_decay""]:
-      self.function_handle[decay] = functools.partial(
-          self._learning_rate_decay_handler, decay_name=decay)
-
-  @staticmethod
-  def _learning_rate_decay_handler(file_edit_recorder, node, decay_name):
-    comment = (""ERROR: %s has been changed to return a callable instead of a ""
-               ""tensor when graph building, but its functionality remains ""
-               ""unchanged during eager execution (returns a callable like ""
-               ""before). The converter cannot detect and fix this reliably, so ""
-               ""you need to inspect this usage manually.\n"") % decay_name
-    file_edit_recorder.add(
-        comment,
-        node.lineno,
-        node.col_offset,
-        decay_name,
-        decay_name,
-        error=""%s requires manual check."" % decay_name)
+
+    decay_function_comment = (
+        ""ERROR: <function name> has been changed to return a callable instead ""
+        ""of a tensor when graph building, but its functionality remains ""
+        ""unchanged during eager execution (returns a callable like ""
+        ""before). The converter cannot detect and fix this reliably, so ""
+        ""you need to inspect this usage manually.\n""
+    )
+
+    # Function warnings. <function name> placeholder inside warnings will be
+    # replaced by function name.
+    self.function_warnings = {
+        ""tf.train.exponential_decay"": decay_function_comment,
+        ""tf.train.piecewise_constant"": decay_function_comment,
+        ""tf.train.polynomial_decay"": decay_function_comment,
+        ""tf.train.natural_exp_decay"": decay_function_comment,
+        ""tf.train.inverse_time_decay"": decay_function_comment,
+        ""tf.train.cosine_decay"": decay_function_comment,
+        ""tf.train.cosine_decay_restarts"": decay_function_comment,
+        ""tf.train.linear_cosine_decay"": decay_function_comment,
+        ""tf.train.noisy_linear_cosine_decay"": decay_function_comment,
+    }
 
 
 if __name__ == ""__main__"":
",0,train
f3bb411f120066c244131c0e5d80856948a45674,tensorflow/tensorflow,"Add a standard way to add a warning in tf_upgrade_v2.py script.

PiperOrigin-RevId: 219352016",tf_upgrade_v2_test.py,"@@ -73,9 +73,10 @@ class TestUpgrade(test_util.TensorFlowTestCase):
                   ""tf.train.noisy_linear_cosine_decay""]:
 
       text = ""%s(a, b)\n"" % decay
-      _, unused_report, errors, new_text = self._upgrade(text)
+      _, report, errors, new_text = self._upgrade(text)
       self.assertEqual(text, new_text)
       self.assertEqual(errors, [""test.py:1: %s requires manual check."" % decay])
+      self.assertIn(""%s has been changed"" % decay, report)
 
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
",0,train
091f6253725ae9a2370135237e42c5b3666b6138,tensorflow/tensorflow,"Made the logdir argument used to start up Tensorboard appear under the TOGGLE ALL RUNS button.

As part of this effort, made the handler respond with a JSON object with a single key 'logdir' containing the value. It seems like the request manager parses JSON. Previously, the endpoint returned a raw string that was the logdir argument.

Updated screen diff integration tests.
Change: 135722994",handler.py,"@@ -177,7 +177,7 @@ class TensorboardHandler(BaseHTTPServer.BaseHTTPRequestHandler):
   def _serve_logdir(self, unused_query_params):
     """"""Writes out the logdir argument with which this tensorboard was started.
     """"""
-    self.respond(self._logdir, 'text/plain')
+    self.respond({'logdir': self._logdir}, 'application/json')
 
   def _serve_scalars(self, query_params):
     """"""Given a tag and single run, return array of ScalarEvents.
",0,train
091f6253725ae9a2370135237e42c5b3666b6138,tensorflow/tensorflow,"Made the logdir argument used to start up Tensorboard appear under the TOGGLE ALL RUNS button.

As part of this effort, made the handler respond with a JSON object with a single key 'logdir' containing the value. It seems like the request manager parses JSON. Previously, the endpoint returned a raw string that was the logdir argument.

Updated screen diff integration tests.
Change: 135722994",server_test.py,"@@ -102,10 +102,9 @@ class TensorboardServerTest(tf.test.TestCase):
     self.assertEqual(response.status, 400)
 
   def testLogdir(self):
-    """"""Test the status code and content of the data/logdir endpoint.""""""
-    response = self._get('/data/logdir')
-    self.assertEqual(response.status, 200)
-    self.assertEqual(response.read().decode('utf-8'), '/foo/logdir/argument')
+    """"""Test the format of the data/logdir endpoint.""""""
+    parsed_object = self._getJson('/data/logdir')
+    self.assertEqual(parsed_object, {'logdir': '/foo/logdir/argument'})
 
   def testRuns(self):
     """"""Test the format of the /data/runs endpoint.""""""
",0,train
801b09624f0488132638166fe782be4163269657,tensorflow/tensorflow,"More accurate input-pipeline analysis for TPU.

PiperOrigin-RevId: 286672368
Change-Id: I14e6e47e79304154404629295b6728857583d781",event_span.cc,"@@ -116,17 +116,17 @@ EventType ClassifyGpuEvent(absl::string_view event_name) {
 }
 
 EventType ClassifyCpuEvent(absl::string_view event_name, int64 correlation_id) {
-  if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoD""))
+  if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoD"") ||
+      absl::StrContains(event_name, ""Infeed""))
     return HOST_TO_DEVICE;
   if (absl::StartsWithIgnoreCase(event_name, ""MEMCPYHtoH"")) return HOST_TO_HOST;
   if (correlation_id >= 0 ||
       absl::StartsWithIgnoreCase(event_name, ""ExecutorState::Process"")) {
     return HOST_PREPARE;
-  } else {
-    if (absl::StartsWithIgnoreCase(event_name, ""IteratorGetNext""))
-      return HOST_WAIT_INPUT;
-    return HOST_COMPUTE;
   }
+  if (absl::StartsWithIgnoreCase(event_name, ""IteratorGetNext""))
+    return HOST_WAIT_INPUT;
+  return HOST_COMPUTE;
 }
 
 std::string PrintEventType(EventType event_type) {
",0,test
22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty.

This makes sure we avoid OOM when there is too much infeed data to send it at
once, and we also don't need the magic ""num_infeeds"" parameter anymore.

PiperOrigin-RevId: 197886121",infeed_manager.cc,"@@ -49,13 +49,25 @@ void InfeedManager::EnqueueBuffers(const std::vector<InfeedBuffer*>& buffers) {
 }
 
 InfeedBuffer* InfeedManager::BlockingDequeueBuffer() {
-  tensorflow::mutex_lock l(mu_);
-  while (enqueued_buffer_.empty()) {
-    cv_.wait(l);
+  bool became_empty = false;
+  InfeedBuffer* current_buffer;
+  {
+    tensorflow::mutex_lock l(mu_);
+    while (enqueued_buffer_.empty()) {
+      cv_.wait(l);
+    }
+    current_buffer = enqueued_buffer_.front();
+    enqueued_buffer_.pop_front();
+    dequeued_buffer_.insert(current_buffer);
+    if (enqueued_buffer_.empty()) {
+      became_empty = true;
+    }
+  }
+  if (became_empty) {
+    for (const auto& callback : on_empty_callbacks_) {
+      callback();
+    }
   }
-  InfeedBuffer* current_buffer = enqueued_buffer_.front();
-  enqueued_buffer_.pop_front();
-  dequeued_buffer_.insert(current_buffer);
   return current_buffer;
 }
 
@@ -88,6 +100,10 @@ se::Stream* InfeedManager::GetStream(se::StreamExecutor* executor) {
   return host_to_device_stream_.get();
 }
 
+void InfeedManager::RegisterOnEmptyCallback(std::function<void()> callback) {
+  on_empty_callbacks_.push_back(std::move(callback));
+}
+
 InfeedManager* GetOrCreateInfeedManager() {
   static InfeedManager* manager = new InfeedManager;
   return manager;
",0,train
22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty.

This makes sure we avoid OOM when there is too much infeed data to send it at
once, and we also don't need the magic ""num_infeeds"" parameter anymore.

PiperOrigin-RevId: 197886121",infeed_manager.h,"@@ -21,6 +21,7 @@ limitations under the License.
 #define TENSORFLOW_COMPILER_XLA_SERVICE_GPU_INFEED_MANAGER_H_
 
 #include <deque>
+#include <vector>
 
 #include ""tensorflow/compiler/xla/types.h""
 #include ""tensorflow/core/lib/gtl/flatset.h""
@@ -100,6 +101,10 @@ class InfeedManager {
   // returns null.
   se::Stream* GetStream(se::StreamExecutor* executor);
 
+  // Registers a callback that will be called when 'enqueued_buffer_' becomes
+  // empty.
+  void RegisterOnEmptyCallback(std::function<void()> callback);
+
  private:
   // TODO(b/30467474): Revisit if this mutex becomes a point of
   // contention.
@@ -122,6 +127,10 @@ class InfeedManager {
 
   // Executor that the host_to_device_stream belongs to. Not owned.
   se::StreamExecutor* host_to_device_executor_;
+
+  // List of callbacks which will be called when 'enqueued_buffer_' becomes
+  // empty.
+  std::vector<std::function<void()>> on_empty_callbacks_;
 };
 
 // Singleton creator-or-accessor: Returns the GPU infeed manager.
",0,train
22443c0f157658e04b96cbc06904b32486584055,tensorflow/tensorflow,"When using fake infeed data, fill the infeed when it is empty.

This makes sure we avoid OOM when there is too much infeed data to send it at
once, and we also don't need the magic ""num_infeeds"" parameter anymore.

PiperOrigin-RevId: 197886121",replay_computation.cc,"@@ -41,6 +41,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/client/local_client.h""
 #include ""tensorflow/compiler/xla/execution_options_util.h""
 #include ""tensorflow/compiler/xla/literal_util.h""
+#include ""tensorflow/compiler/xla/service/gpu/infeed_manager.h""
 #include ""tensorflow/compiler/xla/service/hlo.pb.h""
 #include ""tensorflow/compiler/xla/shape_util.h""
 #include ""tensorflow/compiler/xla/status_macros.h""
@@ -64,7 +65,6 @@ namespace {
 struct Options {
   string fake_infeed_shape;
   bool generate_fake_infeed = false;
-  int num_infeeds = 10;
   bool use_fake_data = false;
   bool print_result = true;
   int num_runs = 1;
@@ -126,22 +126,26 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(const HloSnapshot& module,
   // --generate_fake_infeed is passed and there exists an infeed operation in
   // the HloSnapshot.
   tensorflow::gtl::optional<tensorflow::thread::ThreadPool> pool;
+  std::unique_ptr<Literal> data;
+  if (provide_infeed) {
+    data = std::move(MakeFakeLiteral(infeed_shape)).ValueOrDie();
+  }
+  auto transfer_infeed = [&data, client]() {
+    TF_CHECK_OK(client->TransferToInfeed(*data));
+  };
   if (provide_infeed) {
     pool.emplace(tensorflow::Env::Default(), ""infeed"",
                  /*num_threads=*/1);
-    pool->Schedule([opts, infeed_shape, client]() {
-      StatusOr<std::unique_ptr<Literal>> data_status =
-          MakeFakeLiteral(infeed_shape);
-      TF_CHECK_OK(data_status.status());
-      std::unique_ptr<Literal> data = std::move(data_status).ValueOrDie();
+    pool->Schedule([transfer_infeed]() {
       // There may be several infeed buffers needed, however we don't know how
       // many. If we proactively transfer too many infeed buffers, we may run
       // out of memory. If we transfer too few infeed buffers, the program will
-      // hang.
-      // TODO(akuegel): Figure out a better way to handle this.
-      for (int i = 0; i < opts.num_infeeds; ++i) {
-        TF_CHECK_OK(client->TransferToInfeed(*data));
-      }
+      // hang. Therefore, we register a callback that is called when the infeed
+      // becomes empty, and in this callback we will transfer another fake
+      // infeed.
+      auto infeed_manager = xla::gpu::GetOrCreateInfeedManager();
+      infeed_manager->RegisterOnEmptyCallback(transfer_infeed);
+      transfer_infeed();
     });
   }
 
@@ -234,8 +238,6 @@ int main(int argc, char** argv) {
                        ""Print the result of the computation to stdout""),
       tensorflow::Flag(""num_runs"", &opts.num_runs,
                        ""Number of times to run each computation""),
-      tensorflow::Flag(""num_infeeds"", &opts.num_infeeds,
-                       ""Number of times we transfer the fake infeed data""),
       tensorflow::Flag(""fake_infeed_shape"", &opts.fake_infeed_shape,
                        ""Shape of fake data to construct for (infinite) infeed""),
       tensorflow::Flag(""generate_fake_infeed"", &opts.generate_fake_infeed,
",0,train
31b0dc568f23966c8e5fc576a08825e5b039aca4,tensorflow/tensorflow,"tridiagonal_solve: Remove stale forward compatibility checks

`forward_compatible(2019, 10, 18)` always evaluates to `True` so a bit of stale code can be removed.",tridiagonal_solve_op_test.py,"@@ -24,7 +24,6 @@ import numpy as np
 
 from tensorflow.python.eager import backprop
 from tensorflow.python.client import session
-from tensorflow.python.compat import compat
 from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -43,8 +42,6 @@ _sample_diags = np.array([[2, 1, 4, 0], [1, 3, 2, 2], [0, 1, -1, 1]])
 _sample_rhs = np.array([1, 2, 3, 4])
 _sample_result = np.array([-9, 5, -4, 4])
 
-FORWARD_COMPATIBLE_DATE = (2019, 10, 18)
-
 # Flag, indicating that test should be run only with partial_pivoting=True
 FLAG_REQUIRES_PIVOTING = ""FLAG_REQUIRES_PIVOT""
 
@@ -303,13 +300,10 @@ class TridiagonalSolveOpTest(test.TestCase):
   # Tests with transpose and adjoint
 
   def testTransposeRhs(self):
-    expected = np.array([_sample_result, 2 * _sample_result])
-    if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE):
-      expected = expected.T
     self._testWithLists(
         diags=_sample_diags,
         rhs=np.array([_sample_rhs, 2 * _sample_rhs]),
-        expected=expected,
+        expected=np.array([_sample_result, 2 * _sample_result]).T,
         transpose_rhs=True)
 
   def testConjugateRhs(self):
@@ -321,28 +315,22 @@ class TridiagonalSolveOpTest(test.TestCase):
         conjugate_rhs=True)
 
   def testAdjointRhs(self):
-    expected = np.array(
-        [_sample_result * (1 - 1j), _sample_result * (1 + 2j)])
-    if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE):
-      expected = expected.T
     self._testWithLists(
         diags=_sample_diags,
         rhs=np.array([_sample_rhs * (1 + 1j), _sample_rhs * (1 - 2j)]),
-        expected=expected,
+        expected=np.array(
+            [_sample_result * (1 - 1j), _sample_result * (1 + 2j)]).T,
         transpose_rhs=True,
         conjugate_rhs=True)
 
   def testTransposeRhsWithBatching(self):
-    expected = np.array(
-        [[_sample_result, 2 * _sample_result],
-         [-3 * _sample_result, -4 * _sample_result]])
-    if compat.forward_compatible(*FORWARD_COMPATIBLE_DATE):
-      expected = expected.transpose(0, 2, 1)
     self._testWithLists(
         diags=np.array([_sample_diags, -_sample_diags]),
         rhs=np.array([[_sample_rhs, 2 * _sample_rhs],
                       [3 * _sample_rhs, 4 * _sample_rhs]]),
-        expected=expected,
+        expected=np.array(
+            [[_sample_result, 2 * _sample_result],
+             [-3 * _sample_result, -4 * _sample_result]]).transpose(0, 2, 1),
         transpose_rhs=True)
 
   def testTransposeRhsWithRhsAsVector(self):
",0,test
31b0dc568f23966c8e5fc576a08825e5b039aca4,tensorflow/tensorflow,"tridiagonal_solve: Remove stale forward compatibility checks

`forward_compatible(2019, 10, 18)` always evaluates to `True` so a bit of stale code can be removed.",linalg_impl.py,"@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.compat import compat
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -537,10 +536,7 @@ def _tridiagonal_solve_compact_format(diagonals, rhs, transpose_rhs,
     rhs = math_ops.conj(rhs)
 
   check_num_lhs_matches_num_rhs()
-  result = linalg_ops.tridiagonal_solve(diagonals, rhs, partial_pivoting, name)
-  if transpose_rhs and not compat.forward_compatible(2019, 10, 18):
-    return array_ops.matrix_transpose(result)
-  return result
+  return linalg_ops.tridiagonal_solve(diagonals, rhs, partial_pivoting, name)
 
 
 @tf_export('linalg.tridiagonal_matmul')
",0,test
23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well.

PiperOrigin-RevId: 397989212
Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",colocation_graph.cc,"@@ -146,6 +146,25 @@ bool IsVariantWithUnsupportedDeviceCopy(const Node* node) {
   return is_mutex_lock_op || is_dataset_op;
 }
 
+bool HasNoCopyReturns(const Node& node) {
+  if (!node.def().has_experimental_type()) {
+    return false;
+  }
+  const FullTypeDef& ft = node.def().experimental_type();
+  DCHECK(ft.type_id() == TFT_PRODUCT) << ft.DebugString();
+
+  for (const auto& arg : ft.args()) {
+    switch (arg.type_id()) {
+      case TFT_DATASET:
+        return true;
+      default:
+        continue;
+    }
+  }
+
+  return false;
+}
+
 }  // namespace
 
 Status Member::SetParentAndSupportedDevices(
@@ -717,6 +736,36 @@ Status ColocationGraph::ColocateResourceOrRefEdge(const Node* src,
   return Status::OK();
 }
 
+Status ColocationGraph::ColocateUncopiableTypeEdges(
+    std::unordered_set<Node*>* inspection_required) {
+  for (const Edge* edge : graph_.edges()) {
+    if (edge->IsControlEdge()) {
+      continue;
+    }
+    Node* src = edge->src();
+    Node* dst = edge->dst();
+    bool needs_inspection;
+    TF_RETURN_IF_ERROR(inspection_required_checker_.IsPlacerInspectionRequired(
+        *src, &needs_inspection));
+    if (needs_inspection) {
+      inspection_required->insert(src);
+      continue;
+    }
+    TF_RETURN_IF_ERROR(inspection_required_checker_.IsPlacerInspectionRequired(
+        *dst, &needs_inspection));
+    if (needs_inspection) {
+      inspection_required->insert(dst);
+      continue;
+    }
+
+    if (HasNoCopyReturns(*src)) {
+      TF_RETURN_IF_ERROR(ColocateResourceOrRefEdge(src, dst));
+    }
+  }
+
+  return Status::OK();
+}
+
 Status ColocationGraph::ColocateResourceAndRefEdges(
     std::unordered_set<Node*>* inspection_required) {
   // If `node` has an input edge with reference type, add an edge from the
@@ -770,6 +819,7 @@ Status ColocationGraph::ColocateResourceAndRefEdges(
 namespace {
 // Returns tensor list element data type, if the node is one of the ops that
 // operate with TensorLists. Otherwise returns DT_INVALID.
+// TODO(b/199443424): Don't use op names, use FullType here.
 DataType GetElementDataType(const Node& node) {
   static absl::flat_hash_set<std::string>* tensor_list_ops =
       new absl::flat_hash_set<std::string>(
@@ -884,6 +934,7 @@ Status ColocationGraph::Initialize() {
 
   std::unordered_set<Node*> inspection_required;
   TF_RETURN_IF_ERROR(ColocateResourceAndRefEdges(&inspection_required));
+  TF_RETURN_IF_ERROR(ColocateUncopiableTypeEdges(&inspection_required));
   TF_RETURN_IF_ERROR(AddHostOnlyDataTypesConstraints());
   TF_RETURN_IF_ERROR(AddInspectionConstraints(inspection_required));
   TF_RETURN_IF_ERROR(ColocateAllNodes());
",0,train
23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well.

PiperOrigin-RevId: 397989212
Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",colocation_graph.h,"@@ -278,12 +278,17 @@ class ColocationGraph {
 
   Status ColocateResourceOrRefEdge(const Node* src, const Node* dst);
 
+  // Adds colocation constraints to data types known not to support copying.
+  Status ColocateUncopiableTypeEdges(
+      std::unordered_set<Node*>* inspection_required);
+
   // Updates this ColocationGraph by making sure that all nodes
   // touching resource and/or ref tensors are colocated.
   // As it iterates over the edges, fills the `inspection_required` set with
   // the nodes that
   // PlacerInspectionRequiredOpChecker::IsPlacerInspectionRequired
   // deems as requiring deep inspection by placer. This is an optimization.
+  // TODO(mdan): Deprecate in favor of ColocateUncopiableTypeEdges.
   Status ColocateResourceAndRefEdges(
       std::unordered_set<Node*>* inspection_required);
 
",0,train
23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well.

PiperOrigin-RevId: 397989212
Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",placer.cc,"@@ -115,6 +115,10 @@ void LogDeviceAssignment(const Node* node, bool log_device_placement) {
               << ""("" << node->type_string()
               << ""): "" << node->assigned_device_name();
   }
+  if (VLOG_IS_ON(1)) {
+    VLOG(1) << node->name() << ""("" << node->type_string()
+            << "") placed on: "" << node->assigned_device_name();
+  }
 }
 
 Status AssignAndLog(int assigned_device, Node* node,
@@ -211,6 +215,8 @@ Status Placer::Run() {
           *node);
     }
 
+    // TODO(mdan): This is a constrained optimization solver. Write it like one.
+
     // Returns the first device in sorted devices list so we will always
     // choose the same device.
     //
",0,train
23e9fc61dbd05621fa703630de66b373179a4de6,tensorflow/tensorflow,"Create a placer rule that co-locates edges of uncopyable types. ""Uncopyable"" is identified based on FullType, and typically includes some DT_VARIANT tensors like datasets, but might extend to some resources as well.

PiperOrigin-RevId: 397989212
Change-Id: I9fc428a6a91b95b1c14855af4b9dd19707422270",placer_test.cc,"@@ -201,6 +201,18 @@ REGISTER_KERNEL_BUILDER(Name(""TestXlaOp"").Device(""XLA_CPU"").Priority(2),
 REGISTER_KERNEL_BUILDER(Name(""TestXlaOp"").Device(""FakeCPU"").Priority(1),
                         DummyOp);
 
+// Op with no-copy type definition.
+REGISTER_OP(""TestUncopiableTypeGeneratorCPU"")
+    .Output(""d: variant"")
+    .SetTypeConstructor(full_type::UnaryGeneric(TFT_DATASET));
+REGISTER_KERNEL_BUILDER(
+    Name(""TestUncopiableTypeGeneratorCPU"").Device(""FakeCPU""), DummyOp);
+
+// Op consuming a typed input.
+REGISTER_OP(""TestTypedConsumer"").Input(""i: variant"");
+REGISTER_KERNEL_BUILDER(Name(""TestTypedConsumer"").Device(""FakeCPU""), DummyOp);
+REGISTER_KERNEL_BUILDER(Name(""TestTypedConsumer"").Device(""FakeGPU""), DummyOp);
+
 ////////////////////////////////////////////////////////////////////////////////
 //
 // A PlacerTest method has three phases:
@@ -762,6 +774,24 @@ TEST_F(PlacerTest, TestHeuristicGeneratorFollowsSingleConsumer) {
   EXPECT_COLOCATED(g, ""assign"", ""in"");
 }
 
+TEST_F(PlacerTest, TestUncopiableTypeEdges) {
+  Graph g(OpRegistry::Global());
+
+  GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
+
+  // The producer can only be on the CPU. Without colocation constraints,
+  // the consumer would be placed on GPU, causing a copy.
+  Node* input =
+      ops::SourceOp(""TestUncopiableTypeGeneratorCPU"", b.opts().WithName(""ds""));
+  ops::UnaryOp(""TestTypedConsumer"", ops::NodeOut(input, 0),
+               b.opts().WithName(""c""));
+
+  TF_EXPECT_OK(BuildGraph(b, &g));
+
+  TF_EXPECT_OK(Place(&g));
+  EXPECT_COLOCATED(g, ""ds"", ""c"");
+}
+
 TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongDevice) {
   Graph g(OpRegistry::Global());
   {  // Scope for temporary variables used to construct g.
",0,train
1c0c9f2f6b7d6683c1aa16229ff9242c60ec760d,tensorflow/tensorflow,"Initialize context using memset

PiperOrigin-RevId: 251821825",micro_interpreter.cc,"@@ -67,7 +67,8 @@ MicroInterpreter::MicroInterpreter(const Model* model,
       op_resolver_(op_resolver),
       tensor_allocator_(tensor_allocator),
       error_reporter_(error_reporter),
-      initialization_status_(kTfLiteOk) {
+      initialization_status_(kTfLiteOk),
+      context_() {
   auto* subgraphs = model->subgraphs();
   if (subgraphs->size() != 1) {
     error_reporter->Report(""Only 1 subgraph is currently supported.\n"");
@@ -82,23 +83,15 @@ MicroInterpreter::MicroInterpreter(const Model* model,
   context_.tensors =
       reinterpret_cast<TfLiteTensor*>(tensor_allocator_->AllocateMemory(
           sizeof(TfLiteTensor) * context_.tensors_size, 4));
+  context_.impl_ = static_cast<void*>(this);
+  context_.ReportError = ReportOpError;
+  context_.recommended_num_threads = 1;
 
   initialization_status_ = AllocateInputAndActTensors();
   if (initialization_status_ != kTfLiteOk) {
     return;
   }
 
-  context_.impl_ = static_cast<void*>(this);
-  context_.GetExecutionPlan = nullptr;
-  context_.ResizeTensor = nullptr;
-  context_.ReportError = ReportOpError;
-  context_.AddTensors = nullptr;
-  context_.GetNodeAndRegistration = nullptr;
-  context_.ReplaceNodeSubsetsWithDelegateKernels = nullptr;
-  context_.recommended_num_threads = 1;
-  context_.GetExternalContext = nullptr;
-  context_.SetExternalContext = nullptr;
-
   initialization_status_ = AllocateTemporaryTensors();
   if (initialization_status_ != kTfLiteOk) {
     return;
",0,test
1c0c9f2f6b7d6683c1aa16229ff9242c60ec760d,tensorflow/tensorflow,"Initialize context using memset

PiperOrigin-RevId: 251821825",micro_interpreter_test.cc,"@@ -20,6 +20,10 @@ limitations under the License.
 namespace tflite {
 namespace {
 void* MockInit(TfLiteContext* context, const char* buffer, size_t length) {
+  // We don't support delegate in TFL micro. This is a weak check to test if
+  // context struct being zero-initialized.
+  TF_LITE_MICRO_EXPECT_EQ(nullptr,
+                          context->ReplaceNodeSubsetsWithDelegateKernels);
   // Do nothing.
   return nullptr;
 }
",0,test
5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported
replacements.

PiperOrigin-RevId: 242605915",optimizer_v2.py,"@@ -916,7 +916,8 @@ class OptimizerV2(optimizer_v1.Optimizer):
     var_list = [v for _, v in grads_and_vars]
     grads_and_vars = zip(reduced_grads, var_list)
 
-    unwrapped_var_list = [x for v in var_list for x in distribution.unwrap(v)]
+    unwrapped_var_list = [
+        x for v in var_list for x in distribution.experimental_local_results(v)]
     eager_execution = context.executing_eagerly()
     if eager_execution:
       # Give a clear error in this case instead of ""name not supported
",0,train
5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported
replacements.

PiperOrigin-RevId: 242605915",tape.py,"@@ -66,7 +66,7 @@ def watch_variable(tape, variable):
   if context:
     variables = [strategy.extended.value_container(variable)]
   else:
-    variables = strategy.unwrap(variable)
+    variables = strategy.experimental_local_results(variable)
   for var in variables:
     pywrap_tensorflow.TFE_Py_TapeWatchVariable(tape._tape, var)  # pylint: disable=protected-access
 
@@ -82,7 +82,7 @@ def variable_accessed(variable):
   if context:
     variables = [strategy.extended.value_container(variable)]
   else:
-    variables = strategy.unwrap(variable)
+    variables = strategy.experimental_local_results(variable)
   for var in variables:
     pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var)
 
@@ -104,7 +104,7 @@ def variables_accessed(variables):
   else:
     for variable in variables:
       if variable.trainable:
-        accessed.extend(strategy.unwrap(variable))
+        accessed.extend(strategy.experimental_local_results(variable))
 
   for var in accessed:
     pywrap_tensorflow.TFE_Py_TapeVariableAccessed(var)
",0,train
5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported
replacements.

PiperOrigin-RevId: 242605915",optimizer_v2.py,"@@ -560,7 +560,7 @@ class OptimizerV2(trackable.Trackable):
       else:
         initial_value = initializer
       strategy = distribute_ctx.get_strategy()
-      with strategy.colocate_vars_with(var):
+      with strategy.extended.colocate_vars_with(var):
         weight = tf_variables.Variable(
             name=""%s/%s"" % (var._shared_name, slot_name),  # pylint: disable=protected-access
             dtype=var.dtype,
",0,train
5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported
replacements.

PiperOrigin-RevId: 242605915",metrics_utils.py,"@@ -110,7 +110,8 @@ def result_wrapper(result_fn):
       def merge_fn_wrapper(distribution, merge_fn, *args):
         # We will get `PerDevice` merge function. Taking the first one as all
         # are identical copies of the function that we had passed below.
-        merged_result_fn = distribution.unwrap(merge_fn)[0](*args)
+        merged_result_fn = (
+            distribution.experimental_local_results(merge_fn)[0](*args))
 
         # Wrapping result in identity so that control dependency between
         # update_op from `update_state` and result works in case result returns
",0,train
5991a12e8c717f49caba2cc5673e6c72b0e301f5,tensorflow/tensorflow,"Switch away from deprecated tf.distribute.Strategy APIs to their supported
replacements.

PiperOrigin-RevId: 242605915",template_mirrored_strategy_test.py,"@@ -45,7 +45,8 @@ class TemplateMirroredStrategyTest(test.TestCase):
     temp = template.make_template(""my_template"", fn)
 
     strategy = mirrored_strategy.MirroredStrategy([""/cpu:0"", ""/gpu:0""])
-    out = strategy.unwrap(strategy.experimental_run_v2(temp))
+    out = strategy.experimental_local_results(
+        strategy.experimental_run_v2(temp))
 
     self.evaluate(variables.global_variables_initializer())
     self.assertAllEqual([42., 42.], self.evaluate(out))
",0,train
915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library.
Change: 132704774",env.cc,"@@ -137,6 +137,31 @@ Status Env::DeleteFile(const string& fname) {
   return fs->DeleteFile(fname);
 }
 
+Status Env::RecursivelyCreateDir(const string& dirname) {
+  FileSystem* fs;
+  TF_RETURN_IF_ERROR(GetFileSystemForFile(dirname, &fs));
+  std::vector<StringPiece> sub_dirs;
+  StringPiece remaining_dir(dirname);
+  while (!fs->FileExists(remaining_dir.ToString())) {
+    // Basename returns """" for / ending dirs.
+    if (!remaining_dir.ends_with(""/"")) {
+      sub_dirs.push_back(io::Basename(remaining_dir));
+    }
+    remaining_dir = io::Dirname(remaining_dir);
+  }
+
+  // sub_dirs contains all the dirs to be created but in reverse order.
+  std::reverse(sub_dirs.begin(), sub_dirs.end());
+
+  // Now create the directories.
+  string built_path = remaining_dir.ToString();
+  for (const StringPiece sub_dir : sub_dirs) {
+    built_path = io::JoinPath(built_path, sub_dir);
+    TF_RETURN_IF_ERROR(fs->CreateDir(built_path));
+  }
+  return Status::OK();
+}
+
 Status Env::CreateDir(const string& dirname) {
   FileSystem* fs;
   TF_RETURN_IF_ERROR(GetFileSystemForFile(dirname, &fs));
",0,train
915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library.
Change: 132704774",env.h,"@@ -158,7 +158,17 @@ class Env {
   Status DeleteRecursively(const string& dirname, int64* undeleted_files,
                            int64* undeleted_dirs);
 
-  /// Creates the specified directory.
+  /// \brief Creates the specified directory and all the necessary
+  /// subdirectories. Typical return codes.
+  ///  * OK - successfully created the directory and sub directories, even if
+  ///         they were already created.
+  ///  * PERMISSION_DENIED - dirname or some subdirectory is not writable.
+  Status RecursivelyCreateDir(const string& dirname);
+
+  /// \brief Creates the specified directory. Typical return codes
+  ///  * OK - successfully created the directory.
+  ///  * ALREADY_EXISTS - directory already exists.
+  ///  * PERMISSION_DENIED - dirname is not writable.
   Status CreateDir(const string& dirname);
 
   /// Deletes the specified directory.
",0,train
915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library.
Change: 132704774",env_test.cc,"@@ -128,6 +128,41 @@ TEST(EnvTest, DeleteRecursivelyFail) {
   EXPECT_EQ(1, undeleted_dirs);
 }
 
+TEST(EnvTest, RecursivelyCreateDir) {
+  Env* env = Env::Default();
+  const string create_path = io::JoinPath(testing::TmpDir(), ""a/b/c/d"");
+  TF_CHECK_OK(env->RecursivelyCreateDir(create_path));
+  TF_CHECK_OK(env->RecursivelyCreateDir(create_path));  // repeat creation.
+  EXPECT_TRUE(env->FileExists(create_path));
+
+  // Clean up.
+  // TODO(rohanj): Do this more elegantly using SetUp() and TearDown() methods.
+  int64 undeleted_files, undeleted_dirs;
+  TF_CHECK_OK(env->DeleteRecursively(io::JoinPath(testing::TmpDir(), ""a""),
+                                     &undeleted_files, &undeleted_dirs));
+}
+
+TEST(EnvTest, RecursivelyCreateDirSubdirsExist) {
+  Env* env = Env::Default();
+  // First create a/b.
+  const string subdir_path = io::JoinPath(testing::TmpDir(), ""a/b"");
+  TF_CHECK_OK(env->CreateDir(io::JoinPath(testing::TmpDir(), ""a"")));
+  TF_CHECK_OK(env->CreateDir(subdir_path));
+  EXPECT_TRUE(env->FileExists(subdir_path));
+
+  // Now try to recursively create a/b/c/d/
+  const string create_path = io::JoinPath(testing::TmpDir(), ""a/b/c/d/"");
+  TF_CHECK_OK(env->RecursivelyCreateDir(create_path));
+  TF_CHECK_OK(env->RecursivelyCreateDir(create_path));  // repeat creation.
+  EXPECT_TRUE(env->FileExists(create_path));
+  EXPECT_TRUE(env->FileExists(io::JoinPath(testing::TmpDir(), ""a/b/c"")));
+
+  // Clean up.
+  int64 undeleted_files, undeleted_dirs;
+  TF_CHECK_OK(env->DeleteRecursively(io::JoinPath(testing::TmpDir(), ""a""),
+                                     &undeleted_files, &undeleted_dirs));
+}
+
 TEST(EnvTest, LocalFileSystem) {
   // Test filename with file:// syntax.
   Env* env = Env::Default();
",0,train
915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library.
Change: 132704774",file_io.py,"@@ -257,11 +257,7 @@ def recursive_create_dir(dirname):
     errors.OpError: If the operation fails.
   """"""
   with errors.raise_exception_on_not_ok_status() as status:
-    dirs = compat.as_str_any(dirname).split(""/"")
-    for i in range(len(dirs)):
-      partial_dir = ""/"".join(dirs[0:i + 1])
-      if partial_dir and not file_exists(partial_dir):
-        pywrap_tensorflow.CreateDir(compat.as_bytes(partial_dir), status)
+    pywrap_tensorflow.RecursivelyCreateDir(compat.as_bytes(dirname), status)
 
 
 def copy(oldpath, newpath, overwrite=False):
",0,train
915b02917db21de5a0ae304a067aedb0b5dd759d,tensorflow/tensorflow,"Adding a RecursivelyCreateDir function to the base environment. Removing functionality from the python library.
Change: 132704774",file_io_test.py,"@@ -95,6 +95,7 @@ class FileIoTest(tf.test.TestCase):
   def testCreateRecursiveDir(self):
     dir_path = os.path.join(self._base_dir, ""temp_dir/temp_dir1/temp_dir2"")
     file_io.recursive_create_dir(dir_path)
+    file_io.recursive_create_dir(dir_path)  # repeat creation
     file_path = os.path.join(dir_path, ""temp_file"")
     file_io.FileIO(file_path, mode=""w"").write(""testing"")
     self.assertTrue(file_io.file_exists(file_path))
",0,train
6dfb912e1f9735f0f8a151272a741780e34e7a74,tensorflow/tensorflow,"[XLA:SPMD] Make offset calculation faster.

It was quadratic time before.

PiperOrigin-RevId: 327827558
Change-Id: Ib50d2b567e0458b5d2146ba3d3b1006050f3d06f",spmd_partitioner_test.cc,"@@ -138,8 +138,7 @@ ENTRY entry {
               op::AllReduce(op::Select(
                   op::Broadcast(op::Compare(op::PartitionId(), op::Constant())),
                   op::Constant(), op::Broadcast())),
-              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(),
-                                           op::Constant())),
+              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())),
               op::Constant())),
           op::Shape(""s32[1,3]"")));
 }
@@ -161,8 +160,7 @@ ENTRY entry {
       op::Copy(op::AllReduce(AllOf(
           op::DynamicUpdateSlice(
               op::Broadcast(), AllOf(op::Constant(), op::Shape(""s32[1,3]"")),
-              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(),
-                                           op::Constant())),
+              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())),
               op::Constant()),
           op::Shape(""s32[2,3]"")))));
 }
@@ -184,8 +182,7 @@ ENTRY entry {
       op::Copy(op::Copy(op::AllReduce(AllOf(
           op::DynamicUpdateSlice(
               op::Broadcast(), AllOf(op::Constant(), op::Shape(""s32[1,3]"")),
-              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(),
-                                           op::Constant())),
+              op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())),
               op::Constant()),
           op::Shape(""s32[2,3]""))))));
 }
@@ -279,8 +276,8 @@ ENTRY entry {
   HloInstruction* root = module->entry_computation()->root_instruction();
   ASSERT_THAT(root, op::Tuple());
 
-  auto offset = op::Reshape(
-      op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant()));
+  auto offset =
+      op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId()));
 
   EXPECT_THAT(root->operand(0),
               op::DynamicSlice(op::GetTupleElement(op::Parameter()), offset,
@@ -305,13 +302,13 @@ ENTRY entry {
                           PartitionComputation(hlo_string, /*num_devices=*/2));
   HloInstruction* root = module->entry_computation()->root_instruction();
   EXPECT_THAT(
-      root, op::Copy(op::AllReduce(op::DynamicUpdateSlice(
-                op::Broadcast(),
-                op::GetTupleElement(
-                    AllOf(op::Infeed(), op::Shape(""(f32[4,2]{1,0}, token[])""))),
-                op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId(),
-                                             op::Constant())),
-                op::Constant()))));
+      root,
+      op::Copy(op::AllReduce(op::DynamicUpdateSlice(
+          op::Broadcast(),
+          op::GetTupleElement(
+              AllOf(op::Infeed(), op::Shape(""(f32[4,2]{1,0}, token[])""))),
+          op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId())),
+          op::Constant()))));
 }
 
 TEST_F(SpmdPartitioningTest, UnevenTiledInfeed) {
@@ -3956,8 +3953,8 @@ ENTRY entry {
   TF_ASSERT_OK_AND_ASSIGN(auto module,
                           PartitionComputation(hlo_string, /*num_devices=*/2));
   VLOG(1) << module->ToString();
-  auto offset = op::Reshape(
-      op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant()));
+  auto offset =
+      op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId()));
   auto min = AllOf(op::Broadcast(offset), op::Shape(""s32[2,3]""));
   auto max = AllOf(op::Broadcast(op::Add(offset, op::Constant())),
                    op::Shape(""s32[2,3]""));
@@ -4093,8 +4090,8 @@ ENTRY entry {
   TF_ASSERT_OK_AND_ASSIGN(auto module,
                           PartitionComputation(hlo_string, /*num_devices=*/2));
   VLOG(1) << module->ToString();
-  auto offset = op::Reshape(
-      op::DynamicSlice(op::Constant(), op::PartitionId(), op::Constant()));
+  auto offset =
+      op::Reshape(op::DynamicSlice(op::Constant(), op::PartitionId()));
   auto indices = op::Subtract(
       op::Parameter(1), AllOf(op::Broadcast(offset), op::Shape(""s32[2,3]"")));
   HloInstruction* root = module->entry_computation()->root_instruction();
",0,train
6dfb912e1f9735f0f8a151272a741780e34e7a74,tensorflow/tensorflow,"[XLA:SPMD] Make offset calculation faster.

It was quadratic time before.

PiperOrigin-RevId: 327827558
Change-Id: Ib50d2b567e0458b5d2146ba3d3b1006050f3d06f",spmd_partitioner_util.cc,"@@ -29,6 +29,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/service/hlo_instructions.h""
 #include ""tensorflow/compiler/xla/service/hlo_module.h""
+#include ""tensorflow/compiler/xla/service/hlo_opcode.h""
 #include ""tensorflow/compiler/xla/service/hlo_sharding.h""
 #include ""tensorflow/compiler/xla/service/hlo_sharding_util.h""
 #include ""tensorflow/compiler/xla/service/pattern_matcher.h""
@@ -202,13 +203,17 @@ std::vector<HloInstruction*> MakePartitionOffsets(
     absl::Span<const int64> dims) {
   CHECK(!shape.IsTuple());
 
-  Array2D<int32> offset_array(
-      {sharding.tile_assignment().num_elements(), shape.rank()});
-  offset_array.Each([&](int64 i, int64 j, int32* value) {
-    *value = sharding.TileOffsetForDevice(shape, i)[j];
-  });
-  auto offset_table = b->AddInstruction(HloInstruction::CreateConstant(
-      LiteralUtil::CreateR2FromArray2D(offset_array)));
+  std::vector<std::vector<int32>> offset_arrays(shape.rank());
+  for (int64 i = 0; i < shape.rank(); ++i) {
+    offset_arrays[i].resize(sharding.tile_assignment().num_elements());
+  }
+  auto shard_shape = MakePartitionedShape(shape, sharding);
+  sharding.tile_assignment().Each(
+      [&](absl::Span<const int64> indices, int64 device) {
+        for (int64 i = 0; i < shape.rank(); ++i) {
+          offset_arrays[i][device] = indices[i] * shard_shape.dimensions(i);
+        }
+      });
   std::vector<HloInstruction*> offsets;
   for (int64 i = 0; i < shape.rank(); ++i) {
     if (sharding.tile_assignment().dim(i) == 1 ||
@@ -216,11 +221,10 @@ std::vector<HloInstruction*> MakePartitionOffsets(
       offsets.push_back(b->AddInstruction(
           HloInstruction::CreateConstant(LiteralUtil::Zero(S32))));
     } else {
+      auto offset_table = b->AddInstruction(HloInstruction::CreateConstant(
+          LiteralUtil::CreateR1<int32>(offset_arrays[i])));
       auto index = b->AddInstruction(HloInstruction::CreateDynamicSlice(
-          ShapeUtil::MakeShape(S32, {1, 1}), offset_table,
-          {partition_id, b->AddInstruction(HloInstruction::CreateConstant(
-                             LiteralUtil::CreateR0<uint32>(i)))},
-          {1, 1}));
+          ShapeUtil::MakeShape(S32, {1}), offset_table, {partition_id}, {1}));
       offsets.push_back(b->AddInstruction(
           HloInstruction::CreateReshape(ShapeUtil::MakeShape(S32, {}), index)));
     }
",0,train
a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging:

- set the engine name (without scope) before converting the segment to a
  GraphDef, so we can print it out and know which graph is being operated on.
- format the output of segment nodes in segment.cc

PiperOrigin-RevId: 238043350",convert_graph.cc,"@@ -331,9 +331,13 @@ Status GetEngineInfo(const Graph* g,
   // Construct the const nodes first.
   subgraph_nodes.insert(subgraph_nodes.begin(), added_const_nodes.begin(),
                         added_const_nodes.end());
+  string scope_name;
   TF_RETURN_IF_ERROR(ConvertSegmentToGraphDef(
       g, graph_properties, subgraph_nodes, &info->connections,
-      &info->segment_graph_def, &info->engine_name));
+      &info->segment_graph_def, &scope_name));
+  info->engine_name = StrCat(scope_name, info->engine_name);
+  VLOG(1) << ""Converted TensorRT candidate segment '"" << info->engine_name
+          << ""' to a GraphDef"";
   // TODO(sami): This should not happen once segmenter is updated.
   if (segment_devices.size() == 1) {
     info->device = *segment_devices.begin();
@@ -492,8 +496,7 @@ Status CreateTRTNode(const ConversionParams& params,
   // these segments.
   if (inputs.empty()) {
     return errors::Internal(
-        ""Segment has no inputs (possible ""
-        ""constfold failure)"");
+        ""Segment has no inputs (possible constfold failure)"");
   }
 
   const bool calibrate_int8 =
@@ -839,6 +842,7 @@ Status ConvertAfterShapes(const ConversionParams& params) {
   for (size_t t = 0; t < initial_segments.size(); t++) {
     auto& curr_segment = initial_segments.at(t);
     EngineInfo curr_engine;
+    curr_engine.engine_name = StrCat(""TRTEngineOp_"", t);
     Status status =
         GetEngineInfo(&graph, *params.graph_properties, curr_segment.first,
                       node_map, reverse_topo_order, &curr_engine);
@@ -854,7 +858,6 @@ Status ConvertAfterShapes(const ConversionParams& params) {
     curr_engine.use_calibration = params.use_calibration;
     curr_engine.cached_engine_batches = params.cached_engine_batches;
     curr_engine.maximum_cached_engines = params.max_cached_engines;
-    StrAppend(&curr_engine.engine_name, ""TRTEngineOp_"", t);
     if (params.use_function_backup) {
       status = RegisterSegmentFunctionToFunctionLibrary(
           &graph, curr_engine.segment_graph_def, curr_engine.engine_name);
",0,train
a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging:

- set the engine name (without scope) before converting the segment to a
  GraphDef, so we can print it out and know which graph is being operated on.
- format the output of segment nodes in segment.cc

PiperOrigin-RevId: 238043350",convert_nodes.cc,"@@ -4148,7 +4148,7 @@ Status ConvertSegmentToGraphDef(
     const Graph* graph, const grappler::GraphProperties& graph_properties,
     const std::vector<const Node*>& subgraph_nodes,  // In topological order
     std::vector<EngineConnection>* connections, GraphDef* segment_def,
-    string* common_scope) {
+    string* scope_name) {
   std::set<string> marker_nodes;
   // Update connection shapes/data types and add corresponding input/output
   // nodes in the segment graphdef.
@@ -4281,9 +4281,7 @@ Status ConvertSegmentToGraphDef(
       snode->mutable_input()->RemoveLast();
     }
   }
-  *common_scope = local_scope;
-  VLOG(1) << ""Converted TensorRT candidate segment @scope '"" << local_scope
-          << ""' to a GraphDef"";
+  *scope_name = local_scope;
   return Status::OK();
 }
 
",0,train
a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging:

- set the engine name (without scope) before converting the segment to a
  GraphDef, so we can print it out and know which graph is being operated on.
- format the output of segment nodes in segment.cc

PiperOrigin-RevId: 238043350",convert_nodes.h,"@@ -123,13 +123,14 @@ struct EngineInfo {
 //   topological order.
 // - segment_def: the output GraphDef, whose non-input/output nodedefs will be
 //   sorted in topological order.
+// - scope_name: the name of the scope where the TRTEngineOp will be placed.
 //
 // TODO(aaroey): add tests to validate these properties.
 Status ConvertSegmentToGraphDef(
     const Graph* graph, const grappler::GraphProperties& graph_properties,
     const std::vector<const Node*>& subgraph_nodes,
     std::vector<EngineConnection>* connections, GraphDef* segment_def,
-    string* common_scope);
+    string* scope_name);
 
 // Converts given subgraph to a TRT engine saved in 'engine'. Returns ok iff
 // 'builder' successfully build the engine. If the result is not ok, 'engine'
",0,train
a6976d4034fae34128a93ea23129760eae63980c,tensorflow/tensorflow,"Improve TF-TRT logging:

- set the engine name (without scope) before converting the segment to a
  GraphDef, so we can print it out and know which graph is being operated on.
- format the output of segment nodes in segment.cc

PiperOrigin-RevId: 238043350",segment.cc,"@@ -668,10 +668,13 @@ Status SegmentGraph(const Graph* tf_graph,
     const string& segment_root = itr.first;
     // Return format does not require set comparator.
     std::set<const Node*> segment_nodes(itr.second.begin(), itr.second.end());
-    if (VLOG_IS_ON(1)) {
-      string s = ""parent="" + segment_root + "":"";
-      for (auto node : segment_nodes) s += "" "" + node->name();
-      VLOG(1) << ""Segment "" << segments->size() << "": "" << s;
+    if (VLOG_IS_ON(1) && !segment_nodes.empty()) {
+      string s;
+      for (auto node : segment_nodes) {
+        StrAppend(&s, ""\n[Op type: "", node->type_string(), ""] "", node->name());
+      }
+      VLOG(1) << ""Nodes in segment "" << segments->size()
+              << "" with parent="" << segment_root << "":"" << s;
     }
 
     // Don't use small segments.
",0,train
d9a9a0a2ba89bbbdab5bb232f2d80534ccd3706c,tensorflow/tensorflow,"Add HLO canonicalize patterns.

Add a pattern to convert some dynamic slice operations into slice operations.

Add test cases.

PiperOrigin-RevId: 274858314
Change-Id: I3135db7601944186bdd8c6064479070862f62336",hlo_ops.cc,"@@ -80,6 +80,38 @@ static LogicalResult Verify(T op) {
   return success();
 }
 
+namespace {
+
+//===----------------------------------------------------------------------===//
+// Utilities for the canonicalize patterns
+//===----------------------------------------------------------------------===//
+
+// Returns 1D 64-bit dense elements attribute with the given values.
+DenseIntElementsAttr GetI64ElementsAttr(ArrayRef<int64_t> values,
+                                        Builder* builder) {
+  RankedTensorType ty = builder->getTensorType(
+      {static_cast<int64_t>(values.size())}, builder->getIntegerType(64));
+  return DenseElementsAttr::get<int64_t>(ty, values)
+      .cast<DenseIntElementsAttr>();
+}
+
+// Given the start indices and slice sizes for a dynamic-slice that can be
+// converted to a static slice, returns the limits for the static slice.
+DenseIntElementsAttr BuildSliceLimits(DenseIntElementsAttr start_indices,
+                                      DenseIntElementsAttr slice_sizes,
+                                      Builder* builder) {
+  SmallVector<int64_t, 4> slice_limits;
+  for (int64_t i = 0; i < slice_sizes.getNumElements(); ++i) {
+    int64_t start_index = start_indices.getValue<IntegerAttr>(i).getInt();
+    int64_t slice_size = slice_sizes.getValue<IntegerAttr>(i).getInt();
+    slice_limits.push_back(start_index + slice_size);
+  }
+  return GetI64ElementsAttr(slice_limits, builder);
+}
+
+#include ""tensorflow/compiler/mlir/xla/transforms/generated_canonicalize.inc""
+}  // namespace
+
 //===----------------------------------------------------------------------===//
 // ConstOp
 //===----------------------------------------------------------------------===//
@@ -468,6 +500,15 @@ static LogicalResult Verify(ConcatenateOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// DynamicSliceOp
+//===----------------------------------------------------------------------===//
+
+void DynamicSliceOp::getCanonicalizationPatterns(
+    OwningRewritePatternList& results, MLIRContext* context) {
+  results.insert<DynamicSliceToSlice>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // ReshapeOp
 //===----------------------------------------------------------------------===//
",0,train
238424ffcf04c38561ed48ebadb16b3b3a770e2e,tensorflow/tensorflow,"[XLA:TF] Re-disable testRandomUniformIsInRange

The bug is still there and makes this test flakily fail with fp16.

PiperOrigin-RevId: 213669453",random_ops_test.py,"@@ -76,7 +76,8 @@ class RandomOpsTest(xla_test.XLATestCase):
     for dtype in self._random_types():
       # TODO (b/112272078): enable bfloat16 for CPU and GPU when the bug is
       # fixed.
-      if (self.device in [""XLA_GPU"", ""XLA_CPU""]) and (dtype == dtypes.bfloat16):
+      if (self.device in [""XLA_GPU"", ""XLA_CPU""
+                         ]) and (dtype in [dtypes.bfloat16, dtypes.half]):
         continue
       with self.cached_session() as sess:
         with self.test_scope():
",0,test
d5a18ab07beda13db1b7cc5bea5f8d6c2e33303d,tensorflow/tensorflow,"Remove PersistentTensor from softmax_op.cc

PiperOrigin-RevId: 371825789
Change-Id: I007d72c934208e7e581f5e7ee671b3dbd8274c06",softmax_op.cc,"@@ -165,20 +165,19 @@ class CSRSoftmaxGradOp : public OpKernel {
     // tensor is the elementwise product of some function with the
     // softmax value, it will keep the sparsity structure of the softmax.
     const int total_nnz = softmax_matrix->total_nnz();
-    PersistentTensor gradient_values_pt;
-    Tensor* gradient_values_t;
-    OP_REQUIRES_OK(ctx, ctx->allocate_persistent(
-                            DataTypeToEnum<T>::value, TensorShape({total_nnz}),
-                            &gradient_values_pt, &gradient_values_t));
+    Tensor gradient_values;
+    OP_REQUIRES_OK(
+        ctx, ctx->allocate_temp(DataTypeToEnum<T>::value,
+                                TensorShape({total_nnz}), &gradient_values));
 
     CSRSparseMatrix gradient_matrix;
 
     OP_REQUIRES_OK(
-        ctx, CSRSparseMatrix::CreateCSRSparseMatrix(
-                 DataTypeToEnum<T>::value, dense_shape_t,
-                 softmax_matrix->batch_pointers(),
-                 softmax_matrix->row_pointers(), softmax_matrix->col_indices(),
-                 *gradient_values_t, &gradient_matrix));
+        ctx,
+        CSRSparseMatrix::CreateCSRSparseMatrix(
+            DataTypeToEnum<T>::value, dense_shape_t,
+            softmax_matrix->batch_pointers(), softmax_matrix->row_pointers(),
+            softmax_matrix->col_indices(), gradient_values, &gradient_matrix));
 
     if (total_nnz > 0) {
       functor::CSRSparseMatrixSoftmaxGrad<Device, T> softmax_grad;
",0,train
5c135e7a7d48ea650fe0786670cc0560f1793b2b,tensorflow/tensorflow,"Fix the existing docstrings of array_ops and init_ops_v2.

PiperOrigin-RevId: 267168146",array_ops.py,"@@ -1343,38 +1343,34 @@ def concat(values, axis, name=""concat""):
 
   For example:
 
-  ```python
-  t1 = [[1, 2, 3], [4, 5, 6]]
-  t2 = [[7, 8, 9], [10, 11, 12]]
-  tf.concat([t1, t2], 0)  # [[1, 2, 3], [4, 5, 6], [7, 8, 9], [10, 11, 12]]
-  tf.concat([t1, t2], 1)  # [[1, 2, 3, 7, 8, 9], [4, 5, 6, 10, 11, 12]]
-
-  # tensor t3 with shape [2, 3]
-  # tensor t4 with shape [2, 3]
-  tf.shape(tf.concat([t3, t4], 0))  # [4, 3]
-  tf.shape(tf.concat([t3, t4], 1))  # [2, 6]
-  ```
+  >>> t1 = [[1, 2, 3], [4, 5, 6]]
+  >>> t2 = [[7, 8, 9], [10, 11, 12]]
+  >>> concat([t1, t2], 0)
+  <tf.Tensor: id=..., shape=(4, 3), dtype=int32, numpy=
+  array([[ 1,  2,  3],
+         [ 4,  5,  6],
+         [ 7,  8,  9],
+         [10, 11, 12]], dtype=int32)>
+
+  >>> concat([t1, t2], 1)
+  <tf.Tensor: id=..., shape=(2, 6), dtype=int32, numpy=
+  array([[ 1,  2,  3,  7,  8,  9],
+         [ 4,  5,  6, 10, 11, 12]], dtype=int32)>
+
   As in Python, the `axis` could also be negative numbers. Negative `axis`
   are interpreted as counting from the end of the rank, i.e.,
    `axis + rank(values)`-th dimension.
 
   For example:
 
-  ```python
-  t1 = [[[1, 2], [2, 3]], [[4, 4], [5, 3]]]
-  t2 = [[[7, 4], [8, 4]], [[2, 10], [15, 11]]]
-  tf.concat([t1, t2], -1)
-  ```
-
-  would produce:
-
-  ```python
-  [[[ 1,  2,  7,  4],
-    [ 2,  3,  8,  4]],
-
-   [[ 4,  4,  2, 10],
-    [ 5,  3, 15, 11]]]
-  ```
+  >>> t1 = [[[1, 2], [2, 3]], [[4, 4], [5, 3]]]
+  >>> t2 = [[[7, 4], [8, 4]], [[2, 10], [15, 11]]]
+  >>> tf.concat([t1, t2], -1)
+  <tf.Tensor: id=..., shape=(2, 2, 4), dtype=int32, numpy=
+    array([[[ 1,  2,  7,  4],
+            [ 2,  3,  8,  4]],
+           [[ 4,  4,  2, 10],
+            [ 5,  3, 15, 11]]], dtype=int32)>
 
   Note: If you are concatenating along a new axis consider using stack.
   E.g.
@@ -4810,14 +4806,20 @@ def repeat_with_axis(data, repeats, axis, name=None):
     A tensor with `max(N, 1)` dimensions.  Has the same shape as `data`,
     except that dimension `axis` has size `sum(repeats)`.
   #### Examples:
-    ```python
     >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0)
-    ['a', 'a', 'a', 'c', 'c']
+    <tf.Tensor: ..., shape=(5,), dtype=string,
+    numpy=array([b'a', b'a', b'a', b'c', b'c'], dtype=object)>
     >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)
-    [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]
+    <tf.Tensor: ..., shape=(5, 2), dtype=int32, numpy=
+    array([[1, 2],
+           [1, 2],
+           [3, 4],
+           [3, 4],
+           [3, 4]], dtype=int32)>
     >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)
-    [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]]
-    ```
+    <tf.Tensor: ..., shape=(2, 5), dtype=int32, numpy=
+    array([[1, 1, 2, 2, 2],
+           [3, 3, 4, 4, 4]], dtype=int32)>
   """"""
   if not isinstance(axis, int):
     raise TypeError(""axis must be an int; got %s"" % type(axis).__name__)
@@ -4916,7 +4918,7 @@ def _with_nonzero_rank(data):
 
 @tf_export(""repeat"")
 def repeat(input, repeats, axis=None, name=None):  # pylint: disable=redefined-builtin
-  """"""Repeat elements of `input`
+  """"""Repeat elements of `input`.
 
   Args:
     input: An `N`-dimensional Tensor.
@@ -4932,18 +4934,31 @@ def repeat(input, repeats, axis=None, name=None):  # pylint: disable=redefined-b
       If axis is None then the output array is flattened to match the flattened
       input array.
   #### Examples:
-    ```python
+
     >>> repeat(['a', 'b', 'c'], repeats=[3, 0, 2], axis=0)
-    ['a', 'a', 'a', 'c', 'c']
+    <tf.Tensor: ..., shape=(5,), dtype=string,
+    numpy=array([b'a', b'a', b'a', b'c', b'c'], dtype=object)>
+
     >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=0)
-    [[1, 2], [1, 2], [3, 4], [3, 4], [3, 4]]
+    <tf.Tensor: id=..., shape=(5, 2), dtype=int32, numpy=
+    array([[1, 2],
+           [1, 2],
+           [3, 4],
+           [3, 4],
+           [3, 4]], dtype=int32)>
+
     >>> repeat([[1, 2], [3, 4]], repeats=[2, 3], axis=1)
-    [[1, 1, 2, 2, 2], [3, 3, 4, 4, 4]]
+    <tf.Tensor: id=..., shape=(2, 5), dtype=int32, numpy=
+    array([[1, 1, 2, 2, 2],
+           [3, 3, 4, 4, 4]], dtype=int32)>
+
     >>> repeat(3, repeats=4)
-    [3, 3, 3, 3]
+    <tf.Tensor: id=..., shape=(4,), dtype=int32,
+    numpy=array([3, 3, 3, 3], dtype=int32)>
+
     >>> repeat([[1,2], [3,4]], repeats=2)
-    [1, 1, 2, 2, 3, 3, 4, 4]
-    ```
+    <tf.Tensor: id=..., shape=(8,), dtype=int32,
+    numpy=array([1, 1, 2, 2, 3, 3, 4, 4], dtype=int32)>
   """"""
   if axis is None:
     input = reshape(input, [-1])
",0,test
5c135e7a7d48ea650fe0786670cc0560f1793b2b,tensorflow/tensorflow,"Fix the existing docstrings of array_ops and init_ops_v2.

PiperOrigin-RevId: 267168146",init_ops_v2.py,"@@ -149,12 +149,8 @@ class Constant(Initializer):
     of the `value` list, even reshaped, as shown in the two commented lines
     below the `value` list initialization.
 
-  ```python
   >>> value = [0, 1, 2, 3, 4, 5, 6, 7]
-  >>> # value = np.array(value)
-  >>> # value = value.reshape([2, 4])
   >>> init = tf.compat.v1.constant_initializer(value)
-  >>>
   >>> # Fitting shape
   >>> with tf.compat.v1.Session():
   ...   x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
@@ -164,18 +160,19 @@ class Constant(Initializer):
    [4. 5. 6. 7.]]
   >>> # Larger shape
   >>> with tf.compat.v1.Session():
-  ...   x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
-  ...   x.initializer.run()
-  ...   print(x.eval())
-  [[ 0.  1.  2.  3.]
-   [ 4.  5.  6.  7.]
-   [ 7.  7.  7.  7.]]
+  ...   y = tf.compat.v1.get_variable('y', shape=[3, 4], initializer=init)
+  ...   y.initializer.run()
+  ...   print(y.eval())
+  [[0. 1. 2. 3.]
+   [4. 5. 6. 7.]
+   [7. 7. 7. 7.]]
   >>> # Smaller shape
   >>> with tf.compat.v1.Session():
-  ...   x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)
+  ...   z = tf.compat.v1.get_variable('z', shape=[2, 3], initializer=init)
+  Traceback (most recent call last):
+  ...
   ValueError: Too many elements provided. Needed at most 6, but received 8
 
-  ```
   """"""
 
   def __init__(self, value=0):
",0,test
d7b9d9a60fcbd6a0d294f4886793f54b381a7145,tensorflow/tensorflow,"Expose configuration options for the LhloFuseLinalg pass.

These were available as pass options but not when constructing the
pass using `createLhloFuseLinalg`.

PiperOrigin-RevId: 297550513
Change-Id: Ie0ce3bf3ea1f2cf40dd77eeec53eb121d2785b65",lhlo_fuse_linalg.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""mlir/Dialect/Linalg/Utils/Utils.h""  // TF:llvm-project
 #include ""mlir/Pass/Pass.h""  // TF:llvm-project
 #include ""mlir/Transforms/FoldUtils.h""  // TF:llvm-project
+#include ""tensorflow/compiler/mlir/xla/transforms/passes.h""
 
 namespace mlir {
 namespace xla_lhlo {
@@ -123,8 +124,9 @@ class LhloFuseLinalg : public FunctionPass<LhloFuseLinalg> {
 
 }  // namespace
 
-std::unique_ptr<OpPassBase<FuncOp>> createLhloFuseLinalg() {
-  return absl::make_unique<LhloFuseLinalg>();
+std::unique_ptr<OpPassBase<FuncOp>> createLhloFuseLinalg(
+    bool use_parallel_loops, ArrayRef<unsigned> tile_sizes) {
+  return absl::make_unique<LhloFuseLinalg>(use_parallel_loops, tile_sizes);
 }
 
 static PassRegistration<LhloFuseLinalg> legalize_pass(
",0,train
d7b9d9a60fcbd6a0d294f4886793f54b381a7145,tensorflow/tensorflow,"Expose configuration options for the LhloFuseLinalg pass.

These were available as pass options but not when constructing the
pass using `createLhloFuseLinalg`.

PiperOrigin-RevId: 297550513
Change-Id: Ie0ce3bf3ea1f2cf40dd77eeec53eb121d2785b65",passes.h,"@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <memory>
 
+#include ""llvm/ADT/ArrayRef.h""
 #include ""mlir/IR/MLIRContext.h""  // TF:llvm-project
 #include ""mlir/Support/LogicalResult.h""  // TF:llvm-project
 
@@ -76,8 +77,17 @@ std::unique_ptr<OpPassBase<FuncOp>> createLegalizeLhloToLinalgPass();
 // Lowers from LHLO dialect to GPU dialect.
 std::unique_ptr<OpPassBase<FuncOp>> createLegalizeToGpuPass();
 
-// Fuses linalg ops obtained after LHLO lowering.
-std::unique_ptr<OpPassBase<FuncOp>> createLhloFuseLinalg();
+// Fuses linalg ops obtained after LHLO lowering. To enable fusion,
+// operations are first tiled.
+//
+// When 'use_parallel_loops' is set, the tiling will use loop.parallel
+// operations. Otherwise, loop.for operations are used.
+//
+// 'tile_sizes' provides the tile sizes to use for tiling. If the linalg
+// operation has more dimensions than tile sizes provided, 1 is used as
+// default.
+std::unique_ptr<OpPassBase<FuncOp>> createLhloFuseLinalg(
+    bool use_parallel_loops = false, ArrayRef<unsigned> tile_sizes = {});
 
 }  // namespace xla_lhlo
 }  // namespace mlir
",0,train
1127ae0a91fcee00d2931ef142f0ac2c63bdc7be,tensorflow/tensorflow,Resolved description,lite.py,"@@ -308,8 +308,9 @@ class TFLiteConverterV2(TFLiteConverterBase):
       to apply when converting the model. E.g. `[Optimize.DEFAULT]`
     representative_dataset: A representative dataset that can be used to
       generate input and output samples for the model. The converter can use the
-      dataset to evaluate different optimizations. Note that this is a necessary
-      attribute since the conversion optimization depends upon it.
+      dataset to evaluate different optimizations. Note that this is an optional
+      attribute but it is necessary if INT8 is the only support builtin ops in
+      target ops.
     experimental_new_converter: Experimental flag, subject to change.
       Enables MLIR-based conversion instead of TOCO conversion.
     experimental_new_quantizer: Experimental flag, subject to change.
",0,train
8ae2eecf4f70b5efb55a108f2b0000ea6cad3e05,tensorflow/tensorflow,"Add a broadcasting test case with rank 6.

With broadcast shape rank minimization, this test can be computed with
broadcast rank specialization of rank 3.

PiperOrigin-RevId: 360155936
Change-Id: I6f359af5ae7204ff6446216ce5172d69f43a1ac2",base_binary_ops_test.h,"@@ -309,6 +309,41 @@ class BinaryOpsTestBase : public OpsTestBase {
                                 expected_output, config);
   }
 
+  template <typename T, typename BaselineT, typename OutT,
+            typename BaselineOutT>
+  void TestBroadcastingRank6(const std::string& op_name,
+                             const absl::InlinedVector<T, 10>& lhs_input,
+                             const absl::InlinedVector<T, 10>& rhs_input,
+                             BaselineOutT (*baseline_callback)(BaselineT,
+                                                               BaselineT),
+                             const test::OpsTestConfig& config) {
+    // Prepare inputs.
+    TensorShape lhs_shape{1, 2, 3, 1, 2, 1};
+    TensorShape rhs_shape{1, 1, 1, 2, 3};
+    auto repeated_lhs_input =
+        test::RepeatInputToMatchShape(lhs_input, lhs_shape.num_elements());
+    auto repeated_rhs_input =
+        test::RepeatInputToMatchShape(rhs_input, rhs_shape.num_elements());
+
+    // Compute expected results.
+    TensorShape expected_shape{1, 2, 3, 1, 2, 3};
+    std::vector<int> lhs_indices = {0, 0, 0, 1, 1, 1, 2,  2,  2,  3,  3,  3,
+                                    4, 4, 4, 5, 5, 5, 6,  6,  6,  7,  7,  7,
+                                    8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11};
+    std::vector<int> rhs_indices = {
+        0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+        0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4, 5,
+    };
+    auto expected_output =
+        ComputeExpectedOutput<T, BaselineT, OutT, BaselineOutT>(
+            lhs_indices, repeated_lhs_input, rhs_indices, repeated_rhs_input,
+            baseline_callback);
+
+    RunAndExpectResult<T, OutT>(op_name, lhs_shape, repeated_lhs_input,
+                                rhs_shape, repeated_rhs_input, expected_shape,
+                                expected_output, config);
+  }
+
   template <typename T, typename BaselineT, typename OutT,
             typename BaselineOutT>
   void TestEmptyShapeBroadcasting(const std::string& op_name,
@@ -392,6 +427,11 @@ class BinaryOpsTestBase : public OpsTestBase {
         #op_name, lhs_input, rhs_input, baseline_callback, config);           \
   }                                                                           \
                                                                               \
+  TEST_F(BinaryOpsTest, op_name##BroadcastingRank6##test_name) {              \
+    TestBroadcastingRank6<T, BaselineT, OutT, BaselineOutT>(                  \
+        #op_name, lhs_input, rhs_input, baseline_callback, config);           \
+  }                                                                           \
+                                                                              \
   TEST_F(BinaryOpsTest, op_name##EmptyShapeBroadcasting##test_name) {         \
     TestEmptyShapeBroadcasting<T, BaselineT, OutT, BaselineOutT>(             \
         #op_name, lhs_input, rhs_input, config);                              \
",0,train
7a24845e237f42d3f0bc6ab031ee96e7ef896800,tensorflow/tensorflow,fixes file loading mechanism in datasets,boston_housing.py,"@@ -45,10 +45,9 @@ def load_data(path='boston_housing.npz', test_split=0.2, seed=113):
       origin=origin_folder + 'boston_housing.npz',
       file_hash=
       'f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5')
-  f = np.load(path)
-  x = f['x']
-  y = f['y']
-  f.close()
+  with np.load(path) as f:
+    x = f['x']
+    y = f['y']
 
   np.random.seed(seed)
   indices = np.arange(len(x))
",0,test
7a24845e237f42d3f0bc6ab031ee96e7ef896800,tensorflow/tensorflow,fixes file loading mechanism in datasets,mnist.py,"@@ -51,4 +51,4 @@ def load_data(path='mnist.npz'):
     x_train, y_train = f['x_train'], f['y_train']
     x_test, y_test = f['x_test'], f['y_test']
     
-  return (x_train, y_train), (x_test, y_test)
+    return (x_train, y_train), (x_test, y_test)
",0,test
fdd01eb06b2f01ca9db3fdde528aba6fffd8079b,tensorflow/tensorflow,"Correctly set execution mode on the context, not the module

PiperOrigin-RevId: 235634263",core_test.py,"@@ -311,7 +311,7 @@ class TFETest(test_util.TensorFlowTestCase):
                  three.dtype.as_datatype_enum))
       context.async_wait()
     context.async_clear_error()
-    context.execution_mode = context.SYNC
+    context.context().execution_mode = context.SYNC
 
   def testExecuteTooManyNumOutputs(self):
     # num_outputs provided is 50, but only one output is produced.
",0,train
a0d429cafe8226834365210caf703c28996d5795,tensorflow/tensorflow,"Avoid use of sufficient_statistics in moments() (#8906)

* Avoid use of sufficient_statistics in moments()

sufficient_statistics() uses reduce_prod if some of the dimensions are unknown.
But reduce_prod is not differentiable on GPU for now. This commit avoids the usage of sufficient_statistics in moments().

* Add names to operations

* Use squeeze() to reduce axes",nn_impl.py,"@@ -639,19 +639,21 @@ def moments(x, axes, shift=None, name=None, keep_dims=False):
           math_ops.reduce_mean(y, axes, keep_dims=True))
     else:
       shift = math_ops.cast(shift, y.dtype)
-    counts, m_ss, v_ss, shift = sufficient_statistics(
-        y, axes, shift=shift, keep_dims=keep_dims, name=name)
-    # Reshape shift as needed.
-    shift = array_ops.reshape(shift, array_ops.shape(m_ss))
-    shift.set_shape(m_ss.get_shape())
-    with ops.control_dependencies([counts, m_ss, v_ss]):
-      mean, variance = normalize_moments(counts, m_ss, v_ss, shift, name=name)
-      if x.dtype == dtypes.float16:
-        return (math_ops.cast(mean, dtypes.float16),
-                math_ops.cast(variance, dtypes.float16))
-      else:
-        return (mean, variance)
-
+    shifted_mean = math_ops.reduce_mean(
+      math_ops.subtract(y, shift), axes, keep_dims=True, name=""shifted_mean"")
+    variance = math_ops.subtract(
+        math_ops.reduce_mean(math_ops.squared_difference(y, shift), axes, keep_dims=True),
+        math_ops.square(shifted_mean),
+        name=""variance"")
+    mean = math_ops.add(shifted_mean, shift, name=""mean"")
+    if not keep_dims:
+      mean = array_ops.squeeze(mean, axes)
+      variance = array_ops.squeeze(variance, axes)
+    if x.dtype == dtypes.float16:
+      return (math_ops.cast(mean, dtypes.float16),
+              math_ops.cast(variance, dtypes.float16))
+    else:
+      return (mean, variance)
 
 def weighted_moments(x, axes, frequency_weights, name=None, keep_dims=False):
   """"""Returns the frequency-weighted mean and variance of `x`.
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api.cc,"@@ -79,11 +79,6 @@ void TfLiteInterpreterOptionsSetNumThreads(TfLiteInterpreterOptions* options,
   options->num_threads = num_threads;
 }
 
-void TfLiteInterpreterOptionsSetUseNNAPI(TfLiteInterpreterOptions* options,
-                                         bool enable) {
-  options->useNNAPI = enable;
-}
-
 void TfLiteInterpreterOptionsAddDelegate(TfLiteInterpreterOptions* options,
                                          TfLiteDelegate* delegate) {
   options->delegates.push_back(delegate);
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api.h,"@@ -120,10 +120,6 @@ TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsDelete(
 TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetNumThreads(
     TfLiteInterpreterOptions* options, int32_t num_threads);
 
-// Enable or disable the NN API for the interpreter (true to enable).
-TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetUseNNAPI(
-    TfLiteInterpreterOptions* options, bool enable);
-
 // Adds a delegate to be applied during `TfLiteInterpreter` creation.
 //
 // If delegate application fails, interpreter creation will also fail with an
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental.cc,"@@ -50,6 +50,11 @@ void TfLiteInterpreterOptionsAddCustomOp(TfLiteInterpreterOptions* options,
   options->op_resolver.AddCustom(name, registration, min_version, max_version);
 }
 
+void TfLiteInterpreterOptionsSetUseNNAPI(TfLiteInterpreterOptions* options,
+                                         bool enable) {
+  options->useNNAPI = enable;
+}
+
 #ifdef __cplusplus
 }  // extern ""C""
 #endif  // __cplusplus
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental.h,"@@ -49,6 +49,10 @@ TFL_CAPI_EXPORT void TfLiteInterpreterOptionsAddCustomOp(
     const TfLiteRegistration* registration, int32_t min_version,
     int32_t max_version);
 
+// Enable or disable the NN API for the interpreter (true to enable).
+TFL_CAPI_EXPORT extern void TfLiteInterpreterOptionsSetUseNNAPI(
+    TfLiteInterpreterOptions* options, bool enable);
+
 #ifdef __cplusplus
 }  // extern ""C""
 #endif  // __cplusplus
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_experimental_test.cc,"@@ -41,6 +41,7 @@ TEST(CApiExperimentalTest, Smoke) {
   TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate();
   TfLiteInterpreterOptionsAddBuiltinOp(options, kTfLiteBuiltinAdd,
                                        GetDummyRegistration(), 1, 1);
+  TfLiteInterpreterOptionsSetUseNNAPI(options, true);
 
   TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options);
   ASSERT_NE(interpreter, nullptr);
",0,train
b97023504c53efab507c20ed8af5f6430c475834,tensorflow/tensorflow,Moved to experimental api,c_api_test.cc,"@@ -38,7 +38,6 @@ TEST(CApiSimple, Smoke) {
   TfLiteInterpreterOptions* options = TfLiteInterpreterOptionsCreate();
   ASSERT_NE(options, nullptr);
   TfLiteInterpreterOptionsSetNumThreads(options, 2);
-  TfLiteInterpreterOptionsSetUseNNAPI(options, true);
 
   TfLiteInterpreter* interpreter = TfLiteInterpreterCreate(model, options);
   ASSERT_NE(interpreter, nullptr);
",0,train
bd9224f5a066be8ec591bb2ac79c8bd87a9a395b,tensorflow/tensorflow,"Modify reference quantized LSTM implementation so that it only needs one instantiation of fixed-point Tanh, for 3 integer bits, regardless of the value of StateIntegerBits

PiperOrigin-RevId: 186075161",reference_ops.h,"@@ -1577,9 +1577,19 @@ void LstmCell(const uint8* input_data_uint8, const Dims<4>& input_dims,
       FS new_state = gemmlowp::SaturatingAdd(
           gemmlowp::Rescale<StateIntegerBits>(input_times_input_modulation),
           prev_state_times_forget_state);
-      // Implementation of last internal tanh node, still in fixed-point.
-      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state);
+      // Implementation of last internal Tanh node, still in fixed-point.
+      // Since a Tanh fixed-point implementation is specialized for a given
+      // number or integer bits, and each specialization can have a substantial
+      // code size, and we already used above a Tanh on an input with 3 integer
+      // bits, and per the table in the above function comment there is no
+      // significant accuracy to be lost by clamping to [-8, +8] for a
+      // 3-integer-bits representation, let us just do that. This helps people
+      // porting this to targets where code footprint must be minimized.
+      F3 new_state_f3 = gemmlowp::Rescale<3>(new_state);
+      F0 output_activ_int16 = output_gate_output * gemmlowp::tanh(new_state_f3);
       // Store the new internal state back to memory, as 16-bit integers.
+      // Note: here we store the original value with StateIntegerBits, not
+      // the rescaled 3-integer-bits value fed to tanh.
       output_state_data_int16[b * output_depth + c] = new_state.raw();
       // Down-scale the output activations to 8-bit integers, saturating,
       // and store back to memory.
",0,test
e873954c8507c5b983c3dddd6f3ae1eeb2c04e01,tensorflow/tensorflow,"Expand the documentation of tf.learn feature columns.
Change: 139102565",feature_column.py,"@@ -14,63 +14,102 @@
 # ==============================================================================
 """"""This API defines FeatureColumn abstraction.
 
-To distinguish the concept of a feature family and a specific binary feature
-within a family, we refer to a feature family like ""country"" as a feature
-column. For example ""country:US"" is a feature which is in ""country"" feature
-column and has a feature value (""US"").
+FeatureColumns provide a high level abstraction for ingesting and representing
+features in tf.learn Estimator models.
 
-Supported feature types are:
- * _SparseColumn: also known as categorical features.
- * _RealValuedColumn: also known as continuous features.
+FeatureColumns are the primary way of encoding features for pre-canned
+tf.learn Estimators.
 
-Supported transformations on above features are:
- * Bucketization: also known as binning.
- * Crossing: also known as conjunction or combination.
- * Embedding.
+When using FeatureColumns with tf.learn models, the type of feature column you
+should choose depends on (1) the feature type and (2) the model type.
 
-Typical usage example:
+(1) Feature type:
+ * Continuous features can be represented by `real_valued_column`.
+ * Categorical features can be represented by any `sparse_column_with_*`
+ column (`sparse_column_with_keys`, `sparse_column_with_hash_bucket`,
+ `sparse_column_with_integerized_feature`).
 
-  ```python
-  # Define features and transformations
-  sparse_feature_a = sparse_column_with_keys(
-      column_name=""sparse_feature_a"", keys=[""AB"", ""CD"", ...])
+(2) Model type:
+ * Deep neural network models (`DNNClassifier`, `DNNRegressor`).
+
+   Continuous features can be directly fed into deep neural network models.
+
+     age_column = real_valued_column(""age"")
 
-  embedding_feature_a = embedding_column(
-      sparse_id_column=sparse_feature_a, dimension=3, combiner=""sum"")
+   To feed sparse features into DNN models, wrap the column with
+   `embedding_column` or `one_hot_column`. `one_hot_column` is recommended for
+   features with only a few possible values. For features with many possible
+   values, `embedding_column` is recommended.
 
-  sparse_feature_b = sparse_column_with_hash_bucket(
-      column_name=""sparse_feature_b"", hash_bucket_size=1000)
+     embedded_dept_column = embedding_column(
+       sparse_column_with_keys(""department"", [""math"", ""philosphy"", ...]),
+       dimension=10)
 
-  embedding_feature_b = embedding_column(
-      sparse_id_column=sparse_feature_b, dimension=16, combiner=""sum"")
+* Wide (aka linear) models (`LinearClassifier`, `LinearRegressor`).
 
-  crossed_feature_a_x_b = crossed_column(
-      columns=[sparse_feature_a, sparse_feature_b], hash_bucket_size=10000)
+   Sparse features can be fed directly into linear models.
 
-  real_feature = real_valued_column(""real_feature"")
-  real_feature_buckets = bucketized_column(
-      source_column=real_feature,
+     dept_column = sparse_column_with_keys(""department"",
+       [""math"", ""philosphy"", ""english""])
+
+   It is recommended that continuous features be bucketized before being
+   fed into linear models.
+
+     bucketized_age_column = bucketized_column(
+      source_column=age_column,
       boundaries=[18, 25, 30, 35, 40, 45, 50, 55, 60, 65])
 
-  my_features = [embedding_feature_b, real_feature_buckets, embedding_feature_a]
+   Sparse features can be crossed (also known as conjuncted or combined) in
+   order to form non-linearities, and then fed into linear models.
+
+    cross_dept_age_column = crossed_column(
+      columns=[department_column, bucketized_age_column],
+      hash_bucket_size=1000)
+
+Example of building tf.learn model using FeatureColumns:
+
+  # Define features and transformations
+  deep_feature_columns = [age_column, embedded_dept_column]
+  wide_feature_columns = [dept_column, bucketized_age_column,
+      cross_dept_age_column]
+
+  # Build deep model
+  estimator = DNNClassifier(
+      feature_columns=deep_feature_columns,
+      hidden_units=[500, 250, 50])
+  estimator.train(...)
+
+  # Or build a wide model
+  estimator = LinearClassifier(
+      feature_columns=wide_feature_columns)
+  estimator.train(...)
+
+  # Or build a wide and deep model!
+  estimator = DNNLinearCombinedClassifier(
+      linear_feature_columns=wide_feature_columns,
+      dnn_feature_columns=deep_feature_columns,
+      dnn_hidden_units=[500, 250, 50])
+  estimator.train(...)
+
+
+FeatureColumns can also be transformed into a generic input layer for
+custom models using `input_from_feature_columns` within
+`feature_column_ops.py`.
+
+Example of building non-tf.learn model using FeatureColumns:
+
   # Building model via layers
+
+  deep_feature_columns = [age_column, embedded_dept_column]
   columns_to_tensor = parse_feature_columns_from_examples(
       serialized=my_data,
-      feature_columns=my_features)
+      feature_columns=deep_feature_columns)
   first_layer = input_from_feature_columns(
       columns_to_tensors=columns_to_tensor,
-      feature_columns=my_features)
+      feature_columns=deep_feature_columns)
   second_layer = fully_connected(first_layer, ...)
 
-  # Building model via tf.learn.estimators
-  estimator = DNNLinearCombinedClassifier(
-      linear_feature_columns=my_wide_features,
-      dnn_feature_columns=my_deep_features,
-      dnn_hidden_units=[500, 250, 50])
-  estimator.train(...)
-  ```
-
-  See feature_column_ops_test for more examples.
+See feature_column_ops_test for more examples.
 """"""
 
 from __future__ import absolute_import
@@ -871,7 +910,7 @@ class _EmbeddingColumn(_FeatureColumn, collections.namedtuple(
 
 
 def one_hot_column(sparse_id_column):
-  """"""Creates a _OneHotColumn.
+  """"""Creates an `_OneHotColumn` for a one-hot or multi-hot repr in a DNN.
 
   Args:
       sparse_id_column: A _SparseColumn which is created by
@@ -891,7 +930,7 @@ def embedding_column(sparse_id_column,
                      initializer=None,
                      ckpt_to_load_from=None,
                      tensor_name_in_ckpt=None):
-  """"""Creates an `_EmbeddingColumn`.
+  """"""Creates an `_EmbeddingColumn` for feeding sparse data into a DNN.
 
   Args:
     sparse_id_column: A `_SparseColumn` which is created by for example
@@ -1244,7 +1283,7 @@ def real_valued_column(column_name,
                        default_value=None,
                        dtype=dtypes.float32,
                        normalizer=None):
-  """"""Creates a _RealValuedColumn.
+  """"""Creates a `_RealValuedColumn` for dense numeric data.
 
   Args:
     column_name: A string defining real valued column name.
@@ -1477,7 +1516,7 @@ class _BucketizedColumn(_FeatureColumn, collections.namedtuple(
 
 
 def bucketized_column(source_column, boundaries):
-  """"""Creates a _BucketizedColumn.
+  """"""Creates a _BucketizedColumn for discretizing dense input.
 
   Args:
     source_column: A _RealValuedColumn defining dense column.
@@ -1676,7 +1715,7 @@ def crossed_column(columns, hash_bucket_size, combiner=None,
                    ckpt_to_load_from=None,
                    tensor_name_in_ckpt=None,
                    hash_key=None):
-  """"""Creates a _CrossedColumn.
+  """"""Creates a _CrossedColumn for performing feature crosses.
 
   Args:
     columns: An iterable of _FeatureColumn. Items can be an instance of
",0,train
e1f8843adce2333be3357650c24b3ecf97c42704,tensorflow/tensorflow,"Evaluation tool internal refactors

PiperOrigin-RevId: 280984929
Change-Id: I4b8f085d6258d7e041ccf0cfbeaac9a155813cef",preprocess_coco_minival.py,"@@ -83,6 +83,7 @@ def _get_ground_truth_detections(instances_file,
     if image_id not in image_id_whitelist:
       continue
     image_data_dict = {}
+    image_data_dict['id'] = image_dict['id']
     image_data_dict['file_name'] = image_dict['file_name']
     all_file_names.append(image_data_dict['file_name'])
     image_data_dict['height'] = image_dict['height']
@@ -154,6 +155,7 @@ def _dump_data(ground_truth_detections, images_folder_path, output_folder_path):
   for image_dict in ground_truth_detections.values():
     # Create an ObjectsSet proto for this file's ground truth.
     detection_result = ground_truth_data.detection_results.add()
+    detection_result.image_id = image_dict['id']
     detection_result.image_name = image_dict['file_name']
     for detection_dict in image_dict['detections']:
       object_instance = detection_result.objects.add()
",0,train
ebb278520add4b046e283f81398df03395b5d342,tensorflow/tensorflow,"Give clear errors for bad input names.

PiperOrigin-RevId: 155857515",strip_unused_lib.py,"@@ -41,14 +41,26 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
         a list that specifies one value per input node name.
 
   Returns:
-    A GraphDef with all unnecessary ops removed.
+    A `GraphDef` with all unnecessary ops removed.
+
+  Raises:
+    ValueError: If any element in `input_node_names` refers to a tensor instead
+      of an operation.
+    KeyError: If any element in `input_node_names` is not found in the graph.
   """"""
+  for name in input_node_names:
+    if "":"" in name:
+      raise ValueError(""Name '%s' appears to refer to a Tensor, ""
+                       ""not a Operation."" % name)
+
   # Here we replace the nodes we're going to override as inputs with
   # placeholders so that any unused nodes that are inputs to them are
   # automatically stripped out by extract_sub_graph().
+  not_found = {name for name in input_node_names}
   inputs_replaced_graph_def = graph_pb2.GraphDef()
   for node in input_graph_def.node:
     if node.name in input_node_names:
+      not_found.remove(node.name)
       placeholder_node = node_def_pb2.NodeDef()
       placeholder_node.op = ""Placeholder""
       placeholder_node.name = node.name
@@ -67,6 +79,9 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
     else:
       inputs_replaced_graph_def.node.extend([copy.deepcopy(node)])
 
+  if not_found:
+    raise KeyError(""The following input nodes were not found: %s\n"" % not_found)
+
   output_graph_def = graph_util.extract_sub_graph(inputs_replaced_graph_def,
                                                   output_node_names)
   return output_graph_def
",0,train
ebb278520add4b046e283f81398df03395b5d342,tensorflow/tensorflow,"Give clear errors for bad input names.

PiperOrigin-RevId: 155857515",strip_unused_test.py,"@@ -58,16 +58,25 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
     # routine.
     input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
     input_binary = False
-    input_node_names = ""wanted_input_node""
     output_binary = True
     output_node_names = ""output_node""
     output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name)
 
-    strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
-                                             output_graph_path, output_binary,
-                                             input_node_names,
-                                             output_node_names,
-                                             dtypes.float32.as_datatype_enum)
+    def strip(input_node_names):
+      strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
+                                               output_graph_path, output_binary,
+                                               input_node_names,
+                                               output_node_names,
+                                               dtypes.float32.as_datatype_enum)
+
+    with self.assertRaises(KeyError):
+      strip(""does_not_exist"")
+
+    with self.assertRaises(ValueError):
+      strip(""wanted_input_node:0"")
+
+    input_node_names = ""wanted_input_node""
+    strip(input_node_names)
 
     # Now we make sure the variable is now a constant, and that the graph still
     # produces the expected result.
",0,train
9ba6299242237b16dd812a2beb4a7d4bcad3a1c8,tensorflow/tensorflow,"Use correct spelling for Fake device in placer_test

The test uses FakeCPU/FakeGPU spelling in device types but all lower
case spelling in device names. It has worked in Placer because we never
directly compare names with types in Placer, but such misspellings are
not generally supported (e.g. TF Python code converts lower case ""cpu/gpu""
to ""CPU/GPU"" before passing the string to C++ core).

Having this unsupported usage in tests is misleading and wastes development
time when working on Placer.

PiperOrigin-RevId: 239868502",placer_test.cc,"@@ -49,11 +49,11 @@ using ::tensorflow::test::function::GDef;
 using ::tensorflow::test::function::NDef;
 using FDH = ::tensorflow::FunctionDefHelper;
 
-constexpr char kCPU[] = ""/device:fakecpu:0"";
-constexpr char kGPU[] = ""/device:fakegpu:0"";
+constexpr char kCPU[] = ""/device:FakeCPU:0"";
+constexpr char kGPU[] = ""/device:FakeGPU:0"";
 
-constexpr char kFullCPU[] = ""/job:a/replica:0/task:0/device:fakecpu:0"";
-constexpr char kFullGPU[] = ""/job:a/replica:0/task:0/device:fakegpu:0"";
+constexpr char kFullCPU[] = ""/job:a/replica:0/task:0/device:FakeCPU:0"";
+constexpr char kFullGPU[] = ""/job:a/replica:0/task:0/device:FakeGPU:0"";
 
 namespace {
 
@@ -205,11 +205,11 @@ class PlacerTest : public ::testing::Test {
     // objects.
     for (int i = 0; i < 10; ++i) {
       local_devices_.emplace_back(FakeDevice::MakeCPU(
-          strings::StrCat(""/job:a/replica:0/task:0/device:fakecpu:"", i)));
+          strings::StrCat(""/job:a/replica:0/task:0/device:FakeCPU:"", i)));
       devices_.AddDevice(local_devices_.back().get());
       // Insert the GPUs in reverse order.
       local_devices_.emplace_back(FakeDevice::MakeGPU(
-          strings::StrCat(""/job:a/replica:0/task:0/device:fakegpu:"", 9 - i)));
+          strings::StrCat(""/job:a/replica:0/task:0/device:FakeGPU:"", 9 - i)));
       devices_.AddDevice(local_devices_.back().get());
     }
   }
@@ -690,7 +690,7 @@ TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongPartialDevice) {
     // of valid devices for the generator.
     Node* input =
         ops::SourceOp(""TestCPUGPUOutput"",
-                      b.opts().WithName(""in"").WithDevice(""/device:fakecpu:1""));
+                      b.opts().WithName(""in"").WithDevice(""/device:FakeCPU:1""));
 
     // The assign is bound to CPU by the reference edge.
     ops::BinaryOp(""TestAssign"", var_cpu, input, b.opts().WithName(""assign""));
@@ -700,10 +700,10 @@ TEST_F(PlacerTest, TestIgnoreGeneratorHeuristicIfWrongPartialDevice) {
 
   TF_EXPECT_OK(Place(&g));
   EXPECT_DEVICE_TYPE(g, ""in"", ""FakeCPU"");
-  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakecpu:1"");
+  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeCPU:1"");
   EXPECT_DEVICE_TYPE(g, ""var_cpu"", ""FakeCPU"");
   EXPECT_COLOCATED(g, ""var_cpu"", ""assign"");
-  EXPECT_DEVICE_CONTAINS(g, ""var_cpu"", ""/device:fakecpu:0"");
+  EXPECT_DEVICE_CONTAINS(g, ""var_cpu"", ""/device:FakeCPU:0"");
 }
 
 // Test that a graph with partial device specifications on the ops
@@ -735,10 +735,10 @@ TEST_F(PlacerTest, TestAssignedDevicePreserved) {
   }
 
   GetNodeByName(g, ""in"")->set_assigned_device_name(
-      ""/job:a/replica:0/task:0/device:fakecpu:7"");
+      ""/job:a/replica:0/task:0/device:FakeCPU:7"");
 
   TF_EXPECT_OK(Place(&g));
-  EXPECT_EQ(""/job:a/replica:0/task:0/device:fakecpu:7"",
+  EXPECT_EQ(""/job:a/replica:0/task:0/device:FakeCPU:7"",
             GetNodeByName(g, ""in"")->assigned_device_name());
 }
 
@@ -749,17 +749,17 @@ TEST_F(PlacerTest, TestPartialSpecGpuToCpu) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""TestInput"",
-                  b.opts().WithName(""in"").WithDevice(""/device:fakegpu:0""));
+                  b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:0""));
     ops::SourceOp(""TestVariable"",
-                  b.opts().WithName(""var"").WithDevice(""/device:fakegpu:0""));
+                  b.opts().WithName(""var"").WithDevice(""/device:FakeGPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
   TF_EXPECT_OK(Place(&g, true, false));
   EXPECT_DEVICE_TYPE(g, ""in"", ""FakeCPU"");
-  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakecpu"");
+  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeCPU"");
   EXPECT_DEVICE_TYPE(g, ""var"", ""FakeGPU"");
-  EXPECT_DEVICE_CONTAINS(g, ""var"", ""/device:fakegpu:0"");
+  EXPECT_DEVICE_CONTAINS(g, ""var"", ""/device:FakeGPU:0"");
 }
 
 // Test that a node with an assigned GPU device but has not registered
@@ -773,14 +773,15 @@ TEST_F(PlacerTest, TestAssignedGpuDeviceToCpuDevice) {
   }
 
   GetNodeByName(g, ""in"")->set_assigned_device_name(
-      ""/job:a/replica:0/task:0/device:fakegpu:0"");
+      ""/job:a/replica:0/task:0/device:FakeGPU:0"");
 
   Status s = Place(&g);
-  EXPECT_EQ(error::INTERNAL, s.code());
+  EXPECT_EQ(error::INTERNAL, s.code()) << s.ToString();
   EXPECT_TRUE(str_util::StrContains(
       s.error_message(),
-      ""Assigned device '/job:a/replica:0/task:0/device:fakegpu:0' ""
-      ""does not have registered OpKernel support for TestInput""));
+      ""Assigned device '/job:a/replica:0/task:0/device:FakeGPU:0' ""
+      ""does not have registered OpKernel support for TestInput""))
+      << s.ToString();
 }
 
 // Test that graphs with reference connections are correctly placed.
@@ -917,15 +918,15 @@ TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) {
       if (set_assigned) {
         GetNodeByName(g, ""var_cpu"")
             ->set_assigned_device_name(
-                ""/job:a/replica:0/task:0/device:fakecpu:0"");
+                ""/job:a/replica:0/task:0/device:FakeCPU:0"");
         GetNodeByName(g, ""var_gpu"")
             ->set_assigned_device_name(
-                ""/job:a/replica:0/task:0/device:fakegpu:0"");
+                ""/job:a/replica:0/task:0/device:FakeGPU:0"");
       } else {
         GetNodeByName(g, ""var_cpu"")
-            ->set_requested_device(""/job:a/replica:0/task:0/device:fakecpu:0"");
+            ->set_requested_device(""/job:a/replica:0/task:0/device:FakeCPU:0"");
         GetNodeByName(g, ""var_gpu"")
-            ->set_requested_device(""/job:a/replica:0/task:0/device:fakegpu:0"");
+            ->set_requested_device(""/job:a/replica:0/task:0/device:FakeGPU:0"");
       }
     }
 
@@ -936,8 +937,8 @@ TEST_F(PlacerTest, TestResourceHandlesOnDifferentDevicesFails) {
         ""Cannot place the graph because a reference or resource edge ""
         ""connects ""
         ""colocation groups with incompatible assigned devices: ""
-        ""/job:a/replica:0/task:0/device:fakegpu:0 vs ""
-        ""/job:a/replica:0/task:0/device:fakecpu:0""));
+        ""/job:a/replica:0/task:0/device:FakeGPU:0 vs ""
+        ""/job:a/replica:0/task:0/device:FakeCPU:0""));
 
     return Status::OK();
   };
@@ -958,16 +959,16 @@ TEST_F(PlacerTest, TestReferenceConnectionIgnoreInfeasible) {
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     Node* input = ops::SourceOp(
         ""TestDevice"",
-        b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:fakegpu:0""));
+        b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:FakeGPU:0""));
     Node* var =
         ops::SourceOp(""TestVariable"", b.opts().WithName(""var_0"").WithDevice(
-                                          ""/job:a/task:0/device:fakegpu:0""));
+                                          ""/job:a/task:0/device:FakeGPU:0""));
 
     // This op is specified on CPU, but in practice will be ignored,
     // because the reference edges forces it on GPU.
     ops::BinaryOp(""TestAssign"", var, input,
                   b.opts().WithName(""assign"").WithDevice(
-                      ""/job:a/task:0/device:fakecpu:0""));
+                      ""/job:a/task:0/device:FakeCPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -998,7 +999,7 @@ TEST_F(PlacerTest, TestReferenceConnectionMoreSpecificDestinationSourceWins) {
     // assigned to CPU.
     ops::BinaryOp(""TestAssign"", var, input,
                   b.opts().WithName(""assign"").WithDevice(
-                      ""/job:a/task:0/device:fakecpu:0""));
+                      ""/job:a/task:0/device:FakeCPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -1019,11 +1020,11 @@ TEST_F(PlacerTest, TestReferenceConnectionNoSourceDevice) {
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     Node* input = ops::SourceOp(
         ""TestDevice"",
-        b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:fakegpu:0""));
+        b.opts().WithName(""in"").WithDevice(""/job:a/task:0/device:FakeGPU:0""));
     Node* var = ops::SourceOp(""TestVariable"", b.opts().WithName(""var_0""));
     ops::BinaryOp(""TestAssign"", var, input,
                   b.opts().WithName(""assign"").WithDevice(
-                      ""/job:a/task:0/device:fakecpu:0""));
+                      ""/job:a/task:0/device:FakeCPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -1260,10 +1261,10 @@ TEST_F(PlacerTest, TestHeterogeneousDeviceSetFailure) {
 
   DeviceSet heterogeneous;
   std::unique_ptr<Device> gpu(
-      FakeDevice::MakeGPU(""/job:b/replica:0/task:0/device:fakegpu:0""));
+      FakeDevice::MakeGPU(""/job:b/replica:0/task:0/device:FakeGPU:0""));
   heterogeneous.AddDevice(gpu.get());
   std::unique_ptr<Device> cpu(
-      FakeDevice::MakeCPU(""/job:b/replica:0/task:1/device:fakecpu:0""));
+      FakeDevice::MakeCPU(""/job:b/replica:0/task:1/device:FakeCPU:0""));
   heterogeneous.AddDevice(cpu.get());
   Status s = Place(&g, &heterogeneous);
   EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
@@ -1361,7 +1362,7 @@ TEST_F(PlacerTest, TestNoDevicesRegistered) {
 
   DeviceSet cpu_only;
   std::unique_ptr<Device> cpu(
-      FakeDevice::MakeCPU(""/job:a/replica:0/task:0/device:fakecpu:0""));
+      FakeDevice::MakeCPU(""/job:a/replica:0/task:0/device:FakeCPU:0""));
   cpu_only.AddDevice(cpu.get());
 
   Status s = Place(&g, &cpu_only);
@@ -1429,12 +1430,12 @@ TEST_F(PlacerTest, TestNonexistentGpuAllowSoftPlacement) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""TestDevice"",
-                  b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11""));
+                  b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
   TF_EXPECT_OK(Place(&g, true, false));
-  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:fakegpu:0"");
+  EXPECT_DEVICE_CONTAINS(g, ""in"", ""/device:FakeGPU:0"");
 }
 
 // Test that ops request to be placed on non-existent devices will fail if
@@ -1444,13 +1445,13 @@ TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacement) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""TestDevice"",
-                  b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11""));
+                  b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
   Status s = Place(&g, false, false);
   EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
-  EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:fakegpu:11""));
+  EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:FakeGPU:11""));
 }
 
 // Test that the ""Cannot assign a device"" error message contains a format tag
@@ -1460,7 +1461,7 @@ TEST_F(PlacerTest, TestNonexistentGpuNoAllowSoftPlacementFormatTag) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""TestDevice"",
-                  b.opts().WithName(""in"").WithDevice(""/device:fakegpu:11""));
+                  b.opts().WithName(""in"").WithDevice(""/device:FakeGPU:11""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -1479,16 +1480,18 @@ TEST_F(PlacerTest, TestUnsupportedDeviceNoAllowSoftPlacement) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""VariableGPU"",
-                  b.opts().WithName(""var"").WithDevice(""/device:fakecpu:0""));
+                  b.opts().WithName(""var"").WithDevice(""/device:FakeCPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
   Status s = Place(&g, false, false);
-  EXPECT_EQ(error::INVALID_ARGUMENT, s.code());
-  EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:fakecpu:0""));
+  EXPECT_EQ(error::INVALID_ARGUMENT, s.code()) << s.ToString();
+  EXPECT_TRUE(str_util::StrContains(s.error_message(), ""/device:FakeCPU:0""))
+      << s.ToString();
   EXPECT_TRUE(str_util::StrContains(
       s.error_message(),
-      ""no supported kernel for fakecpu devices is available""));
+      ""no supported kernel for FakeCPU devices is available""))
+      << s.ToString();
 }
 
 // Test that placement fails when a node requests an explicit device that is not
@@ -1537,7 +1540,7 @@ TEST_F(PlacerTest, TestUnsupportedDeviceAllowSoftPlacement) {
   {  // Scope for temporary variables used to construct g.
     GraphDefBuilder b(GraphDefBuilder::kFailImmediately);
     ops::SourceOp(""VariableGPU"",
-                  b.opts().WithName(""var"").WithDevice(""/device:fakecpu:0""));
+                  b.opts().WithName(""var"").WithDevice(""/device:FakeCPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -1557,14 +1560,14 @@ TEST_F(PlacerTest, TestDeviceTypeConstraintsAllowSoftPlacement) {
     Node* var_gpu = ops::SourceOp(""VariableGPU"", b.opts().WithName(""var_gpu""));
     ops::UnaryOp(
         ""TestDeviceEnforce"", var_gpu,
-        b.opts().WithName(""force_gpu"").WithDevice(""/device:fakecpu:0""));
+        b.opts().WithName(""force_gpu"").WithDevice(""/device:FakeCPU:0""));
     // var_cpu has ref output and runs on CPU.
     // force_cpu takes var_cpu and requested GPU.
     // Verify that both are placed on CPU.
     Node* var_cpu = ops::SourceOp(""VariableCPU"", b.opts().WithName(""var_cpu""));
     ops::UnaryOp(
         ""TestDeviceEnforce"", var_cpu,
-        b.opts().WithName(""force_cpu"").WithDevice(""/device:fakegpu:0""));
+        b.opts().WithName(""force_cpu"").WithDevice(""/device:FakeGPU:0""));
     TF_EXPECT_OK(BuildGraph(b, &g));
   }
 
@@ -1655,10 +1658,10 @@ TEST_F(PlacerTest, TestGeneratorNodeDoesntFollowNonColocatedConsumers) {
     TF_EXPECT_OK(BuildGraph(b, &g));
 
     GetNodeByName(g, ""var1_cpu"")
-        ->set_assigned_device_name(""/job:a/replica:0/task:0/device:fakecpu:1"");
+        ->set_assigned_device_name(""/job:a/replica:0/task:0/device:FakeCPU:1"");
 
     GetNodeByName(g, ""var2_cpu"")
-        ->set_assigned_device_name(""/job:a/replica:0/task:0/device:fakecpu:2"");
+        ->set_assigned_device_name(""/job:a/replica:0/task:0/device:FakeCPU:2"");
   }
 
   TF_EXPECT_OK(Place(&g));
@@ -1720,7 +1723,7 @@ TEST_P(SoftPlacementPlacerTest,
         s.error_message(),
         ""Cannot colocate nodes {{colocation_node id2}} and {{colocation_node ""
         ""id1}}: Cannot merge devices with incompatible types: ""
-        ""'/device:fakecpu:0' and '/device:fakegpu:0'""))
+        ""'/device:FakeCPU:0' and '/device:FakeGPU:0'""))
         << s.ToString();
   }
 }
@@ -1811,8 +1814,8 @@ TEST_P(SoftPlacementPlacerTest,
         s.error_message(),
         ""Cannot colocate nodes {{colocation_node id2}} and {{colocation_node ""
         ""id1}}: Cannot merge devices with incompatible types: ""
-        ""'/job:a/replica:0/task:0/device:fakecpu:0' and ""
-        ""'/job:a/replica:0/task:0/device:fakegpu:0'""))
+        ""'/job:a/replica:0/task:0/device:FakeCPU:0' and ""
+        ""'/job:a/replica:0/task:0/device:FakeGPU:0'""))
         << s.ToString();
   }
 }
",0,train
19c39157c0ac76545ae82bf48d2e11784ff232fb,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-07-25

PiperOrigin-RevId: 259906647",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 24)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 25)
 
 _FORWARD_COMPATIBILITY_HORIZON_OVERRIDDEN = False
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
",0,train
2e1822ff68a48091e934a1d7b572410a9fb42281,tensorflow/tensorflow,"Stabilize sorting order of nodes by hash.

PiperOrigin-RevId: 389999148
Change-Id: I0564a2383fb00491a8ce4afde774bcace1457648",sig_node.cc,"@@ -275,8 +275,8 @@ void Signature::PrepareNodes() {
 
 void Signature::FindUniqueHashes(size_t* next_node_id_p) {
   // Start by sorting by the hash value.
-  std::sort(nodes.begin() + *next_node_id_p, nodes.end(),
-            SigNode::NodeOrderLess());
+  std::stable_sort(nodes.begin() + *next_node_id_p, nodes.end(),
+                   SigNode::NodeOrderLess());
 
   // At each call, if no nodes have unique hashes, one node that has a
   // non-unique (shared) hash can be made unique by assigning a unique id.
",0,train
0e9a670e66bdc163ac3b8fb807ca5629caf4f784,tensorflow/tensorflow,"Make caching default to True under eager mode.

PiperOrigin-RevId: 289769741
Change-Id: Iacd2d60749ec80d99c68deadfc2de7b8beb85b00",recurrent.py,"@@ -1270,7 +1270,11 @@ class SimpleRNNCell(DropoutRNNCellMixin, Layer):
                dropout=0.,
                recurrent_dropout=0.,
                **kwargs):
-    self._enable_caching_device = kwargs.pop('enable_caching_device', False)
+    # By default use cached variable under v2 mode, see b/143699808.
+    if ops.executing_eagerly_outside_functions():
+      self._enable_caching_device = kwargs.pop('enable_caching_device', True)
+    else:
+      self._enable_caching_device = kwargs.pop('enable_caching_device', False)
     super(SimpleRNNCell, self).__init__(**kwargs)
     self.units = units
     self.activation = activations.get(activation)
@@ -1701,7 +1705,11 @@ class GRUCell(DropoutRNNCellMixin, Layer):
                implementation=1,
                reset_after=False,
                **kwargs):
-    self._enable_caching_device = kwargs.pop('enable_caching_device', False)
+    # By default use cached variable under v2 mode, see b/143699808.
+    if ops.executing_eagerly_outside_functions():
+      self._enable_caching_device = kwargs.pop('enable_caching_device', True)
+    else:
+      self._enable_caching_device = kwargs.pop('enable_caching_device', False)
     super(GRUCell, self).__init__(**kwargs)
     self.units = units
     self.activation = activations.get(activation)
@@ -2255,7 +2263,11 @@ class LSTMCell(DropoutRNNCellMixin, Layer):
                recurrent_dropout=0.,
                implementation=1,
                **kwargs):
-    self._enable_caching_device = kwargs.pop('enable_caching_device', False)
+    # By default use cached variable under v2 mode, see b/143699808.
+    if ops.executing_eagerly_outside_functions():
+      self._enable_caching_device = kwargs.pop('enable_caching_device', True)
+    else:
+      self._enable_caching_device = kwargs.pop('enable_caching_device', False)
     super(LSTMCell, self).__init__(**kwargs)
     self.units = units
     self.activation = activations.get(activation)
",0,train
b146fdcdf11217244f7984f8b88c71dda6f3dc02,tensorflow/tensorflow,"Make sure while body DT_RESOURCE _Retval comes from _Arg with same index.

PiperOrigin-RevId: 245816414",rearrange_function_argument_pass_test.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/graph_to_functiondef.h""
 #include ""tensorflow/core/framework/node_def_util.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
+#include ""tensorflow/core/lib/core/error_codes.pb.h""
 #include ""tensorflow/core/platform/test.h""
 #include ""tensorflow/core/public/session_options.h""
 #include ""tensorflow/core/public/version.h""
@@ -211,4 +212,67 @@ TEST_F(RearrangeFunctionArgumentForFunctionTest, Basic) {
   EXPECT_EQ(input_node->name(), ""while"");
 }
 
+TEST_F(RearrangeFunctionArgumentForFunctionTest,
+       WhileResourceRetvalFromDifferentArgUnimplemented) {
+  FunctionDefLibrary fdl;
+  {
+    // Function for While's ""body"".
+    // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32)
+    // ""ret0"" = ""arg1""
+    // ""ret1"" = ""arg0""
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0);
+    Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1);
+    Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2);
+    auto ret0 = ops::_Retval(s.WithOpName(""ret0""), arg1, 0);
+    auto ret1 = ops::_Retval(s.WithOpName(""ret1""), arg0, 1);
+    auto ret2 = ops::_Retval(s.WithOpName(""ret2""), arg2, 2);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    FunctionDef *xla_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, ""f2"", xla_fdef));
+  }
+  {
+    // Function for While's ""cond"".
+    // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32)
+    // ""ret0"" = true
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0);
+    Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1);
+    Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2);
+    Output cond = ops::Const(s.WithOpName(""const""), true, TensorShape({}));
+    auto ret0 = ops::_Retval(s.WithOpName(""ret0""), cond, 0);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    FunctionDef *xla_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, ""f1"", xla_fdef));
+  }
+  {
+    // Build the XLA computation func.
+    // ""arg0"" (T=DT_RESOURCE), ""arg1"" (T=DT_RESOURCE), ""arg2"" (T=DT_INT32)
+    // ""arg0"", ""arg1"" -> ""while"" (While)
+    tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+    Output arg0 = ops::_Arg(s.WithOpName(""arg0""), DT_RESOURCE, 0);
+    Output arg1 = ops::_Arg(s.WithOpName(""arg1""), DT_RESOURCE, 1);
+    Output arg2 = ops::_Arg(s.WithOpName(""arg2""), DT_INT32, 2);
+    NameAttrList cond_fn, body_fn;
+    cond_fn.set_name(""f1"");
+    body_fn.set_name(""f2"");
+    auto while_op = ops::While(s.WithOpName(""while""),
+                               std::initializer_list<Input>{arg0, arg1, arg2},
+                               cond_fn, body_fn);
+    std::unique_ptr<Graph> g(new Graph(OpRegistry::Global()));
+    TF_CHECK_OK(s.ToGraph(g.get()));
+    FunctionDef *xla_fdef = fdl.add_function();
+    TF_CHECK_OK(GraphToFunctionDef(*g, ""cluster"", xla_fdef));
+  }
+  FunctionLibraryDefinition fld(OpRegistry::Global(), fdl);
+
+  bool modified;
+  protobuf::Map<string, tensorflow::AttrValue> attrs;
+  Status s = RearrangeFunctionArgumentTest(""cluster"", ""cluster_rewritten"",
+                                           attrs, &fld, &modified);
+  EXPECT_EQ(s.code(), error::UNIMPLEMENTED);
+}
+
 }  // namespace tensorflow
",0,test
b146fdcdf11217244f7984f8b88c71dda6f3dc02,tensorflow/tensorflow,"Make sure while body DT_RESOURCE _Retval comes from _Arg with same index.

PiperOrigin-RevId: 245816414",rearrange_function_argument_pass.cc,"@@ -309,6 +309,43 @@ Status MaybeRewriteWhileNode(Graph* g, Node* n, FunctionLibraryDefinition* fld,
     TF_RETURN_IF_ERROR(
         FunctionDefToBodyHelper(*fdef, AttrSlice(), fld, &fbody));
 
+    // Check that resource _Arg nodes for While node are always returned with
+    // the same index, and we don't have cases like this:
+    // tf.while_loop(
+    //     cond,
+    //     lambda resource_var1, resource_var2: [resource_var2, resource_var1],
+    //     [resource_var1, resource_var2])
+    if (attr_name == ""body"") {
+      for (int i = 0; i < fbody->ret_nodes.size(); i++) {
+        Node* n = fbody->ret_nodes[i];
+        DataType dtype;
+        TF_RETURN_IF_ERROR(GetNodeAttr(n->def(), ""T"", &dtype));
+        if (dtype != DT_RESOURCE) {
+          continue;
+        }
+
+        Node* input_node;
+        TF_RETURN_IF_ERROR(n->input_node(0, &input_node));
+        while (input_node->IsIdentity()) {
+          TF_RETURN_IF_ERROR(input_node->input_node(0, &input_node));
+        }
+        if (input_node->IsArg()) {
+          int index;
+          TF_RETURN_IF_ERROR(GetNodeAttr(input_node->def(), ""index"", &index));
+          if (index != i) {
+            return errors::Unimplemented(""While node "", n->DebugString(),
+                                         "" has resource _Retval["", i,
+                                         ""] coming from _Arg["", index, ""]"");
+          }
+        } else {
+          return errors::Unimplemented(""Encountered node "",
+                                       input_node->DebugString(),
+                                       "" while tracing _Arg node for _Retval["",
+                                       i, ""] of while node "", n->DebugString());
+        }
+      }
+    }
+
     RearrangeArgNodes(&fbody->arg_nodes, index_mapping);
     if (attr_name == ""body"") {
       for (int i = 0; i < fbody->ret_nodes.size(); i++) {
",0,test
9f7901630972ca5e53441c81f628c18d1137ab08,tensorflow/tensorflow,"[XLA:CPU] Fix fast-math flags in llvm_ir_runtime.cc

Previously we were calling setFastMath(), but we should have been calling
setFastMath(fast_math_enabled).

Also disable fastmath in the exhaustive f32 elementwise op test.

PiperOrigin-RevId: 231636911",llvm_ir_runtime.cc,"@@ -83,7 +83,7 @@ llvm::Function* EmitVectorF32ExpIfNeeded(llvm::Module* module,
 
   llvm::IRBuilder<> b(vector_exp_body);
   llvm::FastMathFlags fast_math_flags;
-  fast_math_flags.setFast();
+  fast_math_flags.setFast(enable_fast_math);
   b.setFastMathFlags(fast_math_flags);
 
   VectorSupportLibrary vsl(F32, vector_width, &b, ""exp_f32"");
@@ -166,7 +166,7 @@ llvm::Function* EmitVectorF32LogIfNeeded(llvm::Module* module,
 
   llvm::IRBuilder<> b(vector_log_body);
   llvm::FastMathFlags fast_math_flags;
-  fast_math_flags.setFast();
+  fast_math_flags.setFast(enable_fast_math);
   b.setFastMathFlags(fast_math_flags);
 
   llvm::Value* input = &*vector_log_function->arg_begin();
",0,train
9f7901630972ca5e53441c81f628c18d1137ab08,tensorflow/tensorflow,"[XLA:CPU] Fix fast-math flags in llvm_ir_runtime.cc

Previously we were calling setFastMath(), but we should have been calling
setFastMath(fast_math_enabled).

Also disable fastmath in the exhaustive f32 elementwise op test.

PiperOrigin-RevId: 231636911",exhaustive_f32_elementwise_op_test.cc,"@@ -25,12 +25,14 @@ class ExhaustiveF32ElementwiseOpTest
     : public ClientLibraryTestBase,
       public ::testing::WithParamInterface<std::pair<int64, int64>> {
  protected:
-  ErrorSpec error_spec_{0.0001, 0.0001, /*relaxed_nans=*/true};
+  ErrorSpec error_spec_{0.0001, 0.0001};
 
   template <typename EnqueueOpTy>
   void ExhaustivelyTestF32Op(EnqueueOpTy enqueue_op,
                              float (*evaluate_op)(float),
                              std::pair<int64, int64> known_incorrect_range) {
+    SetFastMathDisabled(true);
+
     int64 begin, end;
     std::tie(begin, end) = GetParam();
     int64 input_size = end - begin;
",0,train
150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist.
Change: 133902853",__init__.py,"@@ -152,12 +152,9 @@ _allowed_symbols = [
     'lin_space',
     'list_diff',  # Use tf.listdiff instead.
     'parse_single_sequence_example',
-    'scalar_mul',
     'serialize_many_sparse',
     'serialize_sparse',
-    'sparse_matmul',
-    'sparse_segment_mean_grad',
-    'sparse_segment_sqrt_n_grad',
+    'sparse_matmul',   ## use tf.matmul instead.
     'user_ops',
 ]
 
",0,train
150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist.
Change: 133902853",segment_reduction_ops_test.py,"@@ -21,6 +21,8 @@ from __future__ import print_function
 import numpy as np
 import tensorflow as tf
 
+from tensorflow.python.util.all_util import reveal_undocumented
+
 
 class SegmentReductionHelper(tf.test.TestCase):
 
@@ -349,6 +351,12 @@ class SparseSegmentReductionHelper(SegmentReductionHelper):
 
 class SparseSegmentReductionOpTest(SparseSegmentReductionHelper):
 
+  def setUp(self):
+    reveal_undocumented(""tensorflow.python.""
+                        ""sparse_segment_mean_grad"", tf)
+    reveal_undocumented(""tensorflow.python.""
+                        ""sparse_segment_sqrt_n_grad"", tf)
+
   def testValues(self):
     dtypes = [tf.float32,
               tf.float64,
",0,train
150f44ce844725096241036a48cd78bcc8075ef3,tensorflow/tensorflow,"Reference math_ops symbols in the documentation and remove from whitelist.
Change: 133902853",math_ops.py,"@@ -24,6 +24,7 @@ operators to your graph.
 @@add
 @@sub
 @@mul
+@@scalar_mul
 @@div
 @@truediv
 @@floordiv
",0,train
cc00a3bc7077c75995c6a781decb1a3e7e279e30,tensorflow/tensorflow,"Avoid redundant bitcast.

When creating the GlobalVariable for constants, we don't need to create a
bitcast to the correct type. This is already done in
HloToIrBindings::BindHloToIrValue().
PiperOrigin-RevId: 201924685",ir_emitter.cc,"@@ -94,10 +94,7 @@ Status IrEmitter::HandleConstant(HloInstruction* constant) {
           << std::endl
           << ""  its type: ""
           << llvm_ir::DumpToString(*global_for_const->getType());
-  llvm::Constant* shape_constant = llvm::ConstantExpr::getBitCast(
-      global_for_const,
-      llvm_ir::ShapeToIrType(literal.shape(), module_)->getPointerTo());
-  bindings_.BindHloToIrValue(*constant, shape_constant);
+  bindings_.BindHloToIrValue(*constant, global_for_const);
   return Status::OK();
 }
 
",0,train
6cc3d1c4ec15c4bc59870bf6f1db710d218b92a1,tensorflow/tensorflow,"Drop MemRefUtils from the ExecutionEngine

The ExecutionEngine was updated recently to only take the LLVM dialect as
input. Memrefs are no longer expected in the signature of the entry point
function by the executor so there is no need to allocate and free them. The
code in MemRefUtils is therefore dead and furthermore out of sync with the
recent evolution of memref type to support strides. Drop it.

PiperOrigin-RevId: 276272302
Change-Id: I097a5fa112bcfdbec8e5fd822c0968ae2c7ecc14",MemRefUtils.h,"@@ -1,54 +0,0 @@
-//===- MemRefUtils.h - MLIR runtime utilities for memrefs -------*- C++ -*-===//
-//
-// Copyright 2019 The MLIR Authors.
-//
-// Licensed under the Apache License, Version 2.0 (the ""License"");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an ""AS IS"" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-// =============================================================================
-//
-// This is a set of utilities to working with objects of memref type in an JIT
-// context using the MLIR execution engine.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef MLIR_EXECUTIONENGINE_MEMREFUTILS_H_
-#define MLIR_EXECUTIONENGINE_MEMREFUTILS_H_
-
-#include ""mlir/Support/LLVM.h""
-
-namespace llvm {
-template <typename T> class Expected;
-}
-
-namespace mlir {
-class FuncOp;
-
-/// Simple memref descriptor class compatible with the ABI of functions emitted
-/// by MLIR to LLVM IR conversion for statically-shaped memrefs of float type.
-struct StaticFloatMemRef {
-  float *data;
-};
-
-/// Given an MLIR function that takes only statically-shaped memrefs with
-/// element type f32, allocate the memref descriptor and the data storage for
-/// each of the arguments, initialize the storage with `initialValue`, and
-/// return a list of type-erased descriptor pointers.
-llvm::Expected<SmallVector<void *, 8>>
-allocateMemRefArguments(FuncOp func, float initialValue = 0.0);
-
-/// Free a list of type-erased descriptors to statically-shaped memrefs with
-/// element type f32.
-void freeMemRefArguments(ArrayRef<void *> args);
-
-} // namespace mlir
-
-#endif // MLIR_EXECUTIONENGINE_MEMREFUTILS_H_
",0,train
683e21314a80ac6cb89eb959465ded41e381d23c,tensorflow/tensorflow,"Automated rollback of commit 5aaebe06b476d7b7484d6eb2b68440654557018a

PiperOrigin-RevId: 210594076",generate_validation_labels.py,"@@ -1,101 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""Tool to convert ILSVRC devkit validation ground truth to synset labels.""""""
-
-import argparse
-from os import path
-import sys
-import scipy.io
-
-_SYNSET_ARRAYS_RELATIVE_PATH = 'data/meta.mat'
-_VALIDATION_FILE_RELATIVE_PATH = 'data/ILSVRC2012_validation_ground_truth.txt'
-
-
-def _synset_to_word(filepath):
-  """"""Returns synset to word dictionary by reading sysnset arrays.""""""
-  mat = scipy.io.loadmat(filepath)
-  entries = mat['synsets']
-  # These fields are listed in devkit readme.txt
-  fields = [
-      'synset_id', 'WNID', 'words', 'gloss', 'num_children', 'children',
-      'wordnet_height', 'num_train_images'
-  ]
-  synset_index = fields.index('synset_id')
-  words_index = fields.index('words')
-  synset_to_word = {}
-  for entry in entries:
-    entry = entry[0]
-    synset_id = int(entry[synset_index][0])
-    first_word = entry[words_index][0].split(',')[0]
-    synset_to_word[synset_id] = first_word
-  return synset_to_word
-
-
-def _validation_file_path(ilsvrc_dir):
-  return path.join(ilsvrc_dir, _VALIDATION_FILE_RELATIVE_PATH)
-
-
-def _synset_array_path(ilsvrc_dir):
-  return path.join(ilsvrc_dir, _SYNSET_ARRAYS_RELATIVE_PATH)
-
-
-def _generate_validation_labels(ilsvrc_dir, output_file):
-  synset_to_word = _synset_to_word(_synset_array_path(ilsvrc_dir))
-  with open(_validation_file_path(ilsvrc_dir), 'r') as synset_id_file, open(
-      output_file, 'w') as output:
-    for synset_id in synset_id_file:
-      synset_id = int(synset_id)
-      output.write('%s\n' % synset_to_word[synset_id])
-
-
-def _check_arguments(args):
-  if not args.validation_labels_output:
-    raise ValueError('Invalid path to output file.')
-  ilsvrc_dir = args.ilsvrc_devkit_dir
-  if not ilsvrc_dir or not path.isdir(ilsvrc_dir):
-    raise ValueError('Invalid path to ilsvrc_dir')
-  if not path.exists(_validation_file_path(ilsvrc_dir)):
-    raise ValueError('Invalid path to ilsvrc_dir, cannot find validation file.')
-  if not path.exists(_synset_array_path(ilsvrc_dir)):
-    raise ValueError(
-        'Invalid path to ilsvrc_dir, cannot find synset arrays file.')
-
-
-def main():
-  parser = argparse.ArgumentParser(
-      description='Converts ILSVRC devkit validation_ground_truth.txt to synset'
-      ' labels file that can be used by the accuracy script.')
-  parser.add_argument(
-      '--validation_labels_output',
-      type=str,
-      help='Full path for outputting validation labels.')
-  parser.add_argument(
-      '--ilsvrc_devkit_dir',
-      type=str,
-      help='Full path to ILSVRC 2012 devikit directory.')
-  args = parser.parse_args()
-  try:
-    _check_arguments(args)
-  except ValueError as e:
-    parser.print_usage()
-    file_name = path.basename(sys.argv[0])
-    sys.stderr.write('{0}: error: {1}\n'.format(file_name, str(e)))
-    sys.exit(1)
-  _generate_validation_labels(args.ilsvrc_devkit_dir,
-                              args.validation_labels_output)
-
-
-if __name__ == '__main__':
-  main()
",0,train
62feb4525be38ee620fccabb6757f723adea5ba2,tensorflow/tensorflow,"NFC: Simplify ModuleOp by using the SingleBlockImplicitTerminator trait.
PiperOrigin-RevId: 261944712",Module.h,"@@ -25,6 +25,8 @@
 #include ""mlir/IR/SymbolTable.h""
 
 namespace mlir {
+class ModuleTerminatorOp;
+
 //===----------------------------------------------------------------------===//
 // Module Operation.
 //===----------------------------------------------------------------------===//
@@ -33,8 +35,11 @@ namespace mlir {
 /// single block containing opaque operations. The region of a module is not
 /// allowed to implicitly capture global values, and all external references
 /// must use symbolic references via attributes(e.g. via a string name).
-class ModuleOp : public Op<ModuleOp, OpTrait::ZeroOperands, OpTrait::ZeroResult,
-                           OpTrait::IsIsolatedFromAbove, OpTrait::SymbolTable> {
+class ModuleOp
+    : public Op<
+          ModuleOp, OpTrait::ZeroOperands, OpTrait::ZeroResult,
+          OpTrait::IsIsolatedFromAbove, OpTrait::SymbolTable,
+          OpTrait::SingleBlockImplicitTerminator<ModuleTerminatorOp>::Impl> {
 public:
   using Op::Op;
   using Op::print;
",0,train
4c149223e3bcf18b0c30b876dfed443f75593387,tensorflow/tensorflow,"TensorFlow: enable CUDA host memory allocation for GPU-compatible
buffers when copying to the CPU device.

Re-arranges some of the internal gpu libraries to be library vs. runtime
specific.
Change: 116472314",threadpool_device.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 #include ""tensorflow/core/common_runtime/threadpool_device.h""
 
+#include ""tensorflow/core/common_runtime/gpu/process_state.h""
 #include ""tensorflow/core/common_runtime/local_device.h""
 #include ""tensorflow/core/framework/allocator.h""
 #include ""tensorflow/core/framework/device_base.h""
@@ -52,7 +53,12 @@ void ThreadPoolDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
 }
 
 Allocator* ThreadPoolDevice::GetAllocator(AllocatorAttributes attr) {
-  return allocator_;
+  ProcessState* ps = ProcessState::singleton();
+  if (attr.gpu_compatible()) {
+    return ps->GetCUDAHostAllocator(0);
+  } else {
+    return allocator_;
+  }
 }
 
 Status ThreadPoolDevice::MakeTensorFromProto(
",0,train
1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables

PiperOrigin-RevId: 394716703
Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",function.py,"@@ -92,6 +92,12 @@ FORWARD_FUNCTION_ATTRIBUTE_NAME = ""forward_function_name""
 BACKWARD_FUNCTION_ATTRIBUTE_NAME = ""backward_function_name""
 IMPLEMENTS_ATTRIBUTE_NAME = ""_implements""
 SHARED_RENDEZVOUS_ATTRIBUTE_NAME = ""shared_rendezvous""
+# A temporary flag. Turning this on will allow tf.function to aggressively avoid
+# retracing ResourceVariable inputs. This feature will change tf.function's
+# Variable tracing behavior, hence we want to limit the potential blockers that
+# are not detected by Global TAP.
+# TODO(jiaweix): remove this flag and related args (b/198782192)
+ENCODE_VARIABLES_BY_RESOURCE_ID = False
 
 _graph_building_time_counter = monitoring.Counter(
     ""/tensorflow/core/tf_function/graph_building_time_usecs"",
@@ -3175,8 +3181,8 @@ class Function(object):
       # This reduces ambiguity, for example, when args contains a dict and
       # kwargs is empty.
       inputs = (args, kwargs)
-      input_signature = pywrap_tfe.TFE_Py_EncodeArg(inputs,
-                                                    include_tensor_ranks_only)
+      input_signature = pywrap_tfe.TFE_Py_EncodeArg(
+          inputs, include_tensor_ranks_only, ENCODE_VARIABLES_BY_RESOURCE_ID)
       hashable_input_signature = _make_input_signature_hashable(input_signature)
     else:
       del args, kwargs
",0,train
1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables

PiperOrigin-RevId: 394716703
Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",pywrap_tfe.h,"@@ -370,7 +370,8 @@ PyObject* TFE_Py_TensorShapeOnDevice(PyObject* tensor);
 // then the encoding only stores tensor ranks, and the key is
 // agnostic to dimension sizes.  Otherwise, full tensor shape encodings are
 // returned.
-PyObject* TFE_Py_EncodeArg(PyObject*, bool include_tensor_ranks_only);
+PyObject* TFE_Py_EncodeArg(PyObject*, bool include_tensor_ranks_only,
+                           bool encode_var_by_res_id);
 
 void TFE_Py_EnableInteractivePythonLogging();
 
",0,train
1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables

PiperOrigin-RevId: 394716703
Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",pywrap_tfe_src.cc,"@@ -3972,6 +3972,7 @@ const char kTupleEnd[] = ""u"";
 const char kDIter[] = ""I"";
 const char kDict[] = ""D"";
 const char kRaw[] = ""R"";
+const char kResourceVariable[] = ""r"";
 const char kShape[] = ""s"";
 const char kShapeDelim[] = ""-"";
 const char kDType[] = ""d"";
@@ -4092,12 +4093,14 @@ tensorflow::Status TFE_Py_EncodeTensorOrTensorSpec(
 
 tensorflow::Status TFE_Py_EncodeArgHelperInternal(
     PyObject* arg, bool include_tensor_ranks_only, std::vector<int>& res_vec,
-    absl::flat_hash_map<int, int>& res_map, int& cur_res, EncodeResult* result);
+    absl::flat_hash_map<int, int>& res_map, int& cur_res,
+    bool encode_var_by_res_id, EncodeResult* result);
 
 // This function doesn't set the type of sequence before
 tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
                                          const char* end_type,
                                          bool include_tensor_ranks_only,
+                                         bool encode_var_by_res_id,
                                          std::vector<int>& res_vec,
                                          absl::flat_hash_map<int, int>& res_map,
                                          int& cur_res, EncodeResult* result) {
@@ -4113,7 +4116,8 @@ tensorflow::Status TFE_Py_EncodeSequence(PyObject* arg, const char* type,
       absl::StrAppend(&result->str, kNone);
     } else {
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal(
-          item, include_tensor_ranks_only, res_vec, res_map, cur_res, result));
+          item, include_tensor_ranks_only, res_vec, res_map, cur_res,
+          encode_var_by_res_id, result));
     }
   }
   absl::StrAppend(&result->str, end_type);
@@ -4136,7 +4140,7 @@ void UpdateResourceCount(int res_id, std::vector<int>& res_vec,
 tensorflow::Status TFE_Py_EncodeArgHelperInternal(
     PyObject* arg, bool include_tensor_ranks_only, std::vector<int>& res_vec,
     absl::flat_hash_map<int, int>& res_map, int& cur_res,
-    EncodeResult* result) {
+    bool encode_var_by_res_id, EncodeResult* result) {
   if (tensorflow::swig::IsTensorSpec(arg)) {
     TF_RETURN_IF_ERROR(TFE_Py_EncodeTensorOrTensorSpec(
         arg, true, include_tensor_ranks_only, result));
@@ -4182,13 +4186,13 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal(
       absl::StrAppend(&result->str, kCompositeTensor);
     }
   } else if (PyList_Check(arg)) {
-    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kList, kListEnd,
-                                             include_tensor_ranks_only, res_vec,
-                                             res_map, cur_res, result));
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(
+        arg, kList, kListEnd, include_tensor_ranks_only, encode_var_by_res_id,
+        res_vec, res_map, cur_res, result));
   } else if (tensorflow::swig::IsTuple(arg)) {
-    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(arg, kTuple, kTupleEnd,
-                                             include_tensor_ranks_only, res_vec,
-                                             res_map, cur_res, result));
+    TF_RETURN_IF_ERROR(TFE_Py_EncodeSequence(
+        arg, kTuple, kTupleEnd, include_tensor_ranks_only, encode_var_by_res_id,
+        res_vec, res_map, cur_res, result));
   } else if (tensorflow::swig::IsMapping(arg)) {
     tensorflow::Safe_PyObjectPtr keys(tensorflow::swig::MappingKeys(arg));
     if (PyList_Sort(keys.get()) == -1) {
@@ -4201,11 +4205,12 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal(
     for (int i = 0; i < len; i++) {
       PyObject* key = PyList_GetItem(keys.get(), i);
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal(
-          key, include_tensor_ranks_only, res_vec, res_map, cur_res, result));
+          key, include_tensor_ranks_only, res_vec, res_map, cur_res,
+          encode_var_by_res_id, result));
       tensorflow::Safe_PyObjectPtr value(PyObject_GetItem(arg, key));
-      TF_RETURN_IF_ERROR(
-          TFE_Py_EncodeArgHelperInternal(value.get(), include_tensor_ranks_only,
-                                         res_vec, res_map, cur_res, result));
+      TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal(
+          value.get(), include_tensor_ranks_only, res_vec, res_map, cur_res,
+          encode_var_by_res_id, result));
     }
   } else if (tensorflow::swig::IsCompositeTensor(arg)) {
     absl::StrAppend(&result->str, kCompositeTensor);
@@ -4235,9 +4240,29 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal(
       tensorflow::Safe_PyObjectPtr attr_arg(PyObject_GetAttr(arg, name.get()));
       TF_RETURN_IF_ERROR(TFE_Py_EncodeArgHelperInternal(
           attr_arg.get(), include_tensor_ranks_only, res_vec, res_map, cur_res,
-          result));
+          encode_var_by_res_id, result));
     }
     absl::StrAppend(&result->str, kAttrsEnd);
+  } else if (tensorflow::swig::IsResourceVariable(arg) &&
+             encode_var_by_res_id) {
+    absl::StrAppend(&result->str, kResourceVariable);
+    // Get resource id, similar to OwnedIterator
+    tensorflow::Safe_PyObjectPtr p_res_id(
+        PyObject_CallMethod(arg, ""__tf_resource_id__"", nullptr));
+    if (p_res_id == nullptr) {
+      return tensorflow::errors::InvalidArgument(
+          ""Error while calling __tf_resource_id__()."");
+    }
+    int res_id = PyLong_AsSize_t(p_res_id.get());
+    if (res_id < 0) {
+      return tensorflow::errors::InvalidArgument(""PyLong_AsSize_t failure"");
+    }
+    UpdateResourceCount(res_id, res_vec, res_map, cur_res);
+
+    // Get dtype and shape, similar to Tensor.
+    tensorflow::Safe_PyObjectPtr type_spec(
+        PyObject_CallMethod(arg, ""__tf_function_cache_spec__"", nullptr));
+    absl::StrAppend(&result->str, PyUnicode_AsUTF8(type_spec.get()));
   } else {
     PyObject* object = PyWeakref_NewRef(arg, nullptr);
 
@@ -4257,12 +4282,14 @@ tensorflow::Status TFE_Py_EncodeArgHelperInternal(
 
 tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg,
                                           bool include_tensor_ranks_only,
+                                          bool encode_var_by_res_id,
                                           EncodeResult* result) {
   std::vector<int> res_vec;
   absl::flat_hash_map<int, int> res_map;
   int cur_res = 0;
-  auto status = TFE_Py_EncodeArgHelperInternal(
-      arg, include_tensor_ranks_only, res_vec, res_map, cur_res, result);
+  auto status = TFE_Py_EncodeArgHelperInternal(arg, include_tensor_ranks_only,
+                                               res_vec, res_map, cur_res,
+                                               encode_var_by_res_id, result);
 
   // Add 'encoding' of resources
   std::string str_resource_encoding = """";
@@ -4289,10 +4316,11 @@ tensorflow::Status TFE_Py_EncodeArgHelper(PyObject* arg,
 // `include_tensor_ranks_only` allows caching on arguments excluding shape info,
 // so that a slow path using relaxed shape can rely on a cache key that excludes
 // shapes.
-PyObject* TFE_Py_EncodeArg(PyObject* arg, bool include_tensor_ranks_only) {
+PyObject* TFE_Py_EncodeArg(PyObject* arg, bool include_tensor_ranks_only,
+                           bool encode_var_by_res_id) {
   EncodeResult result;
-  const auto status =
-      TFE_Py_EncodeArgHelper(arg, include_tensor_ranks_only, &result);
+  const auto status = TFE_Py_EncodeArgHelper(arg, include_tensor_ranks_only,
+                                             encode_var_by_res_id, &result);
   if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
     return nullptr;
   }
",0,train
1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables

PiperOrigin-RevId: 394716703
Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",resource_variable_ops.py,"@@ -463,6 +463,16 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
       return ""<tf.Variable '%s' shape=%s dtype=%s>"" % (
           self.name, self.get_shape(), self.dtype.name)
 
+  def __tf_function_cache_spec__(self):
+    res = f""d{self.dtype.as_datatype_enum}s""
+    for dim_size in self.shape:
+      res += f""{dim_size}-""
+
+    return res
+
+  def __tf_resource_id__(self):
+    return self._handle._id  # pylint:disable=protected-access
+
   @contextlib.contextmanager
   def _assign_dependencies(self):
     """"""Makes assignments depend on the cached value, if any.
",0,train
1e66408e0190dc16f3405fb69c3237a09be3c5ca,tensorflow/tensorflow,"Use resource id to distinguish different combinations of Resource Variables

PiperOrigin-RevId: 394716703
Change-Id: I6b52913148296737f365b6fc5567f18193fbda52",tfe_wrapper.cc,"@@ -1159,11 +1159,12 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
   m.def(""TFE_Py_RegisterVSpace"", [](const py::handle& o) {
     return tensorflow::PyoOrThrow(TFE_Py_RegisterVSpace(o.ptr()));
   });
-  m.def(""TFE_Py_EncodeArg"",
-        [](const py::handle& o, bool include_tensor_ranks_only) {
-          return tensorflow::PyoOrThrow(
-              TFE_Py_EncodeArg(o.ptr(), include_tensor_ranks_only));
-        });
+  m.def(""TFE_Py_EncodeArg"", [](const py::handle& o,
+                               bool include_tensor_ranks_only,
+                               bool encode_variables_by_resource_id) {
+    return tensorflow::PyoOrThrow(TFE_Py_EncodeArg(
+        o.ptr(), include_tensor_ranks_only, encode_variables_by_resource_id));
+  });
   m.def(""TFE_EnableCollectiveOps"", [](const py::handle& ctx, py::bytes proto) {
     tensorflow::Safe_TF_StatusPtr status =
         tensorflow::make_safe(TF_NewStatus());
",0,train
40cb9181f9c93a58471ed668fb1760717ff9baad,tensorflow/tensorflow,Use strcat,s3_file_system.cc,"@@ -292,6 +292,7 @@ class S3WritableFile : public WritableFile {
         ""application/octet-stream"", Aws::Map<Aws::String, Aws::String>());
     handle->WaitUntilFinished();
     int retries = 0;
+
     while (handle->GetStatus() == Aws::Transfer::TransferStatus::FAILED &&
            retries++ < kUploadRetries) {
       // if multipart upload was used, only the failed parts will be re-sent
@@ -300,6 +301,7 @@ class S3WritableFile : public WritableFile {
       transfer_manager_.get()->RetryUpload(outfile_, handle);
       handle->WaitUntilFinished();
     }
+
     if (handle->GetStatus() != Aws::Transfer::TransferStatus::COMPLETED) {
       auto error = handle->GetLastError();
       if (error.GetResponseCode() == Aws::Http::HttpResponseCode::FORBIDDEN) {
@@ -711,7 +713,7 @@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, const Aws::Strin
   Aws::String source = Aws::String((source_bucket + ""/"" + source_key).c_str());
   Aws::String source_full_path = Aws::String(""s3://"") + source;
   uint64 file_length;
-  TF_RETURN_IF_ERROR(this->GetFileSize(std::string(source_full_path.c_str()), &file_length));
+  TF_RETURN_IF_ERROR(this->GetFileSize(string(source_full_path.c_str()), &file_length));
   int num_parts;
   if (file_length <= multi_part_copy_part_size_) {
     num_parts = 1;
@@ -722,12 +724,12 @@ Status S3FileSystem::CopyFile(const Aws::String& source_bucket, const Aws::Strin
   if (num_parts == 1) {
     return SimpleCopy(source, target_bucket, target_key);
   } else if (num_parts > 10000) {
-    std::ostringstream s;
-    s << ""MultiPartCopy with number of parts more than 10000 is not supported. Your object ""
-      << source << "" required "" << num_parts << "" as multi_part_copy_part_size is set to ""
-      << multi_part_copy_part_size_ << "". You can control this part size using the environment variable ""
-    ""S3_MULTI_PART_COPY_PART_SIZE to increase it."";
-    return tensorflow::errors::Unimplemented(s.str());
+    string message = strings::StrCat(
+      ""MultiPartCopy with number of parts more than 10000 is not supported. Your object "",
+      source, "" required "", num_parts, "" as multi_part_copy_part_size is set to "",
+      multi_part_copy_part_size_, "". You can control this part size using the environment variable "",
+      ""S3_MULTI_PART_COPY_PART_SIZE to increase it."");
+    return tensorflow::errors::Unimplemented(message);
   } else {
     return MultiPartCopy(source, target_bucket, target_key, num_parts, file_length);
   }
@@ -798,9 +800,7 @@ Status S3FileSystem::MultiPartCopy(const Aws::String& source,
         endPos = file_length - 1;
       }
 
-      std::ostringstream rangeStream;
-      rangeStream << ""bytes="" << startPos << ""-"" << std::to_string(endPos);
-      string range = rangeStream.str();
+      string range = strings::StrCat(""bytes="", startPos, ""-"", endPos);
 
       Aws::S3::Model::UploadPartCopyRequest uploadPartCopyRequest;
       uploadPartCopyRequest.SetBucket(target_bucket);
",0,train
10ad29455ada3003bb02abb737c6ba166d1be751,tensorflow/tensorflow,"[Pluggable Device] Use default settings when device ""architecture"" field is not set.",op_level_cost_estimator.cc,"@@ -736,29 +736,38 @@ DeviceInfo OpLevelCostEstimator::GetDeviceInfo(
       }
     }
   } else if (device.type() == ""GPU"") {
-    const std::string architecture = device.environment().at(""architecture"");
-    int cores_per_multiprocessor;
-    if (architecture < ""3"") {
-      // Fermi
-      cores_per_multiprocessor = 32;
-    } else if (architecture < ""4"") {
-      // Kepler
-      cores_per_multiprocessor = 192;
-    } else if (architecture < ""6"") {
-      // Maxwell
-      cores_per_multiprocessor = 128;
-    } else {
-      // Pascal (compute capability version 6) and Volta (compute capability
-      // version 7)
-      cores_per_multiprocessor = 64;
-    }
-    gflops = device.num_cores() * device.frequency() * 1e-3 *
-             cores_per_multiprocessor * kOpsPerMac;
-    if (device.bandwidth() > 0) {
-      gb_per_sec = device.bandwidth() / 1e6;
+    const auto& device_env = device.environment();
+    auto it = device_env.find(""architecture"");
+    if (it != device_env.end()) {
+      const std::string architecture = device_env.at(""architecture"");
+      int cores_per_multiprocessor;
+      if (architecture < ""3"") {
+        // Fermi
+        cores_per_multiprocessor = 32;
+      } else if (architecture < ""4"") {
+        // Kepler
+        cores_per_multiprocessor = 192;
+      } else if (architecture < ""6"") {
+        // Maxwell
+        cores_per_multiprocessor = 128;
+      } else {
+        // Pascal (compute capability version 6) and Volta (compute capability
+        // version 7)
+        cores_per_multiprocessor = 64;
+      }
+      gflops = device.num_cores() * device.frequency() * 1e-3 *
+               cores_per_multiprocessor * kOpsPerMac;
+      if (device.bandwidth() > 0) {
+        gb_per_sec = device.bandwidth() / 1e6;
+      } else {
+        gb_per_sec = 100;
+      }
     } else {
+      // Architecture is not available (ex: pluggable device), return default value.
       gb_per_sec = 100;
-    }
+      gflops = 100;     // Dummy value;
+      gb_per_sec = 12;  // default PCIe x16 gen3.
+     }
   } else {
     LOG_EVERY_N(WARNING, 1000) << ""Unknown device type: "" << device.type()
                                << "", assuming PCIe between CPU and GPU."";
",0,train
27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,densenet.py,"@@ -162,7 +162,7 @@ def DenseNet(
       and width and height should be no smaller than 32.
       E.g. `(200, 200, 3)` would be one valid value.
     pooling: optional pooling mode for feature extraction
-      when `include_top` is `False`.
+      when `include_top` is `False`. It could be:
       - `None` means that the output of the model will be
           the 4D tensor output of the
           last convolutional block.
@@ -469,7 +469,6 @@ DOC = """"""
       or invalid input shape.
     ValueError: if `classifier_activation` is not `softmax` or `None` when
       using a pretrained top layer.
-
 """"""
 
 setattr(DenseNet121, '__doc__', DenseNet121.__doc__ + DOC)
",0,train
27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,nasnet.py,"@@ -347,7 +347,7 @@ def NASNetMobile(input_shape=None,
           `layers.Input()`)
           to use as image input for the model.
       pooling: Optional pooling mode for feature extraction
-          when `include_top` is `False`.
+          when `include_top` is `False`. It could be:
           - `None` means that the output of the model
               will be the 4D tensor output of the
               last convolutional layer.
@@ -438,7 +438,7 @@ def NASNetLarge(input_shape=None,
           `layers.Input()`)
           to use as image input for the model.
       pooling: Optional pooling mode for feature extraction
-          when `include_top` is `False`.
+          when `include_top` is `False`. It could be:
           - `None` means that the output of the model
               will be the 4D tensor output of the
               last convolutional layer.
",0,train
27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,resnet.py,"@@ -527,7 +527,7 @@ DOC = """"""
   Optionally loads weights pre-trained on ImageNet.
   Note that the data format convention used by the model is
   the one specified in your Keras config at `~/.keras/keras.json`.
-  
+
   Arguments:
     include_top: whether to include the fully-connected
       layer at the top of the network.
@@ -544,7 +544,7 @@ DOC = """"""
       and width and height should be no smaller than 32.
       E.g. `(200, 200, 3)` would be one valid value.
     pooling: Optional pooling mode for feature extraction
-      when `include_top` is `False`.
+      when `include_top` is `False`. It could be:
       - `None` means that the output of the model will be
           the 4D tensor output of the
           last convolutional block.
",0,train
27c78a334bfb7db71221818f6ba52982926993a3,tensorflow/tensorflow,Delete extra spaces from the changes & fix indent on sublists,resnet_v2.py,"@@ -220,7 +220,6 @@ DOC = """"""
       or invalid input shape.
     ValueError: if `classifier_activation` is not `softmax` or `None` when
       using a pretrained top layer.
-
 """"""
 
 setattr(ResNet50V2, '__doc__', ResNet50V2.__doc__ + DOC)
",0,train
79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types.
Change: 142507111",optimize_for_inference_lib.py,"@@ -72,7 +72,8 @@ def optimize_for_inference(input_graph_def, input_node_names,
       inference.
     output_node_names: A list of names of the nodes that produce the final
       results.
-    placeholder_type_enum: Data type of the placeholders used for inputs.
+    placeholder_type_enum: The AttrValue enum for the placeholder data type, or
+        a list that specifies one value per input node name.
 
   Returns:
     An optimized version of the input graph.
",0,test
79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types.
Change: 142507111",strip_unused_lib.py,"@@ -35,7 +35,8 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
     input_graph_def: A graph with nodes we want to prune.
     input_node_names: A list of the nodes we use as inputs.
     output_node_names: A list of the output nodes.
-    placeholder_type_enum: The AttrValue enum for the placeholder data type.
+    placeholder_type_enum: The AttrValue enum for the placeholder data type, or
+        a list that specifies one value per input node name.
 
   Returns:
     A GraphDef with all unnecessary ops removed.
@@ -49,8 +50,13 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
       placeholder_node = tf.NodeDef()
       placeholder_node.op = ""Placeholder""
       placeholder_node.name = node.name
-      placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue(
-          type=placeholder_type_enum))
+      if isinstance(placeholder_type_enum, list):
+        input_node_index = input_node_names.index(node.name)
+        placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue(
+            type=placeholder_type_enum[input_node_index]))
+      else:
+        placeholder_node.attr[""dtype""].CopyFrom(tf.AttrValue(
+            type=placeholder_type_enum))
       if ""_output_shapes"" in node.attr:
         placeholder_node.attr[""_output_shapes""].CopyFrom(
             node.attr[""_output_shapes""])
",0,test
79c27b9b3acee58481cc55e6b249795713b00ca8,tensorflow/tensorflow,"Adding support for multiple input types.
Change: 142507111",strip_unused_test.py,"@@ -43,8 +43,9 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
     # and that then multiplies it by 2.
     with ops.Graph().as_default():
       constant_node = constant_op.constant(1.0, name=""constant_node"")
-      wanted_input_node = math_ops.sub(
-          constant_node, 3.0, name=""wanted_input_node"")
+      wanted_input_node = math_ops.sub(constant_node,
+                                       3.0,
+                                       name=""wanted_input_node"")
       output_node = math_ops.multiply(
           wanted_input_node, 2.0, name=""output_node"")
       math_ops.add(output_node, 2.0, name=""later_node"")
@@ -89,6 +90,66 @@ class StripUnusedTest(test_util.TensorFlowTestCase):
         output = sess.run(output_node, feed_dict={input_node: [10.0]})
         self.assertNear(20.0, output, 0.00001)
 
+  def testStripUnusedMultipleInputs(self):
+    input_graph_name = ""input_graph.pb""
+    output_graph_name = ""output_graph.pb""
+
+    # We'll create an input graph that multiplies two input nodes.
+    with ops.Graph().as_default():
+      constant_node1 = constant_op.constant(1.0, name=""constant_node1"")
+      constant_node2 = constant_op.constant(2.0, name=""constant_node2"")
+      input_node1 = math_ops.sub(constant_node1, 3.0, name=""input_node1"")
+      input_node2 = math_ops.sub(constant_node2, 5.0, name=""input_node2"")
+      output_node = math_ops.multiply(
+          input_node1, input_node2, name=""output_node"")
+      math_ops.add(output_node, 2.0, name=""later_node"")
+      sess = session.Session()
+      output = sess.run(output_node)
+      self.assertNear(6.0, output, 0.00001)
+      graph_io.write_graph(sess.graph, self.get_temp_dir(), input_graph_name)
+
+    # We save out the graph to disk, and then call the const conversion
+    # routine.
+    input_graph_path = os.path.join(self.get_temp_dir(), input_graph_name)
+    input_binary = False
+    input_node_names = ""input_node1,input_node2""
+    input_node_types = [
+        dtypes.float32.as_datatype_enum, dtypes.float32.as_datatype_enum
+    ]
+    output_binary = True
+    output_node_names = ""output_node""
+    output_graph_path = os.path.join(self.get_temp_dir(), output_graph_name)
+
+    strip_unused_lib.strip_unused_from_files(input_graph_path, input_binary,
+                                             output_graph_path, output_binary,
+                                             input_node_names,
+                                             output_node_names,
+                                             input_node_types)
+
+    # Now we make sure the variable is now a constant, and that the graph still
+    # produces the expected result.
+    with ops.Graph().as_default():
+      output_graph_def = graph_pb2.GraphDef()
+      with open(output_graph_path, ""rb"") as f:
+        output_graph_def.ParseFromString(f.read())
+        _ = importer.import_graph_def(output_graph_def, name="""")
+
+      self.assertEqual(3, len(output_graph_def.node))
+      for node in output_graph_def.node:
+        self.assertNotEqual(""Add"", node.op)
+        self.assertNotEqual(""Sub"", node.op)
+        if node.name == input_node_names:
+          self.assertTrue(""shape"" in node.attr)
+
+      with session.Session() as sess:
+        input_node1 = sess.graph.get_tensor_by_name(""input_node1:0"")
+        input_node2 = sess.graph.get_tensor_by_name(""input_node2:0"")
+        output_node = sess.graph.get_tensor_by_name(""output_node:0"")
+        output = sess.run(output_node,
+                          feed_dict={input_node1: [10.0],
+                                     input_node2: [-5.0]})
+        self.assertNear(-50.0, output, 0.00001)
+
 
 if __name__ == ""__main__"":
   test.main()
",0,test
ba4804031357abecd1f412eeb5a04810a248391a,tensorflow/tensorflow,"Add a global resource manager for TPU specific operations.

PiperOrigin-RevId: 312388244
Change-Id: I30dd6ce3a2f0eed3d257750626e11b3bb6eded97",tpu_configuration.cc,"@@ -0,0 +1,44 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/tpu/tpu_configuration.h""
+
+namespace tensorflow {
+
+namespace {
+
+ResourceMgr* GetGlobalResourceMgr() {
+  static ResourceMgr* const rmgr = new ResourceMgr();
+  return rmgr;
+}
+
+}  // namespace
+
+#if !defined(PLATFORM_GOOGLE)
+// Used only by Google-internal tests, so deliberately left empty.
+void MaybeInitializeTPUSystemForTests() {}
+#endif
+
+ResourceMgr* GetTPUConfigResourceMgr() {
+  MaybeInitializeTPUSystemForTests();
+
+  // Put all TPU-related state in the global ResourceMgr. This includes the
+  // TpuPodState, compilation cache, etc. We don't use the TPU_SYSTEM
+  // ResourceMgr because there may be more than one TPU_SYSTEM ResourceMgr when
+  // DirectSession or isolate_session_state are used.
+  return GetGlobalResourceMgr();
+}
+
+}  // namespace tensorflow
",0,train
ba4804031357abecd1f412eeb5a04810a248391a,tensorflow/tensorflow,"Add a global resource manager for TPU specific operations.

PiperOrigin-RevId: 312388244
Change-Id: I30dd6ce3a2f0eed3d257750626e11b3bb6eded97",tpu_configuration.h,"@@ -0,0 +1,30 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_
+#define TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_
+
+#include ""tensorflow/core/framework/resource_mgr.h""
+
+namespace tensorflow {
+
+void MaybeInitializeTPUSystemForTests();
+
+// Returns a process-wide global ResourceMgr.
+ResourceMgr* GetTPUConfigResourceMgr();
+
+}  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_TPU_TPU_CONFIGURATION_H_
",0,train
7358025743951b42fe0f99fb85b4418769de5357,tensorflow/tensorflow,"Add test cases with axis and keepdims for tf.count_nonzero and string

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",reduction_ops_test.py,"@@ -889,9 +889,9 @@ class AnyReductionTest(test.TestCase):
 
 class CountNonzeroReductionTest(test.TestCase):
 
-  def _compare(self, x, reduction_axes, keepdims, use_gpu=False,
+  def _compare(self, x, reduction_axes, keepdims, use_gpu=False, zero=0,
                feed_dict=None):
-    np_ans = (x != 0).astype(np.int32)
+    np_ans = (x != zero).astype(np.int32)
     if reduction_axes is None:
       np_ans = np.sum(np_ans, keepdims=keepdims)
     else:
@@ -964,6 +964,15 @@ class CountNonzeroReductionTest(test.TestCase):
       v = math_ops.count_nonzero(constant_op.constant([""test""]))
       self.assertAllClose(sess.run(v), 1)
 
+  def testStringReduce1D(self):
+    # Create a 1D array of strings
+    x = np.asarray(["""", """", ""a"", """", """", ""b""])
+    self._compare(x, None, keepdims=False, zero=np.str(""""))
+    self._compare(x, [], keepdims=False, zero=np.str(""""))
+    self._compare(x, [0], keepdims=False, zero=np.str(""""))
+    self._compare(x, None, keepdims=True, zero=np.str(""""))
+    self._compare(x, [], keepdims=True, zero=np.str(""""))
+    self._compare(x, [0], keepdims=True, zero=np.str(""""))
 
 if __name__ == ""__main__"":
   test.main()
",0,train
aa50969378a2efe745c37f120452bc89effaf7ba,tensorflow/tensorflow,"Remove SavedModel dependency on manifest proto.
Change: 133885459",builder.py,"@@ -26,7 +26,7 @@ import os
 
 from google.protobuf.any_pb2 import Any
 
-from tensorflow.contrib.session_bundle import manifest_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.core.protobuf import saved_model_pb2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -123,12 +123,12 @@ class SavedModelBuilder(object):
 
     Args:
       asset_filename: The filename of the asset to be added.
-      asset_tensor: The asset tensor used to populate the tensor binding of the
+      asset_tensor: The asset tensor used to populate the tensor info of the
           asset proto.
     """"""
-    asset_proto = manifest_pb2.AssetFile()
+    asset_proto = meta_graph_pb2.AssetFileDef()
     asset_proto.filename = asset_filename
-    asset_proto.tensor_binding.tensor_name = asset_tensor.name
+    asset_proto.tensor_info.name = asset_tensor.name
 
     asset_any_proto = Any()
     asset_any_proto.Pack(asset_proto)
",0,train
aa50969378a2efe745c37f120452bc89effaf7ba,tensorflow/tensorflow,"Remove SavedModel dependency on manifest proto.
Change: 133885459",saved_model_test.py,"@@ -20,8 +20,8 @@ from __future__ import print_function
 import os
 import tensorflow as tf
 
-from tensorflow.contrib.session_bundle import manifest_pb2
 from tensorflow.core.protobuf import config_pb2
+from tensorflow.core.protobuf import meta_graph_pb2
 from tensorflow.python.framework import errors
 from tensorflow.python.lib.io import file_io
 from tensorflow.python.saved_model import builder as saved_model_builder
@@ -363,7 +363,7 @@ class SavedModelTest(tf.test.TestCase):
       collection_def = foo_graph.collection_def
       assets_any = collection_def[constants.ASSETS_KEY].any_list.value
       self.assertEqual(len(assets_any), 1)
-      asset = manifest_pb2.AssetFile()
+      asset = meta_graph_pb2.AssetFileDef()
       assets_any[0].Unpack(asset)
       assets_path = os.path.join(
           compat.as_bytes(export_dir),
@@ -372,7 +372,7 @@ class SavedModelTest(tf.test.TestCase):
       asset_contents = file_io.read_file_to_string(assets_path)
       self.assertEqual(""foo bar baz"", compat.as_text(asset_contents))
       self.assertEqual(""hello42.txt"", asset.filename)
-      self.assertEqual(""asset_file_tensor:0"", asset.tensor_binding.tensor_name)
+      self.assertEqual(""asset_file_tensor:0"", asset.tensor_info.name)
       ignored_asset_path = os.path.join(
           compat.as_bytes(export_dir),
           compat.as_bytes(constants.ASSETS_DIRECTORY),
",0,train
5afcbe91aa90a7795b49910f0e542f07be796448,tensorflow/tensorflow,"eager: Some more backprop tests

PiperOrigin-RevId: 166246790",backprop_test.py,"@@ -30,7 +30,12 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import embedding_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
+from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
+from tensorflow.python.ops import nn_ops
+from tensorflow.python.ops import random_ops
 from tensorflow.python.ops import resource_variable_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.training import training
 
 
 class BackpropTest(test.TestCase):
@@ -70,16 +75,86 @@ class BackpropTest(test.TestCase):
       self.assertAllClose(grad.numpy(), tf_dense_grad.eval())
 
   def testImplicitGradWithResourceVariable(self):
-    x = resource_variable_ops.ResourceVariable(initial_value=tensor.Tensor(1.0),
-                                               name='x')
+    x = resource_variable_ops.ResourceVariable(
+        initial_value=tensor.Tensor(1.0), name='x')
+
     def fn():
       tape.watch(x.handle)
       b = tensor.Tensor(2.0)
       c = math_ops.add(x.value(), b)
       return math_ops.add(c, tensor.Tensor(3.0))
+
     grad = backprop.implicit_grad(fn)()[0][1]
     self.assertEqual(grad.numpy(), 1.0)
 
+  def testImplicitGradOverEmbeddingLookup(self):
+    batch_size = 8
+    embedding_size = 512
+    vocab_size = 1000
+    lrn_rate = 0.1
+    random_init = random_ops.random_uniform([vocab_size, embedding_size])
+
+    x = array_ops.ones((batch_size), dtypes.int64)
+    embedding = resource_variable_ops.ResourceVariable(
+        initial_value=random_init, dtype=dtypes.float32, name='embedding')
+
+    def f():
+      tape.watch(embedding.handle)
+      embedded_x = embedding_ops.embedding_lookup(embedding, x)
+      return tensor.Tensor(1.0, dtypes.float32) - embedded_x
+
+    grad = backprop.implicit_grad(f)()[0][1]
+    opt = training.GradientDescentOptimizer(lrn_rate)
+
+    with context.graph_mode(), self.test_session():
+      tf_x = array_ops.ones((batch_size), dtypes.int64)
+      # TODO(ashankar,apassos): Change to ResourceVariable.
+      tf_embedding = variables.Variable(
+          random_init.numpy(), name='tf_embedding')
+      tf_embedded_x = embedding_ops.embedding_lookup(tf_embedding, tf_x)
+      tf_y = 1.0 - tf_embedded_x
+      tf_grad = gradients.gradients(tf_y, [tf_embedding])[0]
+      tf_opt = training.GradientDescentOptimizer(0.1)
+      tf_embedding.initializer.run()
+
+      self.assertAllClose(tf_grad.indices.eval(), grad.indices.numpy())
+      self.assertAllClose(tf_grad.values.eval(), grad.values.numpy())
+
+      tf_opt.apply_gradients([(tf_grad, tf_embedding)]).run()
+      expected = tf_embedding.eval()
+    opt.apply_gradients([(grad, embedding)])
+    self.assertAllClose(expected, embedding.read_value().numpy())
+
+  def testGradientNone(self):
+
+    def loss(x, l):
+      return math_ops.reduce_mean(
+          nn_ops.softmax_cross_entropy_with_logits(logits=x, labels=l),
+          tensor.Tensor([0]))
+
+    logits = tensor.Tensor([[0.0, 0.0]])
+    labels = tensor.Tensor([[1.0, 0.0]])
+    # softmax_cross_entropy_with_logits returns two outputs and in this case the
+    # gradient wrt the second is None.
+    g, = backprop.gradients_function(loss, [0])(logits, labels)
+    self.assertAllEqual(g.numpy(), [[-0.5, 0.5]])
+
+  def testSecondGrad(self):
+
+    def first(x):
+      l = tensor.Tensor([[0.0]])
+      x = nn_ops.softmax_cross_entropy_with_logits(labels=l, logits=x)
+      x = math_ops.reduce_sum(x, tensor.Tensor([0]))
+      return x
+
+    def second(x):
+      grad = backprop.gradients_function(first, [0])(x)[0]
+      return math_ops.reduce_sum(grad, tensor.Tensor([0]))
+
+    f = tensor.Tensor([[0.1]])
+    grad = backprop.gradients_function(second, [0])(f)[0]
+    self.assertAllEqual([[0.0]], grad.numpy())
+
   def testGPU(self):
     if not context.context().num_gpus():
       self.skipTest('No GPUs found')
@@ -95,6 +170,20 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(fn, [0])(tensor.Tensor(1.0))[0]
     self.assertEqual(grad.numpy(), 1.0)
 
+  def testGPUImplicitGrad(self):
+    if not context.context().num_gpus():
+      self.skipTest('No GPU found')
+    with context.device('gpu:0'):
+      v = resource_variable_ops.ResourceVariable(tensor.Tensor(1.0), name='v')
+
+    def f():
+      with context.device('gpu:0'):
+        tape.watch(v.handle)
+        return v.read_value()
+
+    self.assertEqual(
+        backprop.implicit_grad(f)()[0][1].as_cpu_tensor().numpy(), 1.0)
+
   def testCPU(self):
 
     def fn(x):
",0,train
11f1dab4fce23c73073e32cda910a2a1a87c394f,tensorflow/tensorflow,"StridedSlice gradient more efficient in tfe.

PiperOrigin-RevId: 210927458",pywrap_tfe_src.cc,"@@ -1784,6 +1784,7 @@ bool OpDoesntRequireOutput(const string& op_name) {
           ""ReadVariableOp"",
           ""VarHandleOp"",
           ""Shape"",
+          ""StridedSlice"",
       });
 
   return ops_that_dont_require_outputs->find(op_name) !=
",0,train
9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,digits.py,"@@ -46,7 +46,7 @@ def conv_model(X, y):
     features = tf.reshape(features, [-1, 12])
     return skflow.models.logistic_regression(features, y)
 
-val_monitor = monitors.ValidationMonitor(X_val, y_val, n_classes=10)
+val_monitor = monitors.ValidationMonitor(X_val, y_val, n_classes=10, print_steps=50)
 # Create a classifier, train and predict.
 classifier = skflow.TensorFlowEstimator(model_fn=conv_model, n_classes=10,
                                         steps=1000, learning_rate=0.05,
",0,train
9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,base.py,"@@ -200,7 +200,7 @@ class TensorFlowEstimator(BaseEstimator):
                                                     self.batch_size)
 
         if monitor is None:
-            self._monitor = monitors.BaseMonitor()
+            self._monitor = monitors.default_monitor()
         else:
             self._monitor = monitor
 
",0,train
9b488c5d150b50db57f2e8560b37f0c1e4f0c92d,tensorflow/tensorflow,Add monitor.BaseMonitor and arg pass through from val monitor,monitors.py,"@@ -26,16 +26,21 @@ from skflow.io.data_feeder import setup_train_data_feeder
 # pylint: disable=unused-argument
 # pylint: disable=attribute-defined-outside-init
 
+def default_monitor():
+    return(BaseMonitor())
+
+
 class BaseMonitor(object):
     """""" Base class for all learning monitors. Stores and reports training loss throughout learning
 
+        Parameters:
         print_steps: Number of steps in between printing cost.
         early_stopping_rounds:  Activates early stopping if this is not None.
                                 Loss needs to decrease at least every every <early_stopping_rounds>
                                 round(s) to continue training. (default: None)
 
     """"""
-    def __init__(self, print_steps=100, early_stopping_rounds=500, verbose=1):
+    def __init__(self, print_steps=100, early_stopping_rounds=250, verbose=1):
         self.print_steps = print_steps
         self.early_stopping_rounds = early_stopping_rounds
 
@@ -127,10 +132,15 @@ class ValidationMonitor(BaseMonitor):
         val_X: Validation features
         val_y: Validation labels
         n_classes: Number of labels in output. 0 for regression
-        See BaseMonitor for arguments
+        print_steps: Number of steps in between printing cost.
+        early_stopping_rounds:  Activates early stopping if this is not None.
+                                Loss needs to decrease at least every every <early_stopping_rounds>
+                                round(s) to continue training. (default: None)
+
     """"""
-    def __init__(self, val_X, val_y, n_classes=0, *args, **kwargs):
-        super(ValidationMonitor, self).__init__()
+    def __init__(self, val_X, val_y, n_classes=0, print_steps=100, early_stopping_rounds=250):
+        super(ValidationMonitor, self).__init__(print_steps=print_steps,
+                                                early_stopping_rounds=early_stopping_rounds)
         self.val_feeder = setup_train_data_feeder(val_X, val_y, n_classes, -1)
         self.print_val_loss_buffer = []
         self.all_val_loss_buffer = []
",0,train
58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets.

PiperOrigin-RevId: 424613840
Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",eager_context.cc,"@@ -1,36 +0,0 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the ""License"");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an ""AS IS"" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include ""tensorflow/python/eager/eager_context.h""
-
-#include ""tensorflow/c/eager/c_api.h""
-
-namespace tensorflow {
-namespace eager {
-
-namespace {
-// This object tracks the EagerContext owned by global_py_eager_context in
-// pywrap_tfe_src.cc. Since the vast majority of the Python API is dependent on
-// that global_py_eager_context (including memory management), the Py object
-// owns the C object, so this pointer is non-owning.
-TFE_Context* global_c_eager_context = nullptr;
-}  // namespace
-
-void TFE_Py_SetCEagerContext(TFE_Context* ctx) { global_c_eager_context = ctx; }
-
-TFE_Context* GetCEagerContext() { return global_c_eager_context; }
-
-}  // namespace eager
-}  // namespace tensorflow
",0,train
58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets.

PiperOrigin-RevId: 424613840
Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",eager_context.h,"@@ -1,44 +0,0 @@
-/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the ""License"");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an ""AS IS"" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#ifndef TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_
-#define TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_
-
-#include ""tensorflow/c/eager/c_api.h""
-
-namespace tensorflow {
-namespace eager {
-
-// Sets the EagerContext owned by the current Python eager Context (see
-// TFE_Py_SetEagerContext in pywrap_tfe.h). This is always called in tandem with
-// TFE_Py_SetEagerContext (but not called by it, because its py_context
-// argument is opaque).
-//
-// Do not use this function in production. It is only intended for testing.
-// (see _reset_context in context.py).
-//
-// Not thread-safe.
-void TFE_Py_SetCEagerContext(TFE_Context* ctx);
-
-// Returns the EagerContext owned by the current Python eager Context (see
-// TFE_Py_SetEagerContext in pywrap_tfe.h).
-//
-// Not thread-safe.
-TFE_Context* GetCEagerContext();
-
-}  // namespace eager
-}  // namespace tensorflow
-
-#endif  // TENSORFLOW_PYTHON_EAGER_EAGER_CONTEXT_H_
",0,train
58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets.

PiperOrigin-RevId: 424613840
Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",pywrap_tfe.h,"@@ -384,6 +384,23 @@ PyObject* TFE_Py_SetEagerContext(PyObject* py_context);
 // some point.
 PyObject* GetPyEagerContext();
 
+// Sets the EagerContext owned by the current Python eager Context (see
+// TFE_Py_SetEagerContext). This is always called in tandem with
+// TFE_Py_SetEagerContext (but not called by it, because its py_context
+// argument is opaque).
+//
+// Do not use this function in production. It is only intended for testing.
+// (see _reset_context in context.py).
+//
+// Not thread-safe.
+void TFE_Py_SetCEagerContext(TFE_Context* ctx);
+
+// Returns the EagerContext owned by the current Python eager Context (see
+// TFE_Py_SetEagerContext).
+//
+// Not thread-safe.
+TFE_Context* GetCEagerContext();
+
 // These are exposed since there is SWIG code that calls these.
 // Returns a pre-allocated status if it exists.
 TF_Status* GetStatus();
",0,train
58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets.

PiperOrigin-RevId: 424613840
Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",pywrap_tfe_src.cc,"@@ -4004,10 +4004,19 @@ namespace {
 // object currently active. This object is opaque and wrapped inside a Python
 // Capsule. However, the EagerContext object it holds is tracked by the
 // global_c_eager_context object.
-// Also see eager_context.cc.
 PyObject* global_py_eager_context = nullptr;
+
+// This object tracks the EagerContext owned by global_py_eager_context. Since
+// the vast majority of the Python API is dependent on that
+// global_py_eager_context (including memory management), the Py object owns the
+// C object, so this pointer is non-owning.
+TFE_Context* global_c_eager_context = nullptr;
 }  // namespace
 
+void TFE_Py_SetCEagerContext(TFE_Context* ctx) { global_c_eager_context = ctx; }
+
+TFE_Context* GetCEagerContext() { return global_c_eager_context; }
+
 PyObject* TFE_Py_SetEagerContext(PyObject* py_context) {
   Py_XDECREF(global_py_eager_context);
   global_py_eager_context = PyWeakref_NewRef(py_context, nullptr);
",0,train
58a58794c915d70b4429eb5b80e21ba59d0f84f3,tensorflow/tensorflow,"Break Python-independent logic out of pywrap_tfe_src, to avoid cpython deps into C++-only targets.

PiperOrigin-RevId: 424613840
Change-Id: I604e4d2a0dc79f0b675a91ff7cccc0820104e401",tfe_wrapper.cc,"@@ -38,7 +38,6 @@ limitations under the License.
 #include ""tensorflow/c/tf_status_helper.h""
 #include ""tensorflow/compiler/jit/flags.h""
 #include ""tensorflow/compiler/jit/get_compiler_ir.h""
-#include ""tensorflow/python/eager/eager_context.h""
 #include ""tensorflow/python/eager/pywrap_tensor_conversion.h""
 #include ""tensorflow/python/eager/pywrap_tfe.h""
 #include ""tensorflow/python/lib/core/py_exception_registry.h""
@@ -1202,8 +1201,7 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
     return tensorflow::PyoOrThrow(TFE_Py_SetEagerContext(o.ptr()));
   });
   m.def(""TFE_Py_SetCEagerContext"", [](const py::handle& ctx) {
-    tensorflow::eager::TFE_Py_SetCEagerContext(
-        tensorflow::InputTFE_Context(ctx));
+    TFE_Py_SetCEagerContext(tensorflow::InputTFE_Context(ctx));
   });
   m.def(""TFE_Py_RegisterVSpace"", [](const py::handle& o) {
     return tensorflow::PyoOrThrow(TFE_Py_RegisterVSpace(o.ptr()));
",0,train
9614961027fbf30b4489054bb898056f7c0fda8e,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-04-16

PiperOrigin-RevId: 368805037
Change-Id: Idc7633582a2d8e70367934b6e5ed40d0da216229",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 4, 15)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 4, 16)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,test
7c642ef7f713a53b8c04730b33a4d55da8915ac1,tensorflow/tensorflow,"[tf.data] Fix noisy warning. (#22778)

PiperOrigin-RevId: 215607171",dataset_ops.py,"@@ -1831,9 +1831,10 @@ class StructuredFunctionWrapper(object):
           flat_classes.append(component)
           flat_shapes.append(component)
           flat_types.append(component)
-          if t.options() is not None:  # pylint: disable=protected-access
-            warnings.warn(""Encountered a nested dataset with options. These ""
-                          ""options will not be applied to the outer dataset."")
+          if t.options() != Options():
+            warnings.warn(""Encountered a nested dataset with non-default ""
+                          ""options. These options will not be propagated to ""
+                          ""the outer dataset."")
         else:
           try:
             t = ops.convert_to_tensor(t)
",0,test
612a5fb91ed6a6c229c5f4932307747699cabe90,tensorflow/tensorflow,"Clamp f32->f16 quantization to max/min range of float16

PiperOrigin-RevId: 339569171
Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4",quantization_utils.cc,"@@ -502,9 +502,14 @@ TfLiteStatus QuantizeTensorFloat16(ModelT* model, TensorT* tensor) {
   // Transform float data to float16.
   std::vector<Eigen::half> quantized_buffer;
   quantized_buffer.resize(num_elements);
-  std::transform(
-      float_vector.begin(), float_vector.end(), quantized_buffer.begin(),
-      [](float a) { return Eigen::half_impl::float_to_half_rtne(a); });
+  constexpr float kMaxFloat16Value = 65504.f;
+  constexpr float kMinFloat16Value = -65504.f;
+  std::transform(float_vector.begin(), float_vector.end(),
+                 quantized_buffer.begin(), [=](float a) {
+                   float clamped = std::min(std::max(a, kMinFloat16Value),
+                                            kMaxFloat16Value);
+                   return Eigen::half_impl::float_to_half_rtne(clamped);
+                 });
 
   char* half_buffer = reinterpret_cast<char*>(quantized_buffer.data());
   model->buffers[tensor->buffer]->data.assign(
",0,train
612a5fb91ed6a6c229c5f4932307747699cabe90,tensorflow/tensorflow,"Clamp f32->f16 quantization to max/min range of float16

PiperOrigin-RevId: 339569171
Change-Id: Ic9695ef175aca449ec905b9d9e5d3893ca07fbd4",quantization_utils_test.cc,"@@ -575,6 +575,42 @@ TEST_F(QuantizationUtilsTest, SymmetricQuantizeTensor) {
   EXPECT_EQ(quant_buffer_size * 4, float_buffer_size);
 }
 
+TEST_F(QuantizationUtilsTest, QuantizeFloat16Clamp) {
+  // Create data.
+  auto model = absl::make_unique<ModelT>();
+  auto subgraph = absl::make_unique<tflite::SubGraphT>();
+  auto tensor = absl::make_unique<TensorT>();
+  auto buffer = absl::make_unique<tflite::BufferT>();
+  constexpr int kNumElements = 6;
+  const std::vector<float> weights = {2.0, 1.0, 65504., 65505, -65504., -99999};
+  auto weights_reinterpreted_data =
+      reinterpret_cast<const unsigned char*>(weights.data());
+  buffer->data.assign(weights_reinterpreted_data,
+                      weights_reinterpreted_data + weights.size() * 4);
+  tensor->buffer = 0;
+  tensor->shape = {1, kNumElements};
+
+  // Wire the model.
+  model->subgraphs.push_back(std::move(subgraph));
+  model->subgraphs[0]->tensors.push_back(std::move(tensor));
+  model->buffers.push_back(std::move(buffer));
+
+  // Call and verify.
+  EXPECT_EQ(
+      QuantizeTensorFloat16(model.get(), model->subgraphs[0]->tensors[0].get()),
+      kTfLiteOk);
+  auto weightsf16 = reinterpret_cast<Eigen::half*>(
+      model->buffers[model->subgraphs[0]->tensors[0]->buffer]->data.data());
+  std::vector<float> wf32(kNumElements);
+  std::transform(weightsf16, weightsf16 + 6, wf32.begin(), [](Eigen::half a) {
+    return Eigen::half_impl::half_to_float(a);
+  });
+
+  EXPECT_THAT(wf32,
+              ElementsAreArray({2.0, 1.0, 65504., 65504., -65504., -65504.}));
+  EXPECT_EQ(model->subgraphs[0]->tensors[0]->type, TensorType_FLOAT16);
+}
+
 TEST_F(QuantizationUtilsTest, QuantizeFloat16) {
   // Conv model has weights between 0 and 10.
   // Quantize the weights tensor.
",0,train
eabf2ec12818c4e8ae7008ce14ed12ad38bd1537,tensorflow/tensorflow,"Work around a buggy get_config by ignoring its errors when checkpointing

This is extra ""nice to have"" metadata, and otherwise it looks like a checkpointing error. Not worth bothering people about.

PiperOrigin-RevId: 231318216",base.py,"@@ -846,11 +846,16 @@ class Checkpointable(object):
       return {}
     weak_self = weakref.ref(self)
     def _state_callback():
+      """"""Serializes `self.get_config()` for saving.""""""
       dereferenced_self = weak_self()
       if dereferenced_self:
-        return json.dumps(dereferenced_self,
-                          default=serialization.get_json_type,
-                          sort_keys=True).encode(""utf8"")
+        try:
+          return json.dumps(dereferenced_self,
+                            default=serialization.get_json_type,
+                            sort_keys=True).encode(""utf8"")
+        except TypeError:
+          # Even if get_config worked objects may have produced garbage.
+          return """"
       else:
         return """"
     return {OBJECT_CONFIG_JSON_KEY: functools.partial(
",0,train
eabf2ec12818c4e8ae7008ce14ed12ad38bd1537,tensorflow/tensorflow,"Work around a buggy get_config by ignoring its errors when checkpointing

This is extra ""nice to have"" metadata, and otherwise it looks like a checkpointing error. Not worth bothering people about.

PiperOrigin-RevId: 231318216",base_test.py,"@@ -83,6 +83,19 @@ class InterfaceTests(test.TestCase):
     with self.assertRaisesRegexp(AssertionError, ""foo_attr""):
       status.assert_consumed()
 
+  def testBuggyGetConfig(self):
+
+    class NotSerializable(object):
+      pass
+
+    class GetConfigRaisesError(base.Checkpointable):
+
+      def get_config(self):
+        return NotSerializable()
+
+    util.Checkpoint(obj=GetConfigRaisesError()).save(
+        os.path.join(self.get_temp_dir(), ""ckpt""))
+
 
 if __name__ == ""__main__"":
   ops.enable_eager_execution()
",0,train
eaa3e88ec3322fd0aa4224040215c3c29a752613,tensorflow/tensorflow,"[XLA] Show metric name in categories table header.

Instead of

   ********** microseconds above estimated optimum report **********
   [...]

   ********** categories table **********
   The left hand side numbers are microseconds above estimated optimum.
   [...]

we now print

   ********** microseconds above estimated optimum report **********
   [...]
   ********** categories table for microseconds above estimated optimum **********
   [...]

which I think is more explicit and harder to misread.

PiperOrigin-RevId: 207325046",metric_table_report.cc,"@@ -134,8 +134,7 @@ void MetricTableReport::AppendHeader() {
 void MetricTableReport::AppendCategoryTable() {
   const std::vector<Category> categories = MakeCategories(&entries_);
 
-  AppendLine(""********** categories table **********"");
-  AppendLine(""The left hand side numbers are "", metric_name_, ""."");
+  AppendLine(""********** categories table for "", metric_name_, "" **********"");
   AppendLine();
 
   double metric_sum = UnaccountedMetric();
@@ -185,8 +184,8 @@ void MetricTableReport::AppendCategoryTable() {
 }
 
 void MetricTableReport::AppendEntryTable() {
-  AppendLine(""********** "", entry_name_, "" table **********"");
-  AppendLine(""The left hand side numbers are "", metric_name_, ""."");
+  AppendLine(""********** "", entry_name_, "" table for "", metric_name_,
+             "" **********"");
   AppendLine();
 
   double metric_sum = UnaccountedMetric();
",0,train
27e6c7b49f4558dfc4bd59a9c492bf4f390a77da,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-12-16

PiperOrigin-RevId: 285720630
Change-Id: Ib744d5f7de70a6c6d73dd1a386712e404d0c2b99",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 15)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 12, 16)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value

PiperOrigin-RevId: 233420932",bigtable_kernels.cc,"@@ -19,7 +19,6 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/threadpool.h""
 
 namespace tensorflow {
-
 namespace {
 
 class BigtableClientOp : public OpKernel {
@@ -341,8 +340,8 @@ class ToBigtableOp : public AsyncOpKernel {
   }
 
   template <typename T>
-  Status ParseScalarArgument(OpKernelContext* ctx,
-                             const StringPiece& argument_name, T* output) {
+  Status ParseScalarArgument(OpKernelContext* ctx, StringPiece argument_name,
+                             T* output) {
     const Tensor* argument_t;
     TF_RETURN_IF_ERROR(ctx->input(argument_name, &argument_t));
     if (!TensorShapeUtils::IsScalar(argument_t->shape())) {
@@ -360,5 +359,4 @@ REGISTER_KERNEL_BUILDER(Name(""DatasetToBigtable"").Device(DEVICE_CPU),
 
 }  // namespace
 }  // namespace data
-
 }  // namespace tensorflow
",0,train
1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value

PiperOrigin-RevId: 233420932",gdr_collective_executor_mgr.cc,"@@ -100,8 +100,7 @@ class CollectiveRemoteAccessDistributed : public CollectiveRemoteAccessLocal {
 
     // Logic to be executed on the RecvBufAsync callback.
     auto recv_buf_callback = [this, state, peer_task, to_device, to_alloc_attr,
-                              to_device_ctx, to_tensor, dev_to_dev_stream_index,
-                              done](const Status& s) {
+                              to_device_ctx, to_tensor, done](const Status& s) {
       if (s.ok()) {
         remote_memory_manager_->TensorFromTransportOptions(
             to_tensor, state->call->resp_.transport_options(), to_device,
",0,train
1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value

PiperOrigin-RevId: 233420932",model_ops.cc,"@@ -304,7 +304,7 @@ class TraverseTreeV4Op : public OpKernel {
     auto worker_threads = context->device()->tensorflow_cpu_worker_threads();
     int num_threads = worker_threads->num_threads;
     const int64 costPerTraverse = 500;
-    auto traverse = [this, &set_leaf_ids, &data_set, decision_tree_resource,
+    auto traverse = [&set_leaf_ids, &data_set, decision_tree_resource,
                      num_data](int64 start, int64 end) {
       CHECK(start <= end);
       CHECK(end <= num_data);
",0,train
1d6f79ac5f55fe49b3410eb7c6e6a4269e53f9a3,tensorflow/tensorflow,"Cleanup: remove unused lambda captures, pass string view by value

PiperOrigin-RevId: 233420932",stats_ops.cc,"@@ -307,7 +307,7 @@ class ProcessInputOp : public OpKernel {
     // from a digits run on local desktop.  Heuristics might be necessary
     // if it really matters that much.
     const int64 costPerUpdate = 1000;
-    auto update = [this, &target, &leaf_ids_tensor, &num_targets, &data_set,
+    auto update = [&target, &leaf_ids_tensor, &num_targets, &data_set,
                    fertile_stats_resource, &locks, &set_lock, &ready_to_split,
                    num_data](int64 start, int64 end) {
       CHECK(start <= end);
@@ -317,7 +317,7 @@ class ProcessInputOp : public OpKernel {
                   static_cast<int32>(end), &ready_to_split);
     };
 
-    auto update_collated = [this, &target, &num_targets, fertile_stats_resource,
+    auto update_collated = [&target, &num_targets, fertile_stats_resource,
                             tree_resource, &leaf_examples, &set_lock,
                             &ready_to_split, &data_set,
                             num_leaves](int64 start, int64 end) {
",0,train
ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp.

PiperOrigin-RevId: 309505494
Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",group_events.cc,"@@ -434,6 +434,9 @@ std::vector<InterThreadConnectInfo> CreateInterThreadConnectInfoList() {
       {HostEventType::kExecutorStateProcess,
        HostEventType::kIteratorGetNextOp,
        {StatType::kStepId, StatType::kIterNum}},
+      {HostEventType::kExecutorStateProcess,
+       HostEventType::kIteratorGetNextAsOptionalOp,
+       {StatType::kStepId, StatType::kIterNum}},
       {HostEventType::kKernelLaunch,
        HostEventType::kKernelExecute,
        {StatType::kCorrelationId}},
",0,train
ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp.

PiperOrigin-RevId: 309505494
Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",xplane_schema.cc,"@@ -102,6 +102,7 @@ const HostEventTypeMap& GetHostEventTypeMap() {
       {""LocalExecutable::Execute"", kLocalExecutableExecute},
       // tf.data related.
       {""IteratorGetNextOp::DoCompute"", kIteratorGetNextOp},
+      {""IteratorGetNextAsOptionalOp::DoCompute"", kIteratorGetNextAsOptionalOp},
       // Virtual events for grouping.
       {""HostTrainingLoopIteration"", kHostTrainingLoopIteration},
       {""AsyncExecutorTraceContext"", kAsyncExecutorTraceContext},
",0,train
ed64647b2b408c9b7c84af796793e6c32ab5f23e,tensorflow/tensorflow,"Add the support for IteratorGetNextAsOptionalOp.

PiperOrigin-RevId: 309505494
Change-Id: I939fb769f8338d99402592858e4d7b8e7a1aa56c",xplane_schema.h,"@@ -100,6 +100,7 @@ enum HostEventType {
   kLocalExecutableExecute,
   // tf.data related.
   kIteratorGetNextOp,
+  kIteratorGetNextAsOptionalOp,
   // Virtual events for grouping.
   kHostTrainingLoopIteration,
   kAsyncExecutorTraceContext,
",0,train
bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f,tensorflow/tensorflow,"Register fp16 Reduce min on GPU.

PiperOrigin-RevId: 177274800",reduction_ops_min.cc,"@@ -50,6 +50,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_CPU_KERNELS);
           .TypeConstraint<int64>(""Tidx"")                                       \
           .HostMemory(""reduction_indices""),                                    \
       ReductionOp<GPUDevice, type, int64, Eigen::internal::MinReducer<type>>);
+REGISTER_GPU_KERNELS(Eigen::half);
 REGISTER_GPU_KERNELS(float);
 REGISTER_GPU_KERNELS(double);
 
",0,train
bc87c28c60dddc6137b11f8a1fd31fa79bcf0c1f,tensorflow/tensorflow,"Register fp16 Reduce min on GPU.

PiperOrigin-RevId: 177274800",reduction_ops_test.cc,"@@ -174,6 +174,11 @@ static void BM_Min2DToScalarGPU(int iters, int num_x, int num_y) {
 }
 BENCHMARK(BM_Min2DToScalarGPU)->RangePair(2048, 8192, 2048, 8192);
 
+static void BM_Min2DToScalarGPUHalf(int iters, int num_x, int num_y) {
+  ReduceToScalar<Eigen::half>(iters, ""gpu"", ""Min"", num_x, num_y);
+}
+BENCHMARK(BM_Min2DToScalarGPUHalf)->RangePair(2048, 8192, 2048, 8192);
+
 static void BM_Bool2DToScalarGPU(int iters, int num_x, int num_y) {
   ReduceToScalar<bool>(iters, ""gpu"", ""All"", num_x, num_y);
 }
",0,train
163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846)

As was pointed out by 9485, BernoulliWithSigmoidProbs is covered
by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs.

This fix closes 9485.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",__init__.py,"@@ -97,7 +97,6 @@ _allowed_symbols = [
     'Autoregressive',
     'Binomial',
     'Bernoulli',
-    'BernoulliWithSigmoidProbs',
     'Beta',
     'BetaWithSoftplusConcentration',
     'Categorical',
",0,train
163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846)

As was pointed out by 9485, BernoulliWithSigmoidProbs is covered
by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs.

This fix closes 9485.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",bernoulli_test.py,"@@ -291,12 +291,6 @@ class BernoulliTest(test.TestCase):
                [np.sqrt(var(0.5)), np.sqrt(var(0.4))]],
               dtype=np.float32))
 
-  def testBernoulliWithSigmoidProbs(self):
-    p = np.array([8.3, 4.2])
-    dist = bernoulli.BernoulliWithSigmoidProbs(logits=p)
-    with self.test_session():
-      self.assertAllClose(math_ops.sigmoid(p).eval(), dist.probs.eval())
-
   def testBernoulliBernoulliKL(self):
     with self.test_session() as sess:
       batch_size = 6
",0,train
163fd2ea39f550f45717dd70f26ebebdaf74411e,tensorflow/tensorflow,"Remove obsolete BernoulliWithSigmoidProbs (#16846)

As was pointed out by 9485, BernoulliWithSigmoidProbs is covered
by Bernoulli and is obsolete. This fix removes BernoulliWithSigmoidProbs.

This fix closes 9485.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",bernoulli.py,"@@ -167,26 +167,6 @@ class Bernoulli(distribution.Distribution):
     return math_ops.cast(self.probs > 0.5, self.dtype)
 
 
-class BernoulliWithSigmoidProbs(Bernoulli):
-  """"""Bernoulli with `probs = nn.sigmoid(logits)`.""""""
-
-  def __init__(self,
-               logits=None,
-               dtype=dtypes.int32,
-               validate_args=False,
-               allow_nan_stats=True,
-               name=""BernoulliWithSigmoidProbs""):
-    parameters = locals()
-    with ops.name_scope(name):
-      super(BernoulliWithSigmoidProbs, self).__init__(
-          probs=nn.sigmoid(logits, name=""sigmoid_probs""),
-          dtype=dtype,
-          validate_args=validate_args,
-          allow_nan_stats=allow_nan_stats,
-          name=name)
-    self._parameters = parameters
-
-
 @kullback_leibler.RegisterKL(Bernoulli, Bernoulli)
 def _kl_bernoulli_bernoulli(a, b, name=None):
   """"""Calculate the batched KL divergence KL(a || b) with a and b Bernoulli.
",0,train
dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations.

PiperOrigin-RevId: 386587289
Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_fix.h,"@@ -52,9 +52,6 @@ class HloPassFix : public Pass {
   StatusOr<bool> Run(HloModule* module) override {
     RunState run_state(module);
     TF_RETURN_IF_ERROR(RunToFixPoint(module, &run_state));
-    if (Pass::IsPassPipeline()) {
-      Pass::ResetPassPipeline();
-    }
     return !run_state.changed.empty();
   }
 
",0,train
dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations.

PiperOrigin-RevId: 386587289
Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_interface.h,"@@ -92,8 +92,6 @@ class HloPassInterface {
   virtual StatusOr<bool> RunOnModuleGroup(HloModuleGroup* module_group) = 0;
 
   virtual bool IsPassPipeline() { return false; }
-
-  virtual void ResetPassPipeline() {}
 };
 
 // Base class for passes which are module-scoped.
",0,train
dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations.

PiperOrigin-RevId: 386587289
Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_pipeline.cc,"@@ -16,9 +16,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/hlo_pass_pipeline.h""
 
 #include <functional>
-#include <string>
 
-#include ""absl/algorithm/container.h""
 #include ""absl/container/flat_hash_map.h""
 #include ""absl/container/flat_hash_set.h""
 #include ""absl/strings/str_format.h""
@@ -122,10 +120,6 @@ void SetInstructionMetadata(HloModuleGroup& module_group) {
 
 }  // namespace
 
-void HloPassPipeline::ResetPassPipeline() {
-  absl::c_fill(pass_run_counts_since_change_, 0);
-}
-
 template <typename HloT>
 Status HloPassPipeline::RunInvariantCheckers(
     HloT* hlo, absl::string_view after_pass_name) {
@@ -172,14 +166,10 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
   bool changed = false;
   for (int i = 0; i < passes.size(); i++) {
     HloPassInterface* pass = passes[i];
-    if (pass_run_counts_since_change_[i] > 3) {
-      VLOG(1) << ""  Skipping HLO pass "" << passes[i]->name();
-      continue;
-    }
     XLA_SCOPED_LOGGING_TIMER(absl::StrCat(""HLO pass: "", pass->name()));
     std::string pass_name = std::string(pass->name());
     VLOG(1) << ""  HLO pass "" << pass_name;
-    VLOG(3) << ""  Module hash "" << hlo->Hash();
+    VLOG(2) << ""  Module hash "" << hlo->Hash();
     if (!pass->IsPassPipeline()) {
       compilation_stats_->StartPass(pass_name);
     }
@@ -196,13 +186,7 @@ StatusOr<bool> HloPassPipeline::RunPassesInternal(
     RecordPassEndMetadata(*hlo, pass_name, pass_changed);
     changed |= pass_changed;
     if (pass_changed) {
-    VLOG(1) << name() << "":"" << pass->name() << "" -> ""
-                 << pass_run_counts_since_change_[i];
-      if (pass_run_counts_since_change_[i] <= 3) {
-        pass_run_counts_since_change_[i] = 0;
-      }
-    } else {
-      ++pass_run_counts_since_change_[i];
+      VLOG(3) << ""  Pass caused changes "" << pass->name();
     }
     TF_RETURN_IF_ERROR(RunInvariantCheckers(hlo, pass_name));
     if (!pass->IsPassPipeline()) {
",0,train
dffd32796e53749d4d3ce90d901b6c04c259d69f,tensorflow/tensorflow,"[XLA] Don't run a pass if it has not been changed in more than 3 fixed pass iterations.

PiperOrigin-RevId: 386587289
Change-Id: I217b75c25fa53d1c5030c4668e395bbdb2ed88b9",hlo_pass_pipeline.h,"@@ -58,7 +58,6 @@ class HloPassPipeline : public HloPassInterface {
     CHECK(!run_called_) << ""AddPass cannot be called after Run"";
     auto pass = new T(std::forward<Args>(args)...);
     passes_.push_back(std::unique_ptr<T>(pass));
-    pass_run_counts_since_change_.push_back(0);
     return *pass;
   }
 
@@ -86,8 +85,6 @@ class HloPassPipeline : public HloPassInterface {
 
   bool IsPassPipeline() override { return true; }
 
-  void ResetPassPipeline() override;
-
   // Return size of passes_.
   int PassesSize() { return passes_.size(); }
   // Return reference to pass specified by index.
@@ -137,8 +134,6 @@ class HloPassPipeline : public HloPassInterface {
 
   const string name_;
   std::vector<std::unique_ptr<HloPassInterface>> passes_;
-  // How many times has the pass run without chaning.
-  std::vector<int64> pass_run_counts_since_change_;
   std::vector<std::unique_ptr<HloPassInterface>> invariant_checkers_;
   bool run_called_ = false;
 
",0,train
589a2d431cf7f1e3479f2f581da0f69b761df165,tensorflow/tensorflow,"TensorFlow for NVIDIA Tegra devices with CUDA support (#14167)

This commit enables CUDA support on compatible devices running Android such as the Nvidia TX1 and TX2 when using Makefile builds.

Note that JetPack for Android is required to build/run Android TF binaries with CUDA support. This should be released by Nvidia in the near future.",register_types.h,"@@ -52,7 +52,7 @@ limitations under the License.
    #undef REGISTER_PARTITION
 */
 
-#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION)
+#if !defined(IS_MOBILE_PLATFORM) || defined(SUPPORT_SELECTIVE_REGISTRATION) || defined(NVIDIA_TEGRA)
 
 // All types are supported, so all macros are invoked.
 //
",0,train
589a2d431cf7f1e3479f2f581da0f69b761df165,tensorflow/tensorflow,"TensorFlow for NVIDIA Tegra devices with CUDA support (#14167)

This commit enables CUDA support on compatible devices running Android such as the Nvidia TX1 and TX2 when using Makefile builds.

Note that JetPack for Android is required to build/run Android TF binaries with CUDA support. This should be released by Nvidia in the near future.",cuda_diagnostics.cc,"@@ -232,7 +232,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
       result = StringToDriverVersion(version);
     }
 #else
-#if !defined(PLATFORM_WINDOWS)
+#if !defined(PLATFORM_WINDOWS) && !defined(NVIDIA_TEGRA)
   // Callback used when iterating through DSOs. Looks for the driver-interfacing
   // DSO and yields its version number into the callback data, when found.
   auto iterate_phdr =
",0,train
4b87b12c96c56a4c6485195ca5123b1a21636221,tensorflow/tensorflow,"Update run_v1_only test with proper reason.

PiperOrigin-RevId: 322164638
Change-Id: I17ffbc5c396c8fa97e2a4dff8ada380795c52ba2",server_lib_same_variables_no_clear_test.py,"@@ -34,7 +34,8 @@ class SameVariablesNoClearTest(test.TestCase):
   # TODO(b/34465411): Starting multiple servers with different configurations
   # in the same test is flaky. Move this test case back into
   # ""server_lib_test.py"" when this is no longer the case.
-  @test_util.run_v1_only(""b/120545219"")
+  @test_util.run_v1_only(
+      ""This exercises tensor lookup via names which is not supported in V2."")
   def testSameVariablesNoClear(self):
     server = server_lib.Server.create_local_server()
 
",0,train
05a122df524904dd8869fb564cae083ad53f3c73,tensorflow/tensorflow,"TFLITE_WITH_RUY_GEMV uses CustomGEMV for float

PiperOrigin-RevId: 292930831
Change-Id: I3786ba562af1cf5f3a8e000af4abac65696bf3a3",cpu_backend_gemm.h,"@@ -94,15 +94,19 @@ void Gemm(const MatrixParams<LhsScalar>& lhs_params, const LhsScalar* lhs_data,
           CpuBackendContext* context) {
   ruy::profiler::ScopeLabel label(""cpu_backend_gemm::Gemm"");
   ValidateParams(lhs_params, rhs_params, dst_params, params);
-#ifndef TFLITE_WITH_RUY_GEMV
-  if (dst_params.cols == 1) {
+  bool do_custom_gemv = dst_params.cols == 1;
+#ifdef TFLITE_WITH_RUY_GEMV
+  // Prefer a Ruy GEMM to Custom GEMV unless we are doing float math.
+  // TODO(b/148692500): Add float GEMV kernels to Ruy.
+  do_custom_gemv = do_custom_gemv && std::is_floating_point<DstScalar>::value;
+#endif
+  if (do_custom_gemv) {
     // GEMV case: try a custom fast GEMV path.
     if (detail::CustomGemv(lhs_params, lhs_data, rhs_params, rhs_data,
                            dst_params, dst_data, params, context)) {
       return;
     }
   }
-#endif
   ruy::profiler::ScopeLabel label2(""cpu_backend_gemm::Gemm: general GEMM"");
   GemmImpl<LhsScalar, RhsScalar, AccumScalar, DstScalar,
            quantization_flavor>::Run(lhs_params, lhs_data, rhs_params, rhs_data,
",0,train
647ab367ed13cd166577a62e5088c083face328f,tensorflow/tensorflow,"Add float16 and bfloat16 support for tf.image.rgb_to_hsv/tf.image.hsv_to_rgb

This PR addresses the issue raised in 54855 where there was no float16
and bfloat16 support for tf.image.rgb_to_hsv/tf.image.hsv_to_rgb

This PR fixes 54855.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",colorspace_op.cc,"@@ -116,6 +116,8 @@ class HSVToRGBOp : public OpKernel {
   template class HSVToRGBOp<CPUDevice, T>;
 TF_CALL_float(REGISTER_CPU);
 TF_CALL_double(REGISTER_CPU);
+TF_CALL_half(REGISTER_CPU);
+TF_CALL_bfloat16(REGISTER_CPU);
 
 #if (defined(GOOGLE_CUDA) && GOOGLE_CUDA) || \
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
",0,train
3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run.

PiperOrigin-RevId: 257638242",detection_responder.cc,"@@ -0,0 +1,25 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h""
+
+// This dummy implementation writes person and no person scores to the error
+// console. Real applications will want to take some custom action instead, and
+// should implement their own versions of this function.
+void RespondToDetection(tflite::ErrorReporter* error_reporter,
+                        uint8_t person_score, uint8_t no_person_score) {
+  error_reporter->Report(""person score:%d no person score %d"", person_score,
+                         no_person_score);
+}
",0,train
3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run.

PiperOrigin-RevId: 257638242",detection_responder.h,"@@ -0,0 +1,34 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+// Provides an interface to take an action based on the output from the person
+// detection model.
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_
+
+#include ""tensorflow/lite/c/c_api_internal.h""
+#include ""tensorflow/lite/experimental/micro/micro_error_reporter.h""
+
+// Called every time the results of a person detection run are available. The
+// `person_score` has the numerical confidence that the captured image contains
+// a person, and `no_person_score` has the numerical confidence that the image
+// does not contain a person. Typically if person_score > no person score, the
+// image is considered to contain a person.  This threshold may be adjusted for
+// particular applications.
+void RespondToDetection(tflite::ErrorReporter* error_reporter,
+                        uint8_t person_score, uint8_t no_person_score);
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICRO_EXAMPLES_MICRO_VISION_DETECTION_RESPONDER_H_
",0,train
3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run.

PiperOrigin-RevId: 257638242",detection_responder_test.cc,"@@ -0,0 +1,34 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h""
+
+#include ""tensorflow/lite/experimental/micro/testing/micro_test.h""
+#include ""tensorflow/lite/experimental/micro/testing/test_utils.h""
+
+TF_LITE_MICRO_TESTS_BEGIN
+
+TF_LITE_MICRO_TEST(TestCallability) {
+  tflite::MicroErrorReporter micro_error_reporter;
+  tflite::ErrorReporter* error_reporter = &micro_error_reporter;
+
+  // This will have external side-effects (like printing to the debug console
+  // or lighting an LED) that are hard to observe, so the most we can do is
+  // make sure the call doesn't crash.
+  RespondToDetection(error_reporter, 100, 200);
+  RespondToDetection(error_reporter, 200, 100);
+}
+
+TF_LITE_MICRO_TESTS_END
",0,train
3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run.

PiperOrigin-RevId: 257638242",main.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h""
 #include ""tensorflow/lite/experimental/micro/examples/micro_vision/image_provider.h""
 #include ""tensorflow/lite/experimental/micro/examples/micro_vision/model_settings.h""
 #include ""tensorflow/lite/experimental/micro/examples/micro_vision/person_detect_model_data.h""
@@ -69,12 +70,10 @@ int main(int argc, char* argv[]) {
 
     TfLiteTensor* output = interpreter.output(0);
 
-    // Log the person score and no person score.
+    // Process the inference results.
     uint8_t person_score = output->data.uint8[kPersonIndex];
     uint8_t no_person_score = output->data.uint8[kNotAPersonIndex];
-    error_reporter->Report(
-        ""person data.  person score: %d, no person score: %d\n"", person_score,
-        no_person_score);
+    RespondToDetection(error_reporter, person_score, no_person_score);
   }
 
   return 0;
",0,train
3526f05b16ed8ab00f4287b62b8b49589fbf7971,tensorflow/tensorflow,"Add detection_responder which allows each platform to process the person detection output in its own way. For example, sparkfun_edge lights up the yellow LED for no person and the green LED for person, and toggles the blue LED on each run.

PiperOrigin-RevId: 257638242",detection_responder.cc,"@@ -0,0 +1,54 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/experimental/micro/examples/micro_vision/detection_responder.h""
+
+#include ""am_bsp.h""  // NOLINT
+
+// This implementation will light up LEDs on the board in response to the
+// inference results.
+void RespondToDetection(tflite::ErrorReporter* error_reporter,
+                        uint8_t person_score, uint8_t no_person_score) {
+  static bool is_initialized = false;
+  if (!is_initialized) {
+    // Set up LEDs as outputs.  Leave the red LED alone since that's an error
+    // indicator for sparkfun_edge in image_provider.
+    am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_BLUE, g_AM_HAL_GPIO_OUTPUT_12);
+    am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_GREEN, g_AM_HAL_GPIO_OUTPUT_12);
+    am_hal_gpio_pinconfig(AM_BSP_GPIO_LED_YELLOW, g_AM_HAL_GPIO_OUTPUT_12);
+    is_initialized = true;
+  }
+
+  // Toggle the blue LED every time an inference is performed.
+  static int count = 0;
+  if (++count & 1) {
+    am_hal_gpio_output_set(AM_BSP_GPIO_LED_BLUE);
+  } else {
+    am_hal_gpio_output_clear(AM_BSP_GPIO_LED_BLUE);
+  }
+
+  // Turn on the green LED if a person was detected.  Turn on the yellow LED
+  // otherwise.
+  am_hal_gpio_output_clear(AM_BSP_GPIO_LED_YELLOW);
+  am_hal_gpio_output_clear(AM_BSP_GPIO_LED_GREEN);
+  if (person_score > no_person_score) {
+    am_hal_gpio_output_set(AM_BSP_GPIO_LED_GREEN);
+  } else {
+    am_hal_gpio_output_set(AM_BSP_GPIO_LED_YELLOW);
+  }
+
+  error_reporter->Report(""person score:%d no person score %d"", person_score,
+                         no_person_score);
+}
",0,train
0047edb7b1d35d588c3e3fb5bfe7de60c67c351c,tensorflow/tensorflow,"Return failure for ops with zero results in TF dialect folding hook

Currently, the hook returns success for ops like tf.Yield that have no side effects and zero results. That causes the canonicalizer, or any other pass using the greedy rewriter, to not converge. The canonicalizer pass doesn't handle the return value of the greedy rewriter, so this change doesn't have any observable effect for the canonicalizer.

The added test passes before this change as well because the canonicalizer pass ignores the convergence issue in the rewriter.

Ran into this issue while using greedy rewriter to replace _XlaHostComputeMlir op by XlaHostCompute op.

PiperOrigin-RevId: 362152694
Change-Id: I30a3829de7c0a75fa8f0b137246a50aedc0db918",constant_fold.cc,"@@ -72,8 +72,11 @@ LogicalResult ConstantFoldFallbackHook(
     Operation* inst, ArrayRef<Attribute> operands,
     SmallVectorImpl<OpFoldResult>& results) {  // NOLINT
   // Instructions with side effects should not be constant folded to preserve
-  // the original semantics.
-  if (inst->hasTrait<OpTrait::TF::NoConstantFold>() ||
+  // the original semantics. Ops that have no side effect and zero results but
+  // could be folded should have a custom folder instead of relying on the
+  // TensorFlow folding hook.
+  if (inst->getNumResults() == 0 ||
+      inst->hasTrait<OpTrait::TF::NoConstantFold>() ||
       inst->getNumRegions() != 0 || !MemoryEffectOpInterface::hasNoEffect(inst))
     return failure();
 
",0,train
401070d057969b522e0ef176a0adc5e14eb74979,tensorflow/tensorflow,"[TF:TRT] Change `AsyncHelper` to call callback function once.

Macros such as OP_REQUIRES_OK_ASYNC require AsyncHelper to have this operator, which can be used to invoke the callback function. However, in our case, we only need to invoke the callback function when the object is destructed.

PiperOrigin-RevId: 364582970
Change-Id: I93d75d725eb0851b1122aaabe2b159fb60ad42a0",trt_engine_op.cc,"@@ -115,11 +115,11 @@ class AsyncHelper : public core::RefCounted {
  public:
   AsyncHelper(AsyncOpKernel::DoneCallback done) : done_(done) {}
 
-  ~AsyncHelper() override { this->operator()(); }
+  ~AsyncHelper() override { done_(); }
 
-  void operator()() {
-      done_();
-  }
+  // The function call operator is used in error handling. However, invoking
+  // the callback is deferred until destruction.
+  void operator()() {}
 
  private:
   AsyncOpKernel::DoneCallback done_;
@@ -502,8 +502,9 @@ void TRTEngineOp::ExecuteNativeSegment(OpKernelContext* ctx,
                                 allow_soft_placement_, ctx->num_inputs(),
                                 ctx->num_outputs());
     OP_REQUIRES_OK_ASYNC(ctx, status_or_handle.status(), *helper);
-    native_execution_func_handle_ = status_or_handle.ValueOrDie();
+    native_execution_func_handle_ = *status_or_handle;
   }
+
   auto lib = ctx->function_library();
   FunctionLibraryRuntime::Options opts;
   opts.rendezvous = ctx->rendezvous();
",0,train
f66f384729b2a2f70fd01902f49b0b7a95be9f26,tensorflow/tensorflow,"Make CHLO->HLO patterns extend OpRewritePattern vs OpConversionPattern.

* In the absence of type conversion, this is more generally compatible (i.e., with the greedy rewriter).
* Consistent with the rest of the legalize_tf patterns.

PiperOrigin-RevId: 311209137
Change-Id: I3a409dbc307c141753c73ae7731276c61a2728d0",chlo_legalize_to_hlo.cc,"@@ -33,24 +33,23 @@ namespace {
 // Converts binary ops that statically are determined to not broadcast directly
 // to the corresponding xla_hlo non-broadcasting op.
 template <typename ChloOpTy, typename HloOpTy, typename Adaptor>
-struct ConvertTrivialNonBroadcastBinaryOp
-    : public OpConversionPattern<ChloOpTy> {
-  using OpConversionPattern<ChloOpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      ChloOpTy op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+struct ConvertTrivialNonBroadcastBinaryOp : public OpRewritePattern<ChloOpTy> {
+  using OpRewritePattern<ChloOpTy>::OpRewritePattern;
+  LogicalResult matchAndRewrite(ChloOpTy op,
+                                PatternRewriter &rewriter) const override {
     // Only rewrite for statically determinable non-broadcasting cases.
-    auto lhs = operands[0].getType().dyn_cast<RankedTensorType>();
-    auto rhs = operands[1].getType().dyn_cast<RankedTensorType>();
-    if (!lhs || !rhs) return failure();
+    auto lhs_type = op.lhs().getType().template dyn_cast<RankedTensorType>();
+    auto rhs_type = op.rhs().getType().template dyn_cast<RankedTensorType>();
+    if (!lhs_type || !rhs_type) return failure();
 
     // Requires rank broadcast.
-    if (lhs.getRank() != rhs.getRank()) return failure();
+    if (lhs_type.getRank() != rhs_type.getRank()) return failure();
     // Any dynamic dimension may require broadcasting and requires more
     // analysis.
-    if (!lhs.hasStaticShape() || !rhs.hasStaticShape()) return failure();
+    if (!lhs_type.hasStaticShape() || !rhs_type.hasStaticShape())
+      return failure();
 
-    for (auto extents : llvm::zip(lhs.getShape(), rhs.getShape())) {
+    for (auto extents : llvm::zip(lhs_type.getShape(), rhs_type.getShape())) {
       auto lhs_extent = std::get<0>(extents);
       auto rhs_extent = std::get<1>(extents);
       if (lhs_extent != rhs_extent) {
@@ -58,9 +57,8 @@ struct ConvertTrivialNonBroadcastBinaryOp
       }
     }
 
-    rewriter.replaceOp(
-        op, {Adaptor::CreateOp(op, op.getResult().getType(), operands[0],
-                               operands[1], rewriter)});
+    rewriter.replaceOp(op, {Adaptor::CreateOp(op, op.getResult().getType(),
+                                              op.lhs(), op.rhs(), rewriter)});
     return success();
   }
 };
@@ -83,14 +81,13 @@ struct ConvertTrivialNonBroadcastBinaryOp
 // Whether that is of any practical benefit remains to be seen.
 template <typename ChloOpTy, typename HloOpTy, typename Adaptor>
 struct ConvertRankedDynamicBroadcastBinaryOp
-    : public OpConversionPattern<ChloOpTy> {
-  using OpConversionPattern<ChloOpTy>::OpConversionPattern;
-  LogicalResult matchAndRewrite(
-      ChloOpTy op, ArrayRef<Value> operands,
-      ConversionPatternRewriter &rewriter) const override {
+    : public OpRewritePattern<ChloOpTy> {
+  using OpRewritePattern<ChloOpTy>::OpRewritePattern;
+  LogicalResult matchAndRewrite(ChloOpTy op,
+                                PatternRewriter &rewriter) const override {
     // Only support ranked operands.
-    Value lhs = operands[0];
-    Value rhs = operands[1];
+    Value lhs = op.lhs();
+    Value rhs = op.rhs();
     auto lhs_type = lhs.getType().dyn_cast<RankedTensorType>();
     auto rhs_type = rhs.getType().dyn_cast<RankedTensorType>();
     auto result_type =
",0,train
fdbd02c8d7f07bd1207938662716fad8857dcd55,tensorflow/tensorflow,"Make moments numerically stable by default. Added tests for moments.
Change: 144114955",nn_impl.py,"@@ -580,6 +580,9 @@ def moments(x, axes, shift=None, name=None, keep_dims=False):
   across `axes`.  If `x` is 1-D and `axes = [0]` this is just the mean
   and variance of a vector.
 
+  Note: for numerical stability, when shift=None, the true mean
+  will be computed and used as the shift.
+
   When using these moments for batch normalization (see
   `tf.nn.batch_normalization`):
 
@@ -592,8 +595,9 @@ def moments(x, axes, shift=None, name=None, keep_dims=False):
     axes: Array of ints.  Axes along which to compute mean and
       variance.
     shift: A `Tensor` containing the value by which to shift the data for
-      numerical stability, or `None` if no shift is to be performed. A shift
-      close to the true mean provides the most numerically stable results.
+      numerical stability, or `None` in which case the true mean of the data is
+      used as shift. A shift close to the true mean provides the most
+      numerically stable results.
     name: Name used to scope the operations that compute the moments.
     keep_dims: produce moments with the same dimensionality as the input.
 
@@ -605,10 +609,17 @@ def moments(x, axes, shift=None, name=None, keep_dims=False):
     # sufficient statistics. As a workaround we simply perform the operations
     # on 32-bit floats before converting the mean and variance back to fp16
     y = math_ops.cast(x, dtypes.float32) if x.dtype == dtypes.float16 else x
-    shift = math_ops.cast(shift, dtypes.float32) if (
-        shift is not None and x.dtype == dtypes.float16) else shift
+    if shift is None:
+      # Compute true mean while keeping the dims for proper broadcasting.
+      shift = array_ops.stop_gradient(
+          math_ops.reduce_mean(y, axes, keep_dims=True))
+    else:
+      shift = math_ops.cast(shift, y.dtype)
     counts, m_ss, v_ss, shift = sufficient_statistics(
         y, axes, shift=shift, keep_dims=keep_dims, name=name)
+    # Reshape shift as needed.
+    shift = array_ops.reshape(shift, array_ops.shape(m_ss))
+    shift.set_shape(m_ss.get_shape())
     with ops.control_dependencies([counts, m_ss, v_ss]):
       mean, variance = normalize_moments(counts, m_ss, v_ss, shift, name=name)
       if x.dtype == dtypes.float16:
",0,train
fdbd02c8d7f07bd1207938662716fad8857dcd55,tensorflow/tensorflow,"Make moments numerically stable by default. Added tests for moments.
Change: 144114955",nn_test.py,"@@ -25,6 +25,7 @@ from six.moves import xrange  # pylint: disable=redefined-builtin
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker
 from tensorflow.python.ops import nn_impl
@@ -791,5 +792,78 @@ class CReluTest(test_lib.TestCase):
       self.assertAllClose(y, z, 1e-4)
 
 
+class MomentsTest(test_lib.TestCase):
+
+  def doOutputTest(self, input_shape, moments_axes, tol=1e-4):
+    for mu in [0.0, 1.0, 1e3]:
+      for sigma in [1.0, 0.1]:
+        for keep_dims in [True, False]:
+          input_values = np.random.rand(*input_shape) * sigma + mu
+          expected_mean = np.mean(input_values, axis=moments_axes,
+                                  keepdims=keep_dims)
+          expected_var = np.var(input_values, axis=moments_axes,
+                                keepdims=keep_dims)
+          with ops.Graph().as_default() as g:
+            with self.test_session(graph=g) as sess:
+              inputs = constant_op.constant(input_values,
+                                            shape=input_shape,
+                                            dtype=dtypes.float32)
+              mean, variance = nn_impl.moments(inputs,
+                                               moments_axes,
+                                               keep_dims=keep_dims)
+
+              [mean, variance] = sess.run([mean, variance])
+              # Make sure that there are no NaNs
+              self.assertFalse(np.isnan(mean).any())
+              self.assertFalse(np.isnan(variance).any())
+              self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
+              self.assertAllClose(variance, expected_var, rtol=tol, atol=tol)
+
+  def testOutput2DInput0(self):
+    self.doOutputTest((10, 300), (0,))
+
+  def testOutput2DInput1(self):
+    self.doOutputTest((10, 300), (1,))
+
+  def testOutput2DInput01(self):
+    self.doOutputTest((10, 300), (0, 1))
+
+  def testOutput4DInput0(self):
+    self.doOutputTest((10, 10, 10, 30), (0,))
+
+  def testOutput4DInput1(self):
+    self.doOutputTest((10, 10, 10, 30), (1,))
+
+  def testOutput4DInput3(self):
+    self.doOutputTest((10, 10, 10, 30), (3,))
+
+  def testOutput4DInput012(self):
+    self.doOutputTest((10, 10, 10, 30), (0, 1, 2))
+
+  def testOutput4DInput123(self):
+    self.doOutputTest((10, 10, 10, 30), (1, 2, 3))
+
+  def testUnstableOutputShiftNone(self):
+    input_shape = (10, 300)
+    moments_axes = (0, 1)
+    mu, sigma = 1e3, 0.1
+    tol = 1e-3
+    input_values = np.random.rand(*input_shape) * sigma + mu
+    expected_mean = np.mean(input_values, axis=moments_axes)
+    expected_var = np.var(input_values, axis=moments_axes)
+
+    with self.test_session() as sess:
+      inputs = constant_op.constant(input_values, shape=input_shape,
+                                    dtype=dtypes.float32)
+      mean, variance = nn_impl.moments(inputs, moments_axes, shift=0.0)
+
+      [mean, variance] = sess.run([mean, variance])
+      # Make sure that there are no NaNs
+      self.assertFalse(np.isnan(mean).any())
+      self.assertFalse(np.isnan(variance).any())
+      self.assertAllClose(mean, expected_mean, rtol=tol, atol=tol)
+      # The variance is unstable
+      self.assertGreater(np.abs(variance - expected_var), 0.1)
+
 if __name__ == ""__main__"":
   test_lib.main()
",0,train
425ef6cfe94d84a876694e38cef3e3814378410d,tensorflow/tensorflow,"Fix mlir error_util_test on windows.

PiperOrigin-RevId: 300828339
Change-Id: If89febf00d5727bcecf1958ddb6b0ede5736b80c",error_util_test.cc,"@@ -58,7 +58,8 @@ TEST(ErrorUtilTest, StatusScopedDiagnosticHandler) {
       emitError(loc) << ""Second diagnostic message reported"";
       return tensorflow::errors::Internal(""Passed in error"");
     };
-    Status s = StatusScopedDiagnosticHandler(&context).Combine(function());
+    StatusScopedDiagnosticHandler ssdh(&context);
+    Status s = ssdh.Combine(function());
     ASSERT_TRUE(tensorflow::errors::IsInternal(s));
     EXPECT_THAT(s.error_message(), HasSubstr(""Passed in error""));
     EXPECT_THAT(s.error_message(), HasSubstr(""Diagnostic message reported""));
",0,train
7478c2798e212f8373d286e953ac90d6f524d1bb,tensorflow/tensorflow,"Support using worker/0 as the client in SetServerDef.

PiperOrigin-RevId: 428611848
Change-Id: Iab846ebcfc46713cfdb09ec40432e14a6083dec6",context_distributed_manager.cc,"@@ -52,6 +52,7 @@ limitations under the License.
 #include ""tensorflow/core/distributed_runtime/remote_device.h""
 #include ""tensorflow/core/distributed_runtime/server_lib.h""
 #include ""tensorflow/core/distributed_runtime/session_mgr.h""
+#include ""tensorflow/core/distributed_runtime/worker_cache.h""
 #include ""tensorflow/core/distributed_runtime/worker_env.h""
 #include ""tensorflow/core/distributed_runtime/worker_interface.h""
 #endif  // !IS_MOBILE_PLATFORM
@@ -548,6 +549,29 @@ Status UpdateContextWithServerDef(EagerContext* context,
 
   // Initialize remote eager workers.
   if (reset_context) {
+    const auto& config = server_def.default_session_config();
+    const bool enable_coordination =
+        !config.experimental().coordination_config().service_type().empty();
+
+    if (enable_coordination) {
+      WorkerCacheInterface* worker_cache = server->master_env()->worker_cache;
+      LOG_AND_RETURN_IF_ERROR(
+          context->GetDistributedManager()->EnableCoordinationService(
+              config.experimental().coordination_config().service_type(),
+              server->worker_env(), server_def, worker_cache));
+      std::unique_ptr<CoordinationClientCache> client_cache;
+      LOG_AND_RETURN_IF_ERROR(
+          worker_cache->GetCoordinationClientCache(&client_cache));
+      TF_RETURN_IF_ERROR(
+          context->GetDistributedManager()
+              ->GetCoordinationServiceAgent()
+              ->Initialize(server->worker_env()->env, server_def,
+                           std::move(client_cache),
+                           /*error_fn=*/[](Status s) {
+                             LOG(ERROR)
+                                 << ""Coordination agent is set to error: "" << s;
+                           }));
+    }
     const Status s = CreateRemoteContexts(
         context, remote_workers, context_id, context_view_id, keep_alive_secs,
         server_def, remote_eager_workers.get(), context->Executor().Async(),
",0,train
7478c2798e212f8373d286e953ac90d6f524d1bb,tensorflow/tensorflow,"Support using worker/0 as the client in SetServerDef.

PiperOrigin-RevId: 428611848
Change-Id: Iab846ebcfc46713cfdb09ec40432e14a6083dec6",c_api_coordination_test.cc,"@@ -38,7 +38,7 @@ namespace {
 
 constexpr char kCoordinationServiceType[] = ""standalone"";
 
-void EnableCoordinationService(tensorflow::ServerDef* server_def) {
+void ConfigCoordinationService(tensorflow::ServerDef* server_def) {
   auto coord_config = server_def->mutable_default_session_config()
                           ->mutable_experimental()
                           ->mutable_coordination_config();
@@ -104,7 +104,7 @@ TEST(CAPI, MultiClientCoordinationService) {
   const int cluster_size = 3;
   tensorflow::ServerDef server_def =
       GetMultiClientServerDef(""worker"", cluster_size);
-  EnableCoordinationService(&server_def);
+  ConfigCoordinationService(&server_def);
   auto worker_thread_fn = [&](int worker_id) {
     tensorflow::ServerDef server_def_copy = server_def;
     // By default, server_def has task index set to 0.
@@ -170,7 +170,7 @@ TEST(CAPI, MultiClientSetGetConfigInOp) {
   const int cluster_size = 3;
   tensorflow::ServerDef server_def =
       GetMultiClientServerDef(""worker"", cluster_size);
-  EnableCoordinationService(&server_def);
+  ConfigCoordinationService(&server_def);
   BlockingCounter finish_counter(cluster_size);
   auto worker_thread_fn = [&](int worker_id) {
     tensorflow::ServerDef server_def_copy = server_def;
@@ -257,7 +257,7 @@ TEST(CAPI, MultiClientCoordinationSetGetConfigs) {
   const int cluster_size = 3;
   tensorflow::ServerDef server_def =
       GetMultiClientServerDef(""worker"", cluster_size);
-  EnableCoordinationService(&server_def);
+  ConfigCoordinationService(&server_def);
   tensorflow::BlockingCounter counter1(cluster_size);
   tensorflow::BlockingCounter counter2(cluster_size);
   tensorflow::BlockingCounter counter3(cluster_size);
@@ -327,7 +327,7 @@ TEST(CAPI, MultiClientPropagateError) {
   const int cluster_size = 3;
   tensorflow::ServerDef server_def =
       GetMultiClientServerDef(""worker"", cluster_size);
-  EnableCoordinationService(&server_def);
+  ConfigCoordinationService(&server_def);
   // Barrier for initializing the cluster.
   tensorflow::BlockingCounter counter1(cluster_size);
   // Barrier for finishing executing operations on all workers.
@@ -387,35 +387,29 @@ TEST(CAPI, MultiClientPropagateError) {
   thread_worker3.join();
 }
 
-TEST(CAPI, SingleClientSetGetConfigInOp) {
+class SingleClientCoordinationServiceTest
+    : public ::testing::Test,
+      public ::testing::WithParamInterface<bool> {};
+
+TEST_P(SingleClientCoordinationServiceTest, TestSetGetConfigInOp) {
+  const bool use_worker0_as_client = GetParam();
   tensorflow::ServerDef server_def = GetServerDef(""worker"", 3);
   const char task0_name[] = ""/job:worker/replica:0/task:0/device:CPU:0"";
   const char task1_name[] = ""/job:worker/replica:0/task:1/device:CPU:0"";
   const char task2_name[] = ""/job:worker/replica:0/task:2/device:CPU:0"";
 
-  EnableCoordinationService(&server_def);
-  // Add localhost job for the remote client task
-  auto cluster = server_def.mutable_cluster();
-  auto client_job = cluster->add_job();
-  client_job->set_name(""localhost"");
-  const int client_port = tensorflow::testing::PickUnusedPortOrDie();
-  client_job->mutable_tasks()->insert(
-      {0, strings::StrCat(""localhost:"", client_port)});
-  server_def.set_job_name(""localhost"");
-  server_def.mutable_default_session_config()
-      ->mutable_experimental()
-      ->mutable_coordination_config()
-      ->set_service_leader(task0_name);
-  string serialized = server_def.SerializeAsString();
-
+  ConfigCoordinationService(&server_def);
   ServerFactory* factory;
   ASSERT_TRUE(ServerFactory::GetFactory(server_def, &factory).ok());
   server_def.set_job_name(""worker"");
   server_def.set_task_index(0);
   std::unique_ptr<tensorflow::ServerInterface> w0;
-  ASSERT_TRUE(
-      factory->NewServer(server_def, ServerFactory::Options(), &w0).ok());
-  ASSERT_TRUE(w0->Start().ok());
+  if (!use_worker0_as_client) {
+    // Start a separate server for worker0 if it's not used as the client
+    ASSERT_TRUE(
+        factory->NewServer(server_def, ServerFactory::Options(), &w0).ok());
+    ASSERT_TRUE(w0->Start().ok());
+  }
   server_def.set_task_index(1);
   std::unique_ptr<tensorflow::ServerInterface> w1;
   ASSERT_TRUE(
@@ -435,6 +429,23 @@ TEST(CAPI, SingleClientSetGetConfigInOp) {
   EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
   TFE_DeleteContextOptions(opts);
 
+  server_def.set_task_index(0);
+  if (!use_worker0_as_client) {
+    // Add localhost job for the remote client task
+    auto cluster = server_def.mutable_cluster();
+    auto client_job = cluster->add_job();
+    client_job->set_name(""localhost"");
+    const int client_port = tensorflow::testing::PickUnusedPortOrDie();
+    client_job->mutable_tasks()->insert(
+        {0, strings::StrCat(""localhost:"", client_port)});
+    server_def.set_job_name(""localhost"");
+  }
+  server_def.mutable_default_session_config()
+      ->mutable_experimental()
+      ->mutable_coordination_config()
+      ->set_service_leader(task0_name);
+  const std::string serialized = server_def.SerializeAsString();
+
   TFE_ContextSetServerDef(ctx, 0, serialized.data(), serialized.size(), status);
   EXPECT_EQ(TF_GetCode(status), TF_OK) << TF_Message(status);
 
@@ -541,5 +552,12 @@ TEST(CAPI, SingleClientSetGetConfigInOp) {
   w2.release();
 }
 
+INSTANTIATE_TEST_SUITE_P(CAPI, SingleClientCoordinationServiceTest,
+                         ::testing::Bool(),
+                         [](const ::testing::TestParamInfo<bool> arg) {
+                           return arg.param ? ""use_worker0_as_client""
+                                            : ""use_remote_client"";
+                         });
+
 }  // namespace
 }  // namespace tensorflow
",0,train
4dee31dc561f8101f4d1275c3640e5da38069215,tensorflow/tensorflow,Added doc in MobileNet for decode_predictions() and preprocess_input(),mobilenet.py,"@@ -436,9 +436,31 @@ def _depthwise_conv_block(inputs,
 
 @keras_export('keras.applications.mobilenet.preprocess_input')
 def preprocess_input(x, data_format=None):
+  """"""Preprocesses a numpy array encoding a batch of images.
+  
+  Arguments
+    x: A 4D numpy array consisting of RGB values within [0, 255].
+
+  Returns
+    Preprocessed array.
+  """"""
   return imagenet_utils.preprocess_input(x, data_format=data_format, mode='tf')
 
 
 @keras_export('keras.applications.mobilenet.decode_predictions')
 def decode_predictions(preds, top=5):
+  """"""Decodes the prediction result from the model.
+
+  Arguments
+    preds: Numpy tensor encoding a batch of predictions.
+    top: Integer, how many top-guesses to return.
+
+  Returns
+    A list of lists of top class prediction tuples
+    `(class_name, class_description, score)`.
+    One list of tuples per sample in batch input.
+
+  Raises
+    ValueError: In case of invalid shape of the `preds` array (must be 2D).
+  """"""
   return imagenet_utils.decode_predictions(preds, top=top)
",0,test
4d0a420c4b4d1fbe3e666bd377de2a40401177d2,tensorflow/tensorflow,Fix clang-format and pylint,convert_nodes.cc,"@@ -3856,8 +3856,10 @@ tensorflow::Status ConvertSegmentToGraphDef(
       marker_nodes.insert(node_name);
       auto seg_node = segment_def->add_node();
       tensorflow::NodeDefBuilder builder(node_name, ""Identity"");
-      auto status = builder.Input(connection.inside_node_name, connection.inside_port, dtype)
-                        .Finalize(seg_node);
+      auto status =
+          builder
+              .Input(connection.inside_node_name, connection.inside_port, dtype)
+              .Finalize(seg_node);
       VLOG(1) << ""Constructing output "" << node_name << "" for the edge ""
               << connection.inside_node_name << "":"" << connection.inside_port
               << "" -> "" << connection.outside_node_name << "":""
",0,train
4d0a420c4b4d1fbe3e666bd377de2a40401177d2,tensorflow/tensorflow,Fix clang-format and pylint,topk_test.py,"@@ -18,10 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import numpy as np
-
 from tensorflow.contrib.tensorrt.test import tf_trt_integration_test_base as trt_test
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import nn_ops
",0,train
66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479

PiperOrigin-RevId: 185900165",constant_folding.cc,"@@ -1375,6 +1375,29 @@ void ConstantFolding::ReplaceOperationWithIdentity(int input_to_forward,
   graph_modified_ = true;
 }
 
+void ConstantFolding::ReplaceOperationWithSnapshot(int input_to_forward,
+                                                   NodeDef* node,
+                                                   GraphDef* graph) {
+  node->set_op(""Snapshot"");
+  DataType dtype = node->attr().at(""T"").type();
+  node->clear_attr();
+  (*node->mutable_attr())[""T""].set_type(dtype);
+
+  // Propagate the designated input through the Snapshot.
+  node->mutable_input()->SwapElements(0, input_to_forward);
+  // Add all other inputs as control dependencies.
+  for (int i = 1; i < node->input_size(); ++i) {
+    if (IsControlInput(node->input(i))) {
+      break;
+    }
+    const string ctrl_dep =
+        AddControlDependency(node->input(i), graph, node_map_.get());
+    node_map_->UpdateInput(node->name(), node->input(i), ctrl_dep);
+    node->set_input(i, ctrl_dep);
+  }
+  graph_modified_ = true;
+}
+
 void ConstantFolding::ReplaceDivisionOfOnesByReciprocal(NodeDef* node,
                                                         GraphDef* graph) {
   node->set_op(""Reciprocal"");
@@ -1443,15 +1466,14 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       graph_modified_ = true;
       continue;
     }
-    const bool safe_to_use_shapes =
-        use_shape_info && (feed_nodes_.empty() || is_aggressive);
+
     const bool is_mul = IsMul(*node);
     const bool is_matmul = IsMatMul(*node);
     const bool is_add = IsAdd(*node) || IsBiasAdd(*node);
     const bool is_sub = IsSub(*node);
     const bool is_any_div = IsAnyDiv(*node);
     // Simplify arithmetic operations with ones or zeros.
-    if (safe_to_use_shapes &&
+    if (use_shape_info &&
         (is_mul || is_matmul || is_add || is_sub || is_any_div) &&
         properties.HasInputProperties(node->name()) &&
         properties.HasOutputProperties(node->name())) {
@@ -1475,7 +1497,7 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
           ((is_mul && x_is_one) || (is_add && x_is_zero))) {
         // TODO(rmlarsen): Handle subtraction 0 - y.
         // 1 * y = y or 0 + y = y.
-        ReplaceOperationWithIdentity(1, node, output);
+        ReplaceOperationWithSnapshot(1, node, output);
         continue;
       }
 
@@ -1495,9 +1517,9 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
       const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
       if (x_matches_output_shape &&
           (((is_mul || is_any_div) && y_is_one) ||
-           ((is_add || is_sub) && y_is_zero && is_aggressive))) {
+           ((is_add || is_sub) && y_is_zero))) {
         // x * 1 = x or x / 1 = x or x +/- 0 = x
-        ReplaceOperationWithIdentity(0, node, output);
+        ReplaceOperationWithSnapshot(0, node, output);
         continue;
       }
 
@@ -1690,6 +1712,7 @@ Status ConstantFolding::RunOptimizationPass(Cluster* cluster,
 
 Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
                                  GraphDef* output) {
+  LOG(INFO) << ""Graph before: "" << item.graph.DebugString();
   nodes_to_preserve_ = item.NodesToPreserve();
   for (const auto& feed : item.feed) {
     feed_nodes_.insert(NodeName(feed.first));
@@ -1716,6 +1739,7 @@ Status ConstantFolding::Optimize(Cluster* cluster, const GrapplerItem& item,
   *output->mutable_library() = item.graph.library();
   *output->mutable_versions() = item.graph.versions();
 
+  LOG(INFO) << ""Graph after: "" << output->DebugString();
   return Status::OK();
 }
 
",0,train
66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479

PiperOrigin-RevId: 185900165",constant_folding.h,"@@ -79,6 +79,8 @@ class ConstantFolding : public GraphOptimizer {
   bool IsZeros(const NodeDef& node) const;
   void ReplaceOperationWithIdentity(int input_to_forward, NodeDef* node,
                                     GraphDef* graph);
+  void ReplaceOperationWithSnapshot(int input_to_forward, NodeDef* node,
+                                    GraphDef* graph);
   Status ReplaceOperationWithConstant(double value,
                                       const TensorShapeProto& shape,
                                       NodeDef* node, GraphDef* graph);
",0,train
66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479

PiperOrigin-RevId: 185900165",constant_folding_test.cc,"@@ -195,8 +195,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     TF_CHECK_OK(s.ToGraphDef(&item.graph));
     item.fetch = {""addn"", ""matmul3"", ""matmul4""};
 
-    ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
-                              nullptr /* cpu_device */);
+    ConstantFolding optimizer(nullptr /* cpu_device */);
     GraphDef output;
     Status status = optimizer.Optimize(nullptr, item, &output);
     TF_EXPECT_OK(status);
@@ -214,11 +213,11 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ(""^zeros"", node.input(0));
         EXPECT_EQ(""^y"", node.input(1));
       } else if (name == ""mul3"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""x"", node.input(0));
         EXPECT_EQ(""^ones"", node.input(1));
       } else if (name == ""mul4"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""y"", node.input(0));
         EXPECT_EQ(""^ones"", node.input(1));
       } else if (name == ""mul5"") {
@@ -230,7 +229,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ(""^zeros_1d"", node.input(0));
         EXPECT_EQ(""^y"", node.input(1));
       } else if (name == ""div1"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""x"", node.input(0));
         EXPECT_EQ(""^ones"", node.input(1));
       } else if (name == ""div2"") {
@@ -266,15 +265,15 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ(2, t.tensor_shape().dim(0).size());
         EXPECT_EQ(3, t.tensor_shape().dim(1).size());
       } else if (name == ""add1"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""x"", node.input(0));
         EXPECT_EQ(""^zeros"", node.input(1));
       } else if (name == ""add2"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""y"", node.input(0));
         EXPECT_EQ(""^zeros"", node.input(1));
       } else if (name == ""bias_add1"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""x"", node.input(0));
         EXPECT_EQ(""^zeros_1d"", node.input(1));
       } else if (name == ""bias_add2"") {
@@ -283,7 +282,7 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ(""zeros"", node.input(0));
         EXPECT_EQ(""bias"", node.input(1));
       } else if (name == ""sub1"") {
-        EXPECT_EQ(""Identity"", node.op());
+        EXPECT_EQ(""Snapshot"", node.op());
         EXPECT_EQ(""x"", node.input(0));
         EXPECT_EQ(""^zeros"", node.input(1));
       } else if (name == ""sub2"") {
@@ -322,8 +321,7 @@ TEST_F(ConstantFoldingTest, StrengthReduce_Reciprocal) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   item.fetch = {""div_f"", ""div_i"", ""realdiv""};
-  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
-                            nullptr /* cpu_device */);
+  ConstantFolding optimizer(nullptr /* cpu_device */);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
@@ -413,8 +411,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_UnknownOutputShape) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
-  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
-                            nullptr /* cpu_device */);
+  ConstantFolding optimizer(nullptr /* cpu_device */);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
@@ -468,8 +465,7 @@ TEST_F(ConstantFoldingTest, NeutralElement_PartialShape_KnownOutputShape) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
-  ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
-                            nullptr /* cpu_device */);
+  ConstantFolding optimizer(nullptr /* cpu_device */);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
@@ -1337,7 +1333,7 @@ TEST_F(ConstantFoldingTest, MaterializeBroadcastGradientArgs) {
   GrapplerItem item;
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
 
-  ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */);
+  ConstantFolding fold(nullptr /* cpu_device */);
   GraphDef output;
   Status status = fold.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
@@ -1398,7 +1394,7 @@ TEST_F(ConstantFoldingTest, MaterializeReductionIndices) {
   TF_CHECK_OK(s.ToGraphDef(&item.graph));
   item.fetch.push_back(""reshape"");
 
-  ConstantFolding fold(RewriterConfig::AGGRESSIVE, nullptr /* cpu_device */);
+  ConstantFolding fold(nullptr /* cpu_device */);
   GraphDef output;
   Status status = fold.Optimize(nullptr, item, &output);
   TF_EXPECT_OK(status);
",0,train
66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479

PiperOrigin-RevId: 185900165",snapshot_op.h,"@@ -35,12 +35,17 @@ class SnapshotOp : public OpKernel {
   void Compute(OpKernelContext* context) override {
     const Tensor& input = context->input(0);
     Tensor* output = nullptr;
-    OP_REQUIRES_OK(context,
-                   context->allocate_output(0, input.shape(), &output));
-    const Device& device = context->eigen_device<Device>();
-    device.memcpy(output->template flat<Scalar>().data(),
-                  input.template flat<Scalar>().data(),
-                  input.NumElements() * sizeof(Scalar));
+    // Try to use buffer forwarding to avoid an explicit copy.
+    OP_REQUIRES_OK(context, context->forward_input_or_allocate_output(
+                                {0}, 0, input.shape(), &output));
+    if (!output->SharesBufferWith(input)) {
+      // We had to allocate a new buffer since the refcount on the input was
+      // greater than 1. Copy the input to the new buffer.
+      const Device& device = context->eigen_device<Device>();
+      device.memcpy(output->template flat<Scalar>().data(),
+                    input.template flat<Scalar>().data(),
+                    input.NumElements() * sizeof(Scalar));
+    }
   }
 };
 
",0,train
66f4f4cf31b86b7dd20f10ce6d968348b502f2ee,tensorflow/tensorflow,"Automated g4 rollback of changelist 185072479

PiperOrigin-RevId: 185900165",cluster_test.py,"@@ -45,7 +45,7 @@ class ClusterTest(test.TestCase):
       op_perfs, run_time, step_stats = grappler_cluster.MeasureCosts(
           grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 7)
+      self.assertEqual(len(op_perfs), 8)
       self.assertTrue(step_stats.dev_stats)
 
   def testNoDetailedStats(self):
@@ -125,7 +125,7 @@ class ClusterTest(test.TestCase):
         disable_detailed_stats=False, disable_timeline=False) as gcluster:
       op_perfs, run_time, step_stats = gcluster.MeasureCosts(grappler_item)
       self.assertTrue(run_time > 0)
-      self.assertEqual(len(op_perfs), 7)
+      self.assertEqual(len(op_perfs), 8)
       self.assertTrue(step_stats.dev_stats)
 
   def testAvailableOps(self):
",0,train
b6877dfa7b9c45bfecc66a22e1922cc44c37b2fc,tensorflow/tensorflow,"Add migration block for tf.compat.v1.Dimension

PiperOrigin-RevId: 388326479
Change-Id: I634efbe2c988ebd04806c1d466b413f395c6dd93",tensor_shape.py,"@@ -183,7 +183,17 @@ def dimension_at_index(shape, index):
 
 @tf_export(v1=[""Dimension""])
 class Dimension(object):
-  """"""Represents the value of one dimension in a TensorShape.""""""
+  """"""Represents the value of one dimension in a TensorShape.
+
+  @compatibility(TF2)
+  In TF2, members of a `TensorShape` object are integers. The `Dimension` class
+  is not part of TF2's data model.
+
+  Please refer to the [TensorShape section of the migration guide]
+  (https://www.tensorflow.org/guide/migrate/index#tensorshape) for common code
+  patterns adapting Dimension objects to TF2 syntax.

+  @end_compatibility
+  """"""
 
   __slots__ = [""_value""]
 
",0,train
1e5c128c551050d9e43fbac0d57a432596a799ad,tensorflow/tensorflow,"Adding comments to the FileIO class. Also adding a couple more test cases.
Change: 129653503",file_io.py,"@@ -29,36 +29,105 @@ from tensorflow.python.util import compat
 
 
 def file_exists(filename):
+  """"""Determines whether a path exists or not.
+
+  Args:
+    filename: string, a path
+
+  Returns:
+    True if the path exists, whether it's a file or a directory.
+  """"""
   return pywrap_tensorflow.FileExists(compat.as_bytes(filename))
 
 
 def delete_file(filename):
+  """"""Deletes the file located at 'filename'.
+
+  Args:
+    filename: string, a filename
+
+  Raises:
+    errors.OpError: Propagates any errors reported by the FileSystem API.  E.g.,
+    NotFoundError if the file does not exist.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     pywrap_tensorflow.DeleteFile(compat.as_bytes(filename), status)
 
 
 def read_file_to_string(filename):
+  """"""Reads the entire contents of a file to a string.
+
+  Args:
+    filename: string, path to a file
+
+  Returns:
+    contents of the file as a string
+
+  Raises:
+    errors.OpError: Raises a variety of error subtypes, e.g.
+    NotFoundError.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     return pywrap_tensorflow.ReadFileToString(compat.as_bytes(filename), status)
 
 
 def write_string_to_file(filename, file_content):
+  """"""Writes a string to a given file.
+
+  Args:
+    filename: string, path to a file
+    file_content: string, contents that need to be written to the file
+
+  Raises:
+    errors.OpError: If there are errors during the operation.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     pywrap_tensorflow.WriteStringToFile(
         compat.as_bytes(filename), compat.as_bytes(file_content), status)
 
 
 def get_matching_files(filename):
+  """"""Returns a list of files that match the given pattern.
+
+  Args:
+    filename: string, the pattern
+
+  Returns:
+    Returns a list of strings containing filenames that match the given pattern.
+
+  Raises:
+    errors.OpError: If there are filesystem / directory listing errors.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     return pywrap_tensorflow.GetMatchingFiles(compat.as_bytes(filename), status)
 
 
 def create_dir(dirname):
+  """"""Creates a directory with the name 'dirname'.
+
+  Args:
+    dirname: string, name of the directory to be created
+
+  Notes:
+    The parent directories need to exist. Use recursive_create_dir instead if
+    there is the possibility that the parent dirs don't exist.
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     pywrap_tensorflow.CreateDir(compat.as_bytes(dirname), status)
 
 
 def recursive_create_dir(dirname):
+  """"""Create a directory and all parent/intermediate directories.
+
+  Args:
+    dirname: string, name of the directory to be created
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     dirs = dirname.split('/')
     for i in range(len(dirs)):
@@ -68,23 +137,64 @@ def recursive_create_dir(dirname):
 
 
 def copy(oldpath, newpath, overwrite=False):
+  """"""Copies data from oldpath to newpath.
+
+  Args:
+    oldpath: string, name of the file whose contents need to be copied
+    newpath: string, name of the file to which to copy to
+    overwrite: boolean, if false it's an error for newpath to be occupied by an
+        existing file.
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     pywrap_tensorflow.CopyFile(
         compat.as_bytes(oldpath), compat.as_bytes(newpath), overwrite, status)
 
 
 def rename(oldname, newname, overwrite=False):
+  """"""Rename or move a file / directory.
+
+  Args:
+    oldname: string, pathname for a file
+    newname: string, pathname to which the file needs to be moved
+    overwrite: boolean, if false it's an error for newname to be occupied by an
+        existing file.
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
-    return pywrap_tensorflow.RenameFile(
+    pywrap_tensorflow.RenameFile(
         compat.as_bytes(oldname), compat.as_bytes(newname), overwrite, status)
 
 
 def delete_recursively(dirname):
+  """"""Deletes everything under dirname recursively.
+
+  Args:
+    dirname: string, a path to a directory
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
-    return pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status)
+    pywrap_tensorflow.DeleteRecursively(compat.as_bytes(dirname), status)
 
 
 def is_directory(dirname):
+  """"""Returns whether the path is a directory or not.
+
+  Args:
+    dirname: string, path to a potential directory
+
+  Returns:
+    True, if the path is a directory; False otherwise
+
+  Raises:
+    errors.OpError: If the path doesn't exist or another error occurs.
+  """"""
   with errors.raise_exception_on_not_ok_status() as status:
     return pywrap_tensorflow.IsDirectory(compat.as_bytes(dirname), status)
 
@@ -98,11 +208,11 @@ def list_directory(dirname):
   Args:
     dirname: string, path to a directory
 
-  Raises:
-    NotFoundError if directory doesn't exist
-
   Returns:
     [filename1, filename2, ... filenameN]
+
+  Raises:
+    errors.NotFoundError if directory doesn't exist
   """"""
   if not is_directory(dirname):
     raise errors.NotFoundError(None, None, 'Could not find directory')
@@ -154,6 +264,17 @@ def walk(top, in_order=True):
 
 
 def stat(filename):
+  """"""Returns file statistics for a given path.
+
+  Args:
+    filename: string, path to a file
+
+  Returns:
+    FileStatistics struct that contains information about the path
+
+  Raises:
+    errors.OpError: If the operation fails.
+  """"""
   file_statistics = pywrap_tensorflow.FileStatistics()
   with errors.raise_exception_on_not_ok_status() as status:
     pywrap_tensorflow.Stat(compat.as_bytes(filename), file_statistics, status)
",0,train
1e5c128c551050d9e43fbac0d57a432596a799ad,tensorflow/tensorflow,"Adding comments to the FileIO class. Also adding a couple more test cases.
Change: 129653503",file_io_test.py,"@@ -38,6 +38,8 @@ class FileIoTest(tf.test.TestCase):
   def testFileDoesntExist(self):
     file_path = os.path.join(self._base_dir, ""temp_file"")
     self.assertFalse(file_io.file_exists(file_path))
+    with self.assertRaises(errors.NotFoundError):
+      _ = file_io.read_file_to_string(file_path)
 
   def testFileWrite(self):
     file_path = os.path.join(self._base_dir, ""temp_file"")
@@ -52,6 +54,11 @@ class FileIoTest(tf.test.TestCase):
     file_io.delete_file(file_path)
     self.assertFalse(file_io.file_exists(file_path))
 
+  def testFileDeleteFail(self):
+    file_path = os.path.join(self._base_dir, ""temp_file"")
+    with self.assertRaises(errors.NotFoundError):
+      file_io.delete_file(file_path)
+
   def testGetMatchingFiles(self):
     dir_path = os.path.join(self._base_dir, ""temp_dir"")
     file_io.create_dir(dir_path)
",0,train
e6adc8a90b9eca1133a249121683ac9ec0570002,tensorflow/tensorflow,Add tests in TFLite micro for Logistic Int8,logistic.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/lite/kernels/internal/reference/integer_ops/logistic.h""
 #include ""tensorflow/lite/kernels/internal/reference/logistic.h""
 
 #include ""tensorflow/lite/c/builtin_op_data.h""
@@ -27,11 +28,59 @@ namespace tflite {
 namespace ops {
 namespace micro {
 namespace activations {
-
+namespace {
 constexpr int kInputTensor = 0;
 constexpr int kOutputTensor = 0;
 
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+struct OpData {
+  int32_t input_zero_point;
+  int32_t input_range_radius;
+  int32_t input_multiplier;
+  int32_t input_left_shift;
+};
+
+TfLiteStatus CalculateArithmeticOpData(TfLiteContext* context, TfLiteNode* node,
+                                       OpData* data) {
+  const TfLiteTensor* input = GetInput(context, node, kInputTensor);
+  TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
+
+  TF_LITE_ENSURE_EQ(context, input->type, output->type);
+  if (input->type == kTfLiteInt8) {
+    TF_LITE_ENSURE_EQ(context, output->params.zero_point,
+                      std::numeric_limits<int8_t>::min());
+    TF_LITE_ENSURE_EQ(context, static_cast<double>(output->params.scale),
+                      1. / 256);
+
+    static constexpr int kInputIntegerBits = 4;
+    const double input_real_multiplier =
+        static_cast<double>(input->params.scale) *
+        static_cast<double>(1 << (31 - kInputIntegerBits));
+
+    const double q = std::frexp(input_real_multiplier, &data->input_left_shift);
+    data->input_multiplier = static_cast<int32_t>(TfLiteRound(q * (1ll << 31)));
+
+    data->input_range_radius =
+        CalculateInputRadius(kInputIntegerBits, data->input_left_shift, 31);
+  }
+  return kTfLiteOk;
+}
+}  // namespace
+
+void* LogisticInit(TfLiteContext* context, const char* buffer, size_t length) {
+  OpData* data = new OpData();
+  return data;
+}
+
+void LogisticFree(TfLiteContext* context, void* buffer) {}
+
+TfLiteStatus LogisticPrepare(TfLiteContext* context, TfLiteNode* node) {
+  OpData* data = reinterpret_cast<OpData*>(node->user_data);
+  CalculateArithmeticOpData(context, node, data);
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus LogisticEval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
@@ -52,11 +101,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   } else if (input->type == kTfLiteInt8) {
     switch (output->type) {
       case kTfLiteInt8: {
-        reference_ops::Logistic(
-            GetTensorShape(input), GetTensorData<int8_t>(input),
-            input->params.scale, input->params.zero_point,
-            GetTensorShape(output), GetTensorData<int8_t>(output),
-            output->params.scale, output->params.zero_point);
+        OpData* data = reinterpret_cast<OpData*>(node->user_data);
+        reference_integer_ops::Logistic(
+            input->params.zero_point, data->input_range_radius,
+            data->input_multiplier, data->input_left_shift,
+            NumElements(input->dims), GetTensorData<int8_t>(input),
+            GetTensorData<int8_t>(output));
         return kTfLiteOk;
       }
       default:
@@ -79,14 +129,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 }  // namespace activations
 
 TfLiteRegistration* Register_LOGISTIC() {
-  static TfLiteRegistration r = {/*init=*/nullptr,
-                                 /*free=*/nullptr,
-                                 /*prepare=*/nullptr,
-                                 /*invoke=*/activations::Eval,
-                                 /*profiling_string=*/nullptr,
-                                 /*builtin_code=*/0,
-                                 /*custom_name=*/nullptr,
-                                 /*version=*/0};
+  static TfLiteRegistration r = {
+      activations::LogisticInit,    activations::LogisticFree,
+      activations::LogisticPrepare, activations::LogisticEval,
+      /*profiling_string=*/nullptr, /*builtin_code=*/0,
+      /*custom_name=*/nullptr,      /*version=*/0};
   return &r;
 }
 }  // namespace micro
",0,test
e6adc8a90b9eca1133a249121683ac9ec0570002,tensorflow/tensorflow,Add tests in TFLite micro for Logistic Int8,logistic_test.cc,"@@ -82,13 +82,75 @@ void TestLogisticFloat(std::initializer_list<int> input_dims_data,
   }
 }
 
+void TestLogisticInt8(std::initializer_list<int> input_dims_data,
+                      std::initializer_list<int8_t> input_data, float input_min,
+                      float input_max,
+                      std::initializer_list<int8_t> expected_output_data,
+                      std::initializer_list<int> output_dims_data,
+                      float output_min, float output_max, int8_t* output_data) {
+  TfLiteIntArray* input_dims = IntArrayFromInitializer(input_dims_data);
+  TfLiteIntArray* output_dims = IntArrayFromInitializer(output_dims_data);
+  const int output_elements_count = ElementCount(*output_dims);
+
+  constexpr int inputs_size = 1;
+  constexpr int outputs_size = 1;
+  constexpr int tensors_size = inputs_size + outputs_size;
+  TfLiteTensor tensors[tensors_size] = {
+      CreateQuantizedTensor(input_data, input_dims, ""input_tensor"", input_min,
+                            input_max),
+      CreateQuantizedTensor(output_data, output_dims, ""output_tensor"",
+                            output_min, output_max),
+  };
+
+  TfLiteContext context;
+  PopulateContext(tensors, tensors_size, micro_test::reporter, &context);
+
+  ::tflite::ops::micro::AllOpsResolver resolver;
+  const TfLiteRegistration* registration =
+      resolver.FindOp(tflite::BuiltinOperator_LOGISTIC, 1);
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration);
+
+  const char* init_data = nullptr;
+  size_t init_data_size = 1;
+  void* user_data = nullptr;
+  if (registration->init) {
+    user_data = registration->init(&context, init_data, init_data_size);
+  }
+  int inputs_array_data[] = {1, 0};
+  TfLiteIntArray* inputs_array = IntArrayFromInts(inputs_array_data);
+  int outputs_array_data[] = {1, 1};
+  TfLiteIntArray* outputs_array = IntArrayFromInts(outputs_array_data);
+
+  TfLiteNode node;
+  node.inputs = inputs_array;
+  node.outputs = outputs_array;
+  node.temporaries = nullptr;
+  node.user_data = user_data;
+  node.builtin_data = nullptr;
+  node.custom_initial_data = nullptr;
+  node.custom_initial_data_size = 0;
+  node.delegate = nullptr;
+  if (registration->prepare) {
+    TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->prepare(&context, &node));
+  }
+  TF_LITE_MICRO_EXPECT_NE(nullptr, registration->invoke);
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk, registration->invoke(&context, &node));
+  if (registration->free) {
+    registration->free(&context, user_data);
+  }
+  for (int i = 0; i < output_elements_count; ++i) {
+    TF_LITE_MICRO_EXPECT_NEAR(expected_output_data.begin()[i], output_data[i],
+                              1);
+  }
+}
+
 }  // namespace
 }  // namespace testing
 }  // namespace tflite
 
 TF_LITE_MICRO_TESTS_BEGIN
 
-TF_LITE_MICRO_TEST(SimpleTest) {
+TF_LITE_MICRO_TEST(SimpleTestFloat) {
   const int output_elements_count = 10;
   float output_data[output_elements_count];
   tflite::testing::TestLogisticFloat({2, 1, 5},  // Input shape.
@@ -121,4 +183,38 @@ TF_LITE_MICRO_TEST(SimpleTest) {
                                      output_data);
 }
 
+TF_LITE_MICRO_TEST(SimpleTestInt8) {
+  using tflite::testing::F2QS;
+
+  const float input_min = -63.5f;
+  const float input_max = 64.0f;
+  const float output_min = 0.0f;
+  const float output_max = (255.0f / 256.0f);
+
+  const int output_elements_count = 10;
+  int8_t output_data[output_elements_count];
+  tflite::testing::TestLogisticInt8(
+      {2, 1, output_elements_count},  // Input shape.
+      {F2QS(1.0, input_min, input_max), F2QS(2.0, input_min, input_max),
+       F2QS(3.0, input_min, input_max), F2QS(4.0, input_min, input_max),
+       F2QS(5.0, input_min, input_max), F2QS(-1.0, input_min, input_max),
+       F2QS(-2.0, input_min, input_max), F2QS(-3.0, input_min, input_max),
+       F2QS(-4.0, input_min, input_max), F2QS(-5.0, input_min, input_max)},
+      input_min, input_max,  // Input quantized range.
+      {                      // Expected results.
+       F2QS(0.73105858, output_min, output_max),
+       F2QS(0.88079708, output_min, output_max),
+       F2QS(0.95257413, output_min, output_max),
+       F2QS(0.98201379, output_min, output_max),
+       F2QS(0.99330715, output_min, output_max),
+       F2QS(0.26894142, output_min, output_max),
+       F2QS(0.11920292, output_min, output_max),
+       F2QS(0.04742587, output_min, output_max),
+       F2QS(0.01798621, output_min, output_max),
+       F2QS(0.00669285, output_min, output_max)},
+      {2, 1, output_elements_count},  // Output shape.
+      output_min, output_max,         // Output quantized range.
+      output_data);
+}
+
 TF_LITE_MICRO_TESTS_END
",0,test
932e1bfb87ac3d0e7d6d4c2979f77817bcc41590,tensorflow/tensorflow,Specify boolean in the soft_placement docstrings,config.py,"@@ -254,7 +254,7 @@ def get_soft_device_placement():
   An error is raised when an Op cannot be placed onto its intended device.
 
   Returns:
-    If soft placement is enabled.
+   A boolean indicating if soft placement is enabled.
   """"""
   return context.context().soft_device_placement
 
@@ -269,7 +269,7 @@ def set_soft_device_placement(enabled):
     3. need to co-locate with reftype input(s) which are from CPU
 
   Args:
-    enabled: Whether to enable soft placement.
+    enabled: A boolean indicating whether to enable soft placement.
   """"""
   context.context().soft_device_placement = enabled
 
",0,train
d6362c90e7ef942808bc31887175e2c0ef437896,tensorflow/tensorflow,"Add var name to errors on variable restore.
Change: 152963830",save_restore_tensor.cc,"@@ -268,7 +268,8 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix,
                                          &parsed_slice, &parsed_slice_shape));
       if (!restored_full_shape.IsSameSize(parsed_full_shape)) {
         return errors::InvalidArgument(
-            ""Shape in shape_and_slice spec "", parsed_full_shape.DebugString(),
+            ""tensor_name = "", tensor_name, ""; shape in shape_and_slice spec "",
+            parsed_full_shape.DebugString(),
             "" does not match the shape stored in checkpoint: "",
             restored_full_shape.DebugString());
       }
@@ -279,10 +280,10 @@ Status RestoreTensorsV2(OpKernelContext* context, const Tensor& prefix,
           reader.LookupSlice(tensor_name, parsed_slice, restored_tensor));
     }
     if (dtypes[i] != restored_tensor->dtype()) {
-      return errors::InvalidArgument(""Expected dtype "",
-                                     DataTypeString(dtypes[i]),
-                                     "" does not equal restored dtype "",
-                                     DataTypeString(restored_tensor->dtype()));
+      return errors::InvalidArgument(
+          ""tensor_name = "", tensor_name, ""; expected dtype "",
+          DataTypeString(dtypes[i]), "" does not equal restored dtype "",
+          DataTypeString(restored_tensor->dtype()));
     }
   }
   return Status::OK();
",0,train
0421b4be60b3d641e5e0c2ac133fba4c9d80a44e,tensorflow/tensorflow,"Update GraphDef version to 714.

PiperOrigin-RevId: 364510348
Change-Id: I18c89e5492256773a4e3b41b2e04400735ffca4f",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 713  // Updated: 2021/3/22
+#define TF_GRAPH_DEF_VERSION 714  // Updated: 2021/3/23
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
45f2aab17f66d235f4643dda142850457ad9e894,tensorflow/tensorflow,"Removed unnecessary check. More uniform code.

PiperOrigin-RevId: 286472636
Change-Id: I6d2f9356993c2b08ea8985003abbe4e11194d833",convolution_transposed_thin.cc,"@@ -87,24 +87,17 @@ std::string GenerateConvolutionTransposedCode(
     for (int x = 0; x < kernel_size.x; ++x) {
       std::string r_s =
           ""  r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]"";
-      const std::string to_accum =
-          op_def.precision == CalculationsPrecision::F32_F16 ? ""convert_float""
-                                                             : """";
       for (int d = 0; d < dst_channels; ++d) {
-        c += r_s + postfix[d] + "" = "" + to_accum + ""(dot(src, filters["" +
-             std::to_string(index) + ""]));\n"";
+        c += r_s + postfix[d] + "" = dot(src, filters["" + std::to_string(index) +
+             ""]);\n"";
         index++;
       }
     }
   }
   c += ""  }\n"";
   for (int i = 1; i < src_depth; ++i) {
-    if (op_def.precision != CalculationsPrecision::F32_F16) {
-      c += ""  if (X > "" + std::to_string(-i) +
-           "") {  // always true, to reduce registers usage\n"";
-    } else {
-      c += ""  {\n"";
-    }
+    c += ""  if (X > "" + std::to_string(-i) +
+         "") {  // always true, to reduce registers usage\n"";
     c += ""  FLT4 src = "" +
          src_tensor.Read4D(""X"", ""Y"", std::to_string(i), batch_id) + "";\n"";
     for (int y = 0; y < kernel_size.y; ++y) {
@@ -112,8 +105,8 @@ std::string GenerateConvolutionTransposedCode(
         std::string r_s =
             ""  r["" + std::to_string(y) + ""]["" + std::to_string(x) + ""]"";
         for (int d = 0; d < dst_channels; ++d) {
-          c += r_s + postfix[d] + "" += TO_ACCUM_FLT(dot(src, filters["" +
-               std::to_string(index) + ""]));\n"";
+          c += r_s + postfix[d] + "" += dot(src, filters["" +
+               std::to_string(index) + ""]);\n"";
           index++;
         }
       }
",0,train
879b758fae5d0a5babf88846468afaab22299524,tensorflow/tensorflow,"Add correct import logic for string constants

In TFLite flatbuffer, strings are serialized with tflite::DynamicBuffer.

PiperOrigin-RevId: 387109297
Change-Id: I2fd2fb9c2905e54b34f417ecaa4efe5e1c6a479c",flatbuffer_import.cc,"@@ -77,6 +77,7 @@ limitations under the License.
 #include ""tensorflow/lite/model.h""
 #include ""tensorflow/lite/schema/schema_generated.h""
 #include ""tensorflow/lite/schema/schema_utils.h""
+#include ""tensorflow/lite/string_util.h""
 
 using llvm::ArrayRef;
 using mlir::Builder;
@@ -358,6 +359,14 @@ tensorflow::TensorProto ConvertTfliteConstTensor(
   for (auto dim : tensor.shape) {
     shape->add_dim()->set_size(int64_t{dim});
   }
+  // TensorFlow Lite uses tflite::DynamicBuffer to encode a vector of strings.
+  if (tensor.type == tflite::TensorType_STRING) {
+    for (int i = 0; i < tflite::GetStringCount(buffer.data()); ++i) {
+      tflite::StringRef str = tflite::GetString(buffer.data(), i);
+      ret.add_string_val(str.str, str.len);
+    }
+    return ret;
+  }
   std::string content;
   content.assign(reinterpret_cast<const char*>(buffer.data()), buffer.size());
   ret.set_tensor_content(content);
",0,test
62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen.

PiperOrigin-RevId: 249326595",ast_edits.py,"@@ -233,6 +233,7 @@ class NoUpdateSpec(APIChangeSpec):
     self.function_warnings = {}
     self.change_to_function = {}
     self.module_deprecations = {}
+    self.function_transformers = {}
     self.import_renames = {}
 
 
",0,train
62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen.

PiperOrigin-RevId: 249326595",tf_upgrade_v2.py,"@@ -1519,6 +1519,7 @@ class TFAPIChangeSpec(ast_edits.NoUpdateSpec):
       self.function_warnings = {}
       self.change_to_function = {}
       self.module_deprecations = module_deprecations_v2.MODULE_DEPRECATIONS
+      self.function_transformers = {}
       self.import_renames = {}
     return visitor.log, visitor.warnings_and_errors
 
",0,train
62f3c16a9aa823acd9ba919fd232ae44991c7bbc,tensorflow/tensorflow,"Fix bug in upgrade script where function_transformers aren't skipped when `import tensorflow.compat.v* as tf` is seen.

PiperOrigin-RevId: 249326595",tf_upgrade_v2_test.py,"@@ -2072,19 +2072,20 @@ def _log_prob(self, x):
     self.assertEmpty(errors)
 
   def test_api_spec_reset_between_files(self):
-    old_symbol = ""tf.conj(a)""
-    new_symbol = ""tf.math.conj(a)""
-
-    ## Test that the api spec is reset in between files:
-    import_header = ""import tensorflow.compat.v2 as tf\n""
-    text_a = import_header + old_symbol
-    expected_text_a = import_header + old_symbol
-    text_b = old_symbol
-    expected_text_b = new_symbol
-    results = self._upgrade_multiple([text_a, text_b])
-    result_a, result_b = results[0], results[1]
-    self.assertEqual(result_a[3], expected_text_a)
-    self.assertEqual(result_b[3], expected_text_b)
+    for old_symbol, new_symbol in [
+        (""tf.conj(a)"", ""tf.math.conj(a)""),
+        (""tf.to_int32(x)"", ""tf.cast(x, dtype=tf.int32)"")]:
+
+      ## Test that the api spec is reset in between files:
+      import_header = ""import tensorflow.compat.v2 as tf\n""
+      text_a = import_header + old_symbol
+      expected_text_a = import_header + old_symbol
+      text_b = old_symbol
+      expected_text_b = new_symbol
+      results = self._upgrade_multiple([text_a, text_b])
+      result_a, result_b = results[0], results[1]
+      self.assertEqual(result_a[3], expected_text_a)
+      self.assertEqual(result_b[3], expected_text_b)
 
 
 class TestUpgradeFiles(test_util.TensorFlowTestCase):
",0,train
db63348bf14d911f2eebeb418a0b570b65b64f92,tensorflow/tensorflow,"Add test with tf.cond.

PiperOrigin-RevId: 195745718",make_test_graphs.py,"@@ -78,6 +78,22 @@ def tfadd_with_ckpt_saver(out_dir):
       f.write(saver.as_saver_def().SerializeToString())
 
 
+def tfassert_eq(_):
+  x = array_ops.placeholder(dtypes.int32, name='x_hold')
+  y = array_ops.placeholder(dtypes.int32, name='y_hold')
+  control_flow_ops.Assert(
+      math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq')
+  math_ops.add(x, math_ops.negative(y), name='x_y_diff')
+
+
+def tfcond(_):
+  p = array_ops.placeholder(dtypes.bool, name='p_hold')
+  x = array_ops.placeholder(dtypes.int32, name='x_hold')
+  y = array_ops.placeholder(dtypes.int32, name='y_hold')
+  z = control_flow_ops.cond(p, lambda: x, lambda: y)
+  array_ops.identity(z, name='result')
+
+
 def tfgather(_):
   params = array_ops.placeholder(dtypes.float32, name='params')
   indices = array_ops.placeholder(dtypes.int32, name='indices')
@@ -126,14 +142,6 @@ def tfsplits(_):
   array_ops.identity(y, name='result')
 
 
-def tfassert_eq(_):
-  x = array_ops.placeholder(dtypes.int32, name='x_hold')
-  y = array_ops.placeholder(dtypes.int32, name='y_hold')
-  control_flow_ops.Assert(
-      math_ops.equal(x, y), ['Expected x == y.'], name='assert_eq')
-  math_ops.add(x, math_ops.negative(y), name='x_y_diff')
-
-
 def write_graph(build_graph, out_dir):
   """"""Build a graph using build_graph and write it out.""""""
   g = ops.Graph()
@@ -148,12 +156,13 @@ def main(_):
   write_graph(tfadd, FLAGS.out_dir)
   write_graph(tfadd_with_ckpt, FLAGS.out_dir)
   write_graph(tfadd_with_ckpt_saver, FLAGS.out_dir)
+  write_graph(tfassert_eq, FLAGS.out_dir)
+  write_graph(tfcond, FLAGS.out_dir)
+  write_graph(tffunction, FLAGS.out_dir)
   write_graph(tfgather, FLAGS.out_dir)
   write_graph(tfmatmul, FLAGS.out_dir)
   write_graph(tfmatmulandadd, FLAGS.out_dir)
-  write_graph(tffunction, FLAGS.out_dir)
   write_graph(tfsplits, FLAGS.out_dir)
-  write_graph(tfassert_eq, FLAGS.out_dir)
 
 
 if __name__ == '__main__':
",0,test
db63348bf14d911f2eebeb418a0b570b65b64f92,tensorflow/tensorflow,"Add test with tf.cond.

PiperOrigin-RevId: 195745718",tfcompile_test.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt.h""
 #include ""tensorflow/compiler/aot/tests/test_graph_tfadd_with_ckpt_saver.h""
 #include ""tensorflow/compiler/aot/tests/test_graph_tfassert_eq.h""
+#include ""tensorflow/compiler/aot/tests/test_graph_tfcond.h""
 #include ""tensorflow/compiler/aot/tests/test_graph_tffunction.h""
 #include ""tensorflow/compiler/aot/tests/test_graph_tfgather.h""
 #include ""tensorflow/compiler/aot/tests/test_graph_tfmatmul.h""
@@ -150,6 +151,31 @@ TEST(TFCompileTest, AddWithCkptSaver) {
   EXPECT_EQ(add_const.result0_data(), add_const.results()[0]);
 }
 
+TEST(TFCompileTest, Cond) {
+  CondComp cond;
+  EXPECT_EQ(cond.arg0_data(), cond.args()[0]);
+  EXPECT_EQ(cond.arg1_data(), cond.args()[1]);
+  EXPECT_EQ(cond.arg2_data(), cond.args()[2]);
+  cond.arg1() = 10;
+  cond.arg2() = 20;
+  {
+    cond.arg0() = true;
+    const int32 expected_result = cond.arg1();
+    EXPECT_TRUE(cond.Run());
+    EXPECT_EQ(cond.result0(), expected_result);
+    EXPECT_EQ(cond.result0_data()[0], expected_result);
+    EXPECT_EQ(cond.result0_data(), cond.results()[0]);
+  }
+  {
+    cond.arg0() = false;
+    const int32 expected_result = cond.arg2();
+    EXPECT_TRUE(cond.Run());
+    EXPECT_EQ(cond.result0(), expected_result);
+    EXPECT_EQ(cond.result0_data()[0], expected_result);
+    EXPECT_EQ(cond.result0_data(), cond.results()[0]);
+  }
+}
+
 TEST(TFCompileTest, Gather) {
   GatherComp gather;
   EXPECT_EQ(gather.arg0_data(), gather.args()[0]);
",0,test
bd2f1ed1c28505c3ab3b325e8c481091b111db3a,tensorflow/tensorflow,"Update GraphDef version to 982.

PiperOrigin-RevId: 416746592
Change-Id: I748de8f3442c4a09305d8d08f1f1849a460f4ce5",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 981  // Updated: 2021/12/15
+#define TF_GRAPH_DEF_VERSION 982  // Updated: 2021/12/16
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,test
690d47e60bdb8adfd387f0c0db01af153a3ed6b9,tensorflow/tensorflow,"TpuDriver: Improve handling of restarted clients/servers.

PiperOrigin-RevId: 280727303
Change-Id: I9c11368de26d8ce851c799a53a903a452d385975",event_id.h,"@@ -31,8 +31,8 @@ namespace tpu_driver {
 // This class provides a typed interface for these values as well as support for
 // hashing and ostreams (for logging).
 struct EventId {
-  int64_t client_id;
-  int64_t operation_id;
+  uint64_t client_id;
+  uint64_t operation_id;
 
   template <typename H>
   friend H AbslHashValue(H h, const EventId& c) {
@@ -51,9 +51,9 @@ struct EventId {
     return absl::StrCat(client_id, "":"", operation_id);
   }
 
-  int64_t AsInt() const { return client_id << 44 | operation_id; }
+  uint64_t AsInt() const { return client_id << 44 | operation_id; }
 
-  static EventId FromInt(int64_t value) {
+  static EventId FromInt(uint64_t value) {
     return EventId{value >> 44, value & 0xfffffffffff};
   }
 };
",0,test
690d47e60bdb8adfd387f0c0db01af153a3ed6b9,tensorflow/tensorflow,"TpuDriver: Improve handling of restarted clients/servers.

PiperOrigin-RevId: 280727303
Change-Id: I9c11368de26d8ce851c799a53a903a452d385975",grpc_tpu_driver.cc,"@@ -64,6 +64,25 @@ class GrpcEvent : public Event {
   GrpcTpuStream* stream_;
 };
 
+class ErrorEvent : public GrpcEvent {
+ public:
+  explicit ErrorEvent(Status status) : GrpcEvent(EventId{0, 0}, nullptr) {
+    status_ = status;
+  }
+
+  xla::Status Await() override { return status_; }
+  absl::optional<xla::Status> AwaitWithTimeout(
+      absl::Duration duration) override {
+    return status_;
+  }
+  void AddCallback(std::function<void(Status)> callback) override {
+    callback(status_);
+  }
+
+ private:
+  Status status_;
+};
+
 class GrpcBufferHandle : public BufferHandle {
  public:
   explicit GrpcBufferHandle(
@@ -417,17 +436,19 @@ class GrpcTpuDriver : public TpuDriver {
   static std::unique_ptr<grpc::CloudTpuDriver::Stub> CreateTpuDriverStub(
       const TpuDriverConfig& config);
 
+  uint32 client_id() const { return client_id_; }
+
  private:
   std::unique_ptr<GrpcTpuStream> AllocateStream(int32_t core_id);
 
   const TpuDriverConfig config_;
-  const int32_t client_id_;
+  const uint32_t client_id_;
   // Map from stream IDs to streams.
   absl::flat_hash_map<int32_t, std::unique_ptr<GrpcTpuStream>> streams_;
   std::unique_ptr<GrpcTpuStream> host_stream_;
   // Shared by all streams.
-  std::atomic<int64_t> operation_id_{0};
-};
+  std::atomic<uint64_t> operation_id_{0};
+};  // namespace
 
 GrpcEvent::~GrpcEvent() { stream_->DeleteEvent(id_); }
 
@@ -464,8 +485,11 @@ GrpcTpuStream::~GrpcTpuStream() {
     // Mark all remaining events invalid.
     absl::MutexLock lock(&events_mutex_);
     for (auto e : events_) {
-      UpdateEventStatus(e.first, xla::Status(tensorflow::error::Code::ABORTED,
-                                             ""Tpustream was closed.""));
+      if (!e.second.done) {
+        LOG(ERROR) << ""Resetting: "" << e.first;
+        UpdateEventStatus(e.first, xla::Status(tensorflow::error::Code::ABORTED,
+                                               ""Driver was closed.""));
+      }
     }
   }
   VLOG(1) << ""Closing stream."";
@@ -511,8 +535,9 @@ void GrpcTpuStream::UpdateEventStatus(EventId id, Status status) {
 
   // This is the first time this event finishes. Remember the results and call
   // the callbacks.
-  VLOG(1) << ""Response received for GrpcEvent "" << id << "". Firing ""
-          << it->second.callbacks.size() << "" callbacks."";
+  VLOG(1) << ""Response received for GrpcEvent "" << id << "". ""
+          << status.ToString() << "". Firing "" << it->second.callbacks.size()
+          << "" callbacks."";
   it->second.done = true;
   it->second.status = status;
   for (const auto& callback : it->second.callbacks) {
@@ -544,6 +569,7 @@ absl::optional<Status> GrpcTpuStream::WaitForEvent(EventId id,
     events_mutex_.AssertHeld();
     return !events_.contains(id) || events_[id].done;
   };
+
   if (events_mutex_.AwaitWithTimeout(absl::Condition(&done), duration)) {
     return events_.contains(id) ? events_[id].status : Status();
   }
@@ -594,6 +620,8 @@ void GrpcTpuStream::StreamWriterFn() {
         reqs.push_back(StreamRequest());
         request_bytes = 0;
       }
+      VLOG(1) << ""Sending request: "" << EventId::FromInt(e->operation_id());
+      VLOG(2) << ""Sending request: "" << e->DebugString();
       reqs.back().mutable_entry()->AddAllocated(e);
     }
     num_pending_requests_ = 0;
@@ -611,9 +639,10 @@ void GrpcTpuStream::StreamWriterFn() {
 void GrpcTpuStream::StreamReaderFn() {
   StreamResponse resp;
   while (stream_->Read(&resp)) {
-    VLOG(1) << ""Received response: "" << resp.DebugString();
+    VLOG(2) << ""Received response: "" << resp.DebugString();
     for (const StreamResponse::Entry entry : resp.entry()) {
       EventId event_id = EventId::FromInt(entry.operation_id());
+      VLOG(1) << ""Received response for: "" << event_id;
 
       TraceMe activity(""GrpcTpuStream::RequestComplete"");
       if (entry.has_transfer_from()) {
@@ -805,8 +834,15 @@ std::unique_ptr<LoadedProgramHandle> GrpcTpuStream::LoadProgram(
   InitializeRequest(req.get(), wait_for);
   TraceMe activity(absl::StrCat(""GrpcTpuStream::LoadProgram""));
   req->mutable_load()->set_core_id(core_id);
-  req->mutable_load()->set_compiled_program_handle(
-      static_cast<const GrpcCompiledProgramHandle*>(handle)->id().AsInt());
+  auto grpc_handle = static_cast<const GrpcCompiledProgramHandle*>(handle);
+  if (grpc_handle->id().client_id != driver_->client_id()) {
+    auto event = absl::make_unique<ErrorEvent>(
+        xla::InvalidArgument(""Invalid program handle (wrong client id). Did ""
+                             ""you restart the server or use a stale handle?""));
+    return absl::make_unique<GrpcLoadedProgramHandle>(event->id(),
+                                                      std::move(event));
+  }
+  req->mutable_load()->set_compiled_program_handle(grpc_handle->id().AsInt());
   auto event =
       absl::make_unique<GrpcEvent>(EventId::FromInt(req->operation_id()), this);
   AddWriteRequest(std::move(req));
@@ -835,13 +871,33 @@ std::unique_ptr<Event> GrpcTpuStream::ExecuteProgram(
     absl::Span<Event* const> wait_for) {
   auto req = absl::make_unique<StreamRequest::Entry>();
   InitializeRequest(req.get(), wait_for);
+  auto program_handle = static_cast<GrpcLoadedProgramHandle*>(program);
+  if (program_handle->id().client_id != driver_->client_id()) {
+    return absl::make_unique<ErrorEvent>(
+        xla::InvalidArgument(""Invalid program handle (wrong client id). Did ""
+                             ""you restart the server or use a stale handle?""));
+  }
+
   req->mutable_execute()->set_loaded_program_handle(
-      static_cast<GrpcLoadedProgramHandle*>(program)->id().AsInt());
+      program_handle->id().AsInt());
+
   for (BufferHandle* input : inputs) {
-    req->mutable_execute()->add_input_handle(
-        static_cast<GrpcBufferHandle*>(input)->id().AsInt());
+    auto* grpc_handle = static_cast<GrpcBufferHandle*>(input);
+    if (grpc_handle->id().client_id != driver_->client_id()) {
+      return absl::make_unique<ErrorEvent>(xla::InvalidArgument(
+          ""Invalid input buffer (wrong client id). Did you restart the server ""
+          ""or use a stale handle?""));
+    }
+    req->mutable_execute()->add_input_handle(grpc_handle->id().AsInt());
   }
+
   for (BufferHandle* output : outputs) {
+    auto* grpc_handle = static_cast<GrpcBufferHandle*>(output);
+    if (grpc_handle->id().client_id != driver_->client_id()) {
+      return absl::make_unique<ErrorEvent>(xla::InvalidArgument(
+          ""Invalid output buffer (wrong client id). Did you restart the server ""
+          ""or use a stale handle?""));
+    }
     req->mutable_execute()->add_output_handle(
         static_cast<GrpcBufferHandle*>(output)->id().AsInt());
   }
",0,test
bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests.

Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a
failure.  Noticed while debugging a different problem.
Change: 151835981",shape_inference.cc,"@@ -633,26 +633,7 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   TF_DCHECK_OK(ShapeUtil::ValidateShape(ehs));
   switch (operation) {
     case TRIOP_CLAMP:
-      TF_RETURN_IF_ERROR(
-          ExpectNotTupleOrOpaque(lhs, ""lhs of ternary operation""));
-      TF_RETURN_IF_ERROR(
-          ExpectNotTupleOrOpaque(rhs, ""rhs of ternary operation""));
-      TF_RETURN_IF_ERROR(
-          ExpectNotTupleOrOpaque(ehs, ""ehs of ternary operation""));
-      if (((ShapeUtil::Compatible(lhs, rhs) || ShapeUtil::Rank(lhs) == 0) &&
-           (ShapeUtil::Compatible(rhs, ehs) || ShapeUtil::Rank(ehs) == 0))) {
-        return rhs;
-      }
-      if (ShapeUtil::Rank(rhs) == 0) {
-        if (ShapeUtil::Compatible(lhs, ehs)) {
-          return lhs;
-        }
-        return ShapeUtil::Rank(ehs) == 0 ? lhs : ehs;
-      }
-      return Unimplemented(""not yet implemented: %s, %s <clamp> %s"",
-                           lhs.ShortDebugString().c_str(),
-                           ehs.ShortDebugString().c_str(),
-                           rhs.ShortDebugString().c_str());
+      return InferClampShape(lhs, rhs, ehs);
     case TRIOP_SELECT:
       return InferSelectShape(lhs, rhs, ehs);
     case TRIOP_UPDATE:
@@ -1332,6 +1313,41 @@ ShapeInference::InferDegenerateDimensionBroadcastShape(
   return ShapeUtil::PermuteDimensions(InversePermutation(dimensions), operand);
 }
 
+// TODO(b/36794510): Make broadcast semantics more consistent, by supporting
+// ""degenerate"" cases, as with binary elementwise ops.
+/* static */ StatusOr<Shape> ShapeInference::InferClampShape(
+    const Shape& min, const Shape& operand, const Shape& max) {
+  TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(min, ""clamp min""));
+  TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(operand, ""clamp operand""));
+  TF_RETURN_IF_ERROR(ExpectNotTupleOrOpaque(max, ""clamp max""));
+  if (!ShapeUtil::SameElementType(min, operand) ||
+      !ShapeUtil::SameElementType(max, operand)) {
+    return InvalidArgument(""clamp op with different operand types: %s, %s, %s"",
+                           ShapeUtil::HumanString(min).c_str(),
+                           ShapeUtil::HumanString(operand).c_str(),
+                           ShapeUtil::HumanString(max).c_str());
+  }
+  if (((ShapeUtil::Compatible(min, operand) || ShapeUtil::IsScalar(min)) &&
+       (ShapeUtil::Compatible(max, operand) || ShapeUtil::IsScalar(max)))) {
+    return operand;
+  }
+  if (ShapeUtil::IsScalar(operand)) {
+    if (ShapeUtil::Compatible(min, max)) {
+      return min;
+    } else if (ShapeUtil::IsScalar(min)) {
+      return max;
+    } else if (ShapeUtil::IsScalar(max)) {
+      return min;
+    }
+  }
+  return Unimplemented(
+      ""not yet implemented: %s, %s <clamp> %s"", min.ShortDebugString().c_str(),
+      max.ShortDebugString().c_str(), operand.ShortDebugString().c_str());
+}
+
+// TODO(b/36794510): Make broadcast semantics more consistent, by supporting
+// ""degenerate"" cases, as with binary elementwise ops, as well as scalar
+// broadcast from all operands, not just the predicate.
 /* static */ StatusOr<Shape> ShapeInference::InferSelectShape(
     const Shape& pred, const Shape& on_true, const Shape& on_false) {
   if (!ShapeUtil::Compatible(on_true, on_false)) {
",0,train
bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests.

Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a
failure.  Noticed while debugging a different problem.
Change: 151835981",shape_inference.h,"@@ -190,6 +190,10 @@ class ShapeInference {
       BinaryOperation operation, const Shape& lhs, const Shape& rhs,
       tensorflow::gtl::ArraySlice<int64> broadcast_dimensions);
 
+  // Helper for inferring the shape of Clamp ops.
+  static StatusOr<Shape> InferClampShape(const Shape& min, const Shape& operand,
+                                         const Shape& max);
+
   // Helper for inferring the shape of Select ops.
   static StatusOr<Shape> InferSelectShape(const Shape& pred,
                                           const Shape& on_true,
",0,train
bbd2047cf3a715a1431889ad8f558576a5382876,tensorflow/tensorflow,"[XLA:HLO] Minor fix for Clamp shape inference, and add some tests.

Previously Clamp(f32[5], f32[], f32[9]) returned success, but it now returns a
failure.  Noticed while debugging a different problem.
Change: 151835981",shape_inference_test.cc,"@@ -157,6 +157,99 @@ TEST_F(ShapeInferenceTest, SelectBadShapes) {
       testing::ContainsRegex(""pred operand must have PRED element type""));
 }
 
+TEST_F(ShapeInferenceTest, ClampAllMatrix) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, matrix_64_48_, matrix_64_48_,
+      matrix_64_48_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampAllScalar) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, f32_, f32_, f32_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(f32_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampMinScalar) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, f32_, matrix_64_48_, matrix_64_48_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampMaxScalar) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, matrix_64_48_, matrix_64_48_, f32_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampOperandScalar) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, matrix_64_48_, f32_, matrix_64_48_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampMinMatrix) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, matrix_64_48_, f32_, f32_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampMaxMatrix) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, f32_, f32_, matrix_64_48_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampOperandMatrix) {
+  auto inferred_status = ShapeInference::InferTernaryOpShape(
+      TernaryOperation::TRIOP_CLAMP, f32_, matrix_64_48_, f32_);
+  ASSERT_IS_OK(inferred_status.status());
+  ASSERT_TRUE(ShapeUtil::Equal(matrix_64_48_, inferred_status.ValueOrDie()));
+}
+
+TEST_F(ShapeInferenceTest, ClampBadShapes) {
+  // Type mismatch
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, s32_, f32_, f32_)
+                   .ok());
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, f32_, s32_, f32_)
+                   .ok());
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, f32_, f32_, s32_)
+                   .ok());
+  // Dimension mismatch
+  ASSERT_FALSE(
+      ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP,
+                                          vector_64_, vector_32_, vector_32_)
+          .ok());
+  ASSERT_FALSE(
+      ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP,
+                                          vector_32_, vector_64_, vector_32_)
+          .ok());
+  ASSERT_FALSE(
+      ShapeInference::InferTernaryOpShape(TernaryOperation::TRIOP_CLAMP,
+                                          vector_32_, vector_32_, vector_64_)
+          .ok());
+  // Dimension mismatch, where one operand is a scalar
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, vector_64_, vector_32_, f32_)
+                   .ok());
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, vector_64_, f32_, vector_32_)
+                   .ok());
+  ASSERT_FALSE(ShapeInference::InferTernaryOpShape(
+                   TernaryOperation::TRIOP_CLAMP, f32_, vector_64_, vector_32_)
+                   .ok());
+}
+
 TEST_F(ShapeInferenceTest, VariadicOpTuplify) {
   StatusOr<Shape> result = ShapeInference::InferVariadicOpShape(
       VariadicOperation::VAROP_TUPLE, {&s32_, &f32_});
",0,train
26cb47b77f4029083e765259c329602e8a478ea1,tensorflow/tensorflow,"ROCm build fix.

PiperOrigin-RevId: 292377641
Change-Id: Id6669836b45cd450c40552be2779d7269ecad344",cost_utils.cc,"@@ -88,8 +88,8 @@ TfOpRoofLineCostEstimator::OpRoofLineStats TfOpRoofLineCostEstimator::Predict(
   }
 
   grappler::OpContext op_context;
-  op_context.name = tf_op.type;
-  op_context.op_info.set_op(tf_op.type);
+  op_context.name = std::string(tf_op.type);
+  op_context.op_info.set_op(op_context.name);
   for (const auto& tensor : input_tensors) {
     *op_context.op_info.add_inputs() = GetTensorProperties(tensor);
   }
",0,train
ead4fda06535ce547d014fba1656ae53f0b64996,tensorflow/tensorflow,"Fixes a bug in tf.train.Saver(), where classes using the `VARIABLE_VALUE_KEY` used different naming in the checkpoint file when `var_list` was a dict.

PiperOrigin-RevId: 217182136",saver.py,"@@ -626,7 +626,12 @@ class BaseSaverBuilder(object):
         op, variables.Variable):
       # pylint: disable=protected-access
       for attr, factory in op._gather_saveables_for_checkpoint().items():
-        op = (factory(name + ""_"" + attr) if callable(factory) else factory)
+        if attr == checkpointable.VARIABLE_VALUE_KEY:
+          # Keep original name for classes masquerading as variables.
+          full_name = name
+        else:
+          full_name = name + ""_"" + attr
+        op = (factory(full_name) if callable(factory) else factory)
         for op in BaseSaverBuilder.SaveableObjectsForOp(op, op.name):
           yield op
       # pylint: enable=protected-access
",0,train
7b5d04c60437a415fc4edb5a97d939a1a3babe14,tensorflow/tensorflow,"Makes most variable writes depend on the cached value.

This disallows some undefined behavior with unordered reads and writes.

PiperOrigin-RevId: 198633444",resource_variable_ops_test.py,"@@ -119,6 +119,13 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
           dtype=dtypes.int32, shape=[1], name=""foo"")
       self.assertGreater(len(handle.eval()), 0)
 
+  def testCachedValueReadBeforeWrite(self):
+    with self.test_session() as sess:
+      v = resource_variable_ops.ResourceVariable(0.0, caching_device=""cpu:0"")
+      sess.run(v.initializer)
+      value, _ = sess.run([v, v.assign_add(1.0)])
+      self.assertAllEqual(value, 0.0)
+
   def testAssignVariableDtypeMismatchEager(self):
     with context.eager_mode():
       handle = resource_variable_ops.var_handle_op(
",0,train
7b5d04c60437a415fc4edb5a97d939a1a3babe14,tensorflow/tensorflow,"Makes most variable writes depend on the cached value.

This disallows some undefined behavior with unordered reads and writes.

PiperOrigin-RevId: 198633444",resource_variable_ops.py,"@@ -576,6 +576,21 @@ class ResourceVariable(variables.Variable):
     self._constraint = None
     self._cached_shape_as_list = None
 
+  @contextlib.contextmanager
+  def _assign_dependencies(self):
+    """"""Makes assignments depend on the cached value, if any.
+
+    This prevents undefined behavior with reads not ordered wrt writes.
+
+    Yields:
+      None.
+    """"""
+    if self._cached_value is not None:
+      with ops.control_dependencies([self._cached_value]):
+        yield
+    else:
+      yield
+
   def __nonzero__(self):
     return self.__bool__()
 
@@ -865,7 +880,7 @@ class ResourceVariable(variables.Variable):
     # TODO(apassos): this here and below is not atomic. Consider making it
     # atomic if there's a way to do so without a performance cost for those who
     # don't need it.
-    with _handle_graph(self.handle):
+    with _handle_graph(self.handle), self._assign_dependencies():
       assign_sub_op = gen_resource_variable_ops.assign_sub_variable_op(
           self.handle, ops.convert_to_tensor(delta, dtype=self.dtype),
           name=name)
@@ -889,7 +904,7 @@ class ResourceVariable(variables.Variable):
       it will return the `Operation` that does the assignment, and when in eager
       mode it will return `None`.
     """"""
-    with _handle_graph(self.handle):
+    with _handle_graph(self.handle), self._assign_dependencies():
       assign_add_op = gen_resource_variable_ops.assign_add_variable_op(
           self.handle, ops.convert_to_tensor(delta, dtype=self.dtype),
           name=name)
@@ -921,6 +936,8 @@ class ResourceVariable(variables.Variable):
       it will return the `Operation` that does the assignment, and when in eager
       mode it will return `None`.
     """"""
+    # Note: not depending on the cached value here since this can used to
+    # initialize the variable.
     with _handle_graph(self.handle):
       value_tensor = ops.convert_to_tensor(value, dtype=self.dtype)
       self._shape.assert_is_compatible_with(value_tensor.shape)
@@ -933,7 +950,7 @@ class ResourceVariable(variables.Variable):
   def _strided_slice_assign(self, begin, end, strides, value, name, begin_mask,
                             end_mask, ellipsis_mask, new_axis_mask,
                             shrink_axis_mask):
-    with _handle_graph(self.handle):
+    with _handle_graph(self.handle), self._assign_dependencies():
       return self._lazy_read(
           gen_array_ops.resource_strided_slice_assign(
               ref=self.handle,
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",binary_ops_test.py,"@@ -24,7 +24,6 @@ import numpy as np
 
 from tensorflow.compiler.tests import xla_test
 from tensorflow.python.framework import dtypes
-from tensorflow.python.framework import errors
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import bitwise_ops
@@ -1612,8 +1611,7 @@ class BinaryOpsTest(xla_test.XLATestCase):
 
   @test_util.disable_mlir_bridge(""Error handling"")
   def testBroadcastArgsError(self):
-    with self.assertRaisesWithPredicateMatch(errors.InvalidArgumentError,
-                                             ""Incompatible shapes""):
+    with self.assertRaisesIncompatibleShapesError():
       self._testBinary(array_ops.broadcast_dynamic_shape,
                        np.array([1, 2, 3], dtype=np.int32),
                        np.array([4, 5, 6], dtype=np.int32),
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",test_util.py,"@@ -3100,6 +3100,12 @@ class TensorFlowTestCase(googletest.TestCase):
     return self.assertRaisesWithPredicateMatch(errors.OpError,
                                                expected_err_re_or_predicate)
 
+  def assertRaisesIncompatibleShapesError(
+      self, exception_type=errors.InvalidArgumentError):
+    return self.assertRaisesWithPredicateMatch(
+        exception_type, r""Incompatible shapes|Dimensions must be equal|""
+        r""required broadcastable shapes"")
+
   def assertShapeEqual(self, np_array, tf_tensor, msg=None):
     """"""Asserts that a Numpy ndarray and a TensorFlow tensor have the same shape.
 
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",check_ops_test.py,"@@ -285,9 +285,8 @@ First 2 elements of y:
     # The exception in eager and non-eager mode is different because
     # eager mode relies on shape check done as part of the C++ op, while
     # graph mode does shape checks when creating the `Operation` instance.
-    with self.assertRaisesRegex((errors.InvalidArgumentError, ValueError),
-                                (r""Incompatible shapes: \[3\] vs. \[2\]|""
-                                 r""Dimensions must be equal, but are 3 and 2"")):
+    with self.assertRaisesIncompatibleShapesError(
+        (errors.InvalidArgumentError, ValueError)):
       with ops.control_dependencies([check_ops.assert_equal(small, small_2)]):
         out = array_ops.identity(small)
       self.evaluate(out)
@@ -353,9 +352,8 @@ class AssertNoneEqualTest(test.TestCase):
     # The exception in eager and non-eager mode is different because
     # eager mode relies on shape check done as part of the C++ op, while
     # graph mode does shape checks when creating the `Operation` instance.
-    with self.assertRaisesRegex((ValueError, errors.InvalidArgumentError),
-                                (r""Incompatible shapes: \[3\] vs. \[2\]|""
-                                 r""Dimensions must be equal, but are 3 and 2"")):
+    with self.assertRaisesIncompatibleShapesError(
+        (ValueError, errors.InvalidArgumentError)):
       with ops.control_dependencies(
           [check_ops.assert_none_equal(small, big)]):
         out = array_ops.identity(small)
@@ -581,10 +579,8 @@ class AssertLessTest(test.TestCase):
     # The exception in eager and non-eager mode is different because
     # eager mode relies on shape check done as part of the C++ op, while
     # graph mode does shape checks when creating the `Operation` instance.
-    with self.assertRaisesRegex(  # pylint:disable=g-error-prone-assert-raises
-        (ValueError, errors.InvalidArgumentError),
-        (r""Incompatible shapes: \[3\] vs. \[2\]|""
-         ""Dimensions must be equal, but are 3 and 2"")):
+    with self.assertRaisesIncompatibleShapesError(
+        (ValueError, errors.InvalidArgumentError)):
       with ops.control_dependencies([check_ops.assert_less(small, big)]):
         out = array_ops.identity(small)
       self.evaluate(out)
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",cwise_ops_binary_test.py,"@@ -958,10 +958,8 @@ class ComparisonOpTest(test.TestCase):
     y = np.arange(0, 10).reshape([5, 2])
     for t in dtypes:
       for f in funcs:
-        with self.assertRaisesRegex(
-            (ValueError, errors.InvalidArgumentError),
-            ""Incompatible shapes|Dimensions must be equal|""
-            ""required broadcastable shapes""):
+        with self.assertRaisesIncompatibleShapesError(
+            (ValueError, errors.InvalidArgumentError)):
           f(x.astype(t), y.astype(t))
 
   def testEqualDType(self):
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",cwise_ops_test.py,"@@ -217,10 +217,8 @@ class ComparisonOpTest(test.TestCase):
     for t in dtypes:
       for f in funcs:
         with self.subTest(t=t, f=f):
-          with self.assertRaisesRegex(
-              (ValueError, errors.InvalidArgumentError),
-              ""Incompatible shapes|Dimensions must be equal|""
-              ""required broadcastable shapes""):
+          with self.assertRaisesIncompatibleShapesError(
+              (ValueError, errors.InvalidArgumentError)):
             f(x.astype(t), y.astype(t))
 
 
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",bincount_ops_test.py,"@@ -801,8 +801,7 @@ class TestSparseCountFailureModes(test.TestCase):
         np.array([[3, 0, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
     weights = sparse_ops.from_dense(
         np.array([[3, 1, 1, 0], [0, 0, 0, 0], [5, 0, 4, 4]], dtype=np.int32))
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                ""Incompatible shapes""):
+    with self.assertRaisesIncompatibleShapesError():
       self.evaluate(bincount_ops.sparse_bincount(x, weights=weights, axis=-1))
 
   def test_sparse_input_wrong_shape_fails(self):
",0,train
bb2d8e9dc75dbb6f2d22a438c5e5fb3908700438,tensorflow/tensorflow,"Introduce TestCase.assertRaisesIncompatibleShapesError.

The new helper allows testing for incompatible shapes errors independently of whether they were produced by XLA, MLIR-generated kernels, or classic TensorFlow kernels.

PiperOrigin-RevId: 361612621
Change-Id: Ic9ba732ccb3483e30af6535024a37ab4e53776a3",nn_loss_scaling_utilities_test.py,"@@ -98,9 +98,8 @@ class LossUtilitiesTest(test_lib.TestCase, parameterized.TestCase):
           self.evaluate(loss), (2. * 0.3 + 0.5 * 0.7 + 4. * 0.2 + 1. * 0.8) / 2)
 
   def testComputeAverageLossInvalidSampleWeights(self):
-    with self.assertRaisesRegex((ValueError, errors_impl.InvalidArgumentError),
-                                (r""Incompatible shapes: \[3\] vs. \[2\]|""
-                                 ""Dimensions must be equal"")):
+    with self.assertRaisesIncompatibleShapesError(
+        (ValueError, errors_impl.InvalidArgumentError)):
       nn_impl.compute_average_loss([2.5, 6.2, 5.],
                                    sample_weight=[0.2, 0.8],
                                    global_batch_size=10)
",0,train
5b725bc16f5f548439ef06353952205677076f5a,tensorflow/tensorflow,"cmsis-nn: Revert dynamic allocation for quant params

File affected: cmsis-nn/depthwise_conv.cc

Dynamic allocation of memory for output shift and
multiplier fails (running whole networks) when
done together with scratch buffer for
optimization. The issue is tracked in b/158779832.
This patch reverts to static allocation
for output shift and multiplier until the scratch buffer
issue is fixed.",depthwise_conv.cc,"@@ -36,6 +36,7 @@ constexpr int kInputTensor = 0;
 constexpr int kFilterTensor = 1;
 constexpr int kBiasTensor = 2;
 constexpr int kOutputTensor = 0;
+constexpr int kMaxChannels = 256;
 
 // Depthwise conv is quantized along dimension 3:
 // https://www.tensorflow.org/lite/performance/quantization_spec
@@ -49,8 +50,9 @@ struct OpData {
   int output_shift;
 
   // Per channel output multiplier and shift.
-  int32_t* per_channel_output_multiplier;
-  int32_t* per_channel_output_shift;
+  // TODO: Allocate dynamic buffers when b/158779832 is resolved
+  int32_t per_channel_output_multiplier[kMaxChannels];
+  int32_t per_channel_output_shift[kMaxChannels];
   // The range of the fused activation layer. For example for kNone and
   // uint8_t these would be 0 and 255.
   int32_t output_activation_min;
@@ -129,13 +131,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     // Allocate memory for per-channel quantization parameters
     const int num_channels =
         filter->dims->data[kDepthwiseConvQuantizedDimension];
-    // Dynamically allocate per-channel quantization parameters.
-    TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
-        context, num_channels * sizeof(int32_t),
-        reinterpret_cast<void**>(&data->per_channel_output_multiplier)));
-    TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
-        context, num_channels * sizeof(int32_t),
-        reinterpret_cast<void**>(&data->per_channel_output_shift)));
+    TFLITE_DCHECK_LE(num_channels, kMaxChannels);
+
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
 
@@ -236,7 +233,7 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
 
 void EvalQuantizedPerChannel(TfLiteContext* context, TfLiteNode* node,
                              TfLiteDepthwiseConvParams* params,
-                             const OpData* data, const TfLiteTensor* input,
+                             OpData* data, const TfLiteTensor* input,
                              const TfLiteTensor* filter,
                              const TfLiteTensor* bias, TfLiteTensor* output) {
   cmsis_nn_dw_conv_params dw_conv_params;
@@ -408,7 +405,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   auto* params =
       reinterpret_cast<TfLiteDepthwiseConvParams*>(node->builtin_data);
-  const OpData& data = *(static_cast<const OpData*>(node->user_data));
+  OpData& data = *(static_cast<OpData*>(node->user_data));
 
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
",0,train
441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client.

PiperOrigin-RevId: 198930874",executable_build_options.cc,"@@ -87,6 +87,18 @@ ExecutableBuildOptions::dump_optimized_hlo_proto_to() const {
   return dump_optimized_hlo_proto_to_;
 }
 
+ExecutableBuildOptions&
+ExecutableBuildOptions::set_dump_unoptimized_hlo_proto_to(
+    tensorflow::StringPiece dirpath) {
+  dump_unoptimized_hlo_proto_to_ = dirpath.ToString();
+  return *this;
+}
+
+const tensorflow::gtl::optional<string>&
+ExecutableBuildOptions::dump_unoptimized_hlo_proto_to() const {
+  return dump_unoptimized_hlo_proto_to_;
+}
+
 ExecutableBuildOptions& ExecutableBuildOptions::set_dump_per_pass_hlo_proto_to(
     tensorflow::StringPiece dirpath) {
   dump_per_pass_hlo_proto_to_ = dirpath.ToString();
",0,train
441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client.

PiperOrigin-RevId: 198930874",executable_build_options.h,"@@ -65,6 +65,13 @@ class ExecutableBuildOptions {
       tensorflow::StringPiece dirpath);
   const tensorflow::gtl::optional<string>& dump_optimized_hlo_proto_to() const;
 
+  // If set, specifies a dirpath to dump the start-of-optimization-pipeline HLO
+  // protobuf to (as in DebugOptions).
+  ExecutableBuildOptions& set_dump_unoptimized_hlo_proto_to(
+      tensorflow::StringPiece dirpath);
+  const tensorflow::gtl::optional<string>& dump_unoptimized_hlo_proto_to()
+      const;
+
   // If set, specifies a dirpath to dump the per-pass-in-pipeline HLO protobufs
   // to (as in DebugOptions).
   ExecutableBuildOptions& set_dump_per_pass_hlo_proto_to(
@@ -95,6 +102,7 @@ class ExecutableBuildOptions {
   bool result_layout_set_ = false;
   tensorflow::gtl::optional<string> generate_hlo_graph_;
   tensorflow::gtl::optional<string> dump_optimized_hlo_proto_to_;
+  tensorflow::gtl::optional<string> dump_unoptimized_hlo_proto_to_;
   tensorflow::gtl::optional<string> dump_per_pass_hlo_proto_to_;
   DeviceMemoryAllocator* device_allocator_ = nullptr;
   std::vector<std::string> disabled_hlo_passes_;
",0,train
441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client.

PiperOrigin-RevId: 198930874",xla_client.py,"@@ -353,6 +353,7 @@ class CompileOptions(object):
   def __init__(self):
     self.generate_hlo_graph = None
     self.dump_optimized_hlo_proto_to = None
+    self.dump_unoptimized_hlo_proto_to = None
     self.dump_per_pass_hlo_proto_to = None
     self.hlo_profile = False
 
",0,train
441979ff0399418b7883ca6c267c08fc716ce74b,tensorflow/tensorflow,"[XLA] Add an unoptimized HLO output flag to ExecutableBuildOptions and to the XLA local Python client.

PiperOrigin-RevId: 198930874",local_service.cc,"@@ -108,6 +108,11 @@ ExecutionOptions CreateExecutionOptions(
         ->set_xla_dump_optimized_hlo_proto_to(
             build_options.dump_optimized_hlo_proto_to().value());
   }
+  if (build_options.dump_unoptimized_hlo_proto_to().has_value()) {
+    execution_options.mutable_debug_options()
+        ->set_xla_dump_unoptimized_hlo_proto_to(
+            build_options.dump_unoptimized_hlo_proto_to().value());
+  }
   if (build_options.dump_per_pass_hlo_proto_to().has_value()) {
     execution_options.mutable_debug_options()
         ->set_xla_dump_per_pass_hlo_proto_to(
",0,train
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shape_refiner_test.cc,"@@ -980,10 +980,10 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackInt64) {
 
   InputList inputs{
       // clang-format off
-      Input(ops::Const<int64>(root, 10LL)),
-      Input(ops::Const<int64>(root, 20LL)),
+      Input(ops::Const<int64>(root, int64{10})),
+      Input(ops::Const<int64>(root, int64{20})),
       Input(Output(scalar_non_const)),
-      Input(ops::Const<int64>(root, 1LL << 40)),
+      Input(ops::Const<int64>(root, int64{1} << 40)),
   };  // clang-format on
   auto pack = ops::Stack(root, inputs);
   TF_ASSERT_OK(root.status());
@@ -1008,8 +1008,8 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackUnknownDim) {
   Scope root = Scope::NewRootScope();
 
   InputList inputs{
-      Input(ops::Const<int64>(root, 10LL)),
-      Input(ops::Const<int64>(root, -1LL)),
+      Input(ops::Const<int64>(root, int64{10})),
+      Input(ops::Const<int64>(root, int64{-1})),
   };
   auto pack = ops::Stack(root, inputs);
   TF_ASSERT_OK(root.status());
@@ -1035,8 +1035,8 @@ TEST_F(ShapeRefinerTest, ConstantValueAsShape_PackInvalidInput) {
 
   // Inputs are length 2 vectors instead of scalars.
   InputList inputs{
-      Input(ops::Const<int64>(root, {10LL, 20LL})),
-      Input(ops::Const<int64>(root, {10LL, 21LL})),
+      Input(ops::Const<int64>(root, {int64{10}, int64{20}})),
+      Input(ops::Const<int64>(root, {int64{10}, int64{21}})),
   };
   auto pack = ops::Stack(root, inputs);
   TF_ASSERT_OK(root.status());
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",debug_io_utils.cc,"@@ -395,11 +395,12 @@ Status DebugIO::PublishDebugMetadata(
     } else if (absl::StartsWith(absl::AsciiStrToLower(url), kFileURLScheme)) {
       const string dump_root_dir = url.substr(strlen(kFileURLScheme));
       const string core_metadata_path = AppendTimestampToFilePath(
-          io::JoinPath(
-              dump_root_dir,
-              strings::StrCat(DebugNodeKey::kMetadataFilePrefix,
-                              DebugIO::kCoreMetadataTag, ""sessionrun"",
-                              strings::Printf(""%.14lld"", session_run_index))),
+          io::JoinPath(dump_root_dir,
+                       strings::StrCat(
+                           DebugNodeKey::kMetadataFilePrefix,
+                           DebugIO::kCoreMetadataTag, ""sessionrun"",
+                           strings::Printf(""%.14lld"", static_cast<long long>(
+                                                          session_run_index)))),
           Env::Default()->NowMicros());
       status.Update(DebugFileIO::DumpEventProtoToFile(
           event, string(io::Dirname(core_metadata_path)),
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",eager_service_impl.cc,"@@ -557,7 +557,7 @@ tensorflow::Status EagerServiceImpl::GetServerContext(
     return errors::InvalidArgument(strings::Printf(
         ""Unable to find a context_id matching the specified one ""
         ""(%llu). Perhaps the worker was restarted, or the context was GC'd?"",
-        context_id));
+        static_cast<unsigned long long>(context_id)));
   }
 
   *server_context = iter->second;
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",graph_mgr.cc,"@@ -303,7 +303,8 @@ Status GraphMgr::Register(
   // Inserts one item into table_.
   {
     mutex_lock l(mu_);
-    *graph_handle = strings::Printf(""%016llx"", ++next_id_);
+    *graph_handle =
+        strings::Printf(""%016llx"", static_cast<long long>(++next_id_));
     item->handle = *graph_handle;
     CHECK(table_.insert({*graph_handle, item}).second);
   }
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",batch_dataset_op.cc,"@@ -54,7 +54,8 @@ class BatchDatasetOp::Dataset : public DatasetBase {
         input_(input),
         op_version_(op_version),
         traceme_metadata_(
-            {{""batch_size"", strings::Printf(""%lld"", batch_size)},
+            {{""batch_size"",
+              strings::Printf(""%lld"", static_cast<long long>(batch_size))},
              {""drop_remainder"", drop_remainder ? ""true"" : ""false""},
              {""parallel_copy"", parallel_copy ? ""true"" : ""false""}}) {
     input_->Ref();
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",map_and_batch_dataset_op.cc,"@@ -100,7 +100,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
         traceme_metadata_(
             {{""autotune"",
               num_parallel_calls == model::kAutotune ? ""true"" : ""false""},
-             {""batch_size"", strings::Printf(""%lld"", batch_size)},
+             {""batch_size"",
+              strings::Printf(""%lld"", static_cast<long long>(batch_size))},
              {""drop_remainder"", drop_remainder ? ""true"" : ""false""}}) {
     input_->Ref();
   }
@@ -285,8 +286,8 @@ class MapAndBatchDatasetOp::Dataset : public DatasetBase {
     }
 
     TraceMeMetadata GetTraceMeMetadata() const override {
-      int64 parallelism = -1;
-      int64 max_batch_results = -1;
+      long long parallelism = -1;        // NOLINT
+      long long max_batch_results = -1;  // NOLINT
       // NOTE: We only set the parallelism value if the lock can be acquired
       // right away to avoid introducing tracing overhead.
       if (mu_->try_lock()) {
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_interleave_dataset_op.cc,"@@ -107,8 +107,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
         output_types_(output_types),
         output_shapes_(output_shapes),
         traceme_metadata_(
-            {{""block_length"", strings::Printf(""%lld"", block_length)},
-             {""cycle_length"", strings::Printf(""%lld"", cycle_length)},
+            {{""block_length"",
+              strings::Printf(""%lld"", static_cast<long long>(block_length))},
+             {""cycle_length"",
+              strings::Printf(""%lld"", static_cast<long long>(cycle_length))},
              {""deterministic"",
               deterministic.IsDeterministic() || deterministic.IsDefault()
                   ? ""true""
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",rebatch_dataset_op.cc,"@@ -62,7 +62,8 @@ class RebatchDatasetOp : public UnaryDatasetOpKernel {
           output_types_(output_types),
           output_shapes_(output_shapes),
           traceme_metadata_(
-              {{""num_replicas"", strings::Printf(""%lld"", num_replicas)}}) {
+              {{""num_replicas"", strings::Printf(""%lld"", static_cast<long long>(
+                                                            num_replicas))}}) {
       input_->Ref();
     }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",snapshot_dataset_op.cc,"@@ -1206,7 +1206,9 @@ class SnapshotDatasetOp : public UnaryDatasetOpKernel {
         string GetSnapshotFilename() {
           mutex_lock l(mu_);
           string snapshot_data_filename = io::JoinPath(
-              run_dir_, strings::Printf(""%08llu.snapshot"", next_file_index_));
+              run_dir_, strings::Printf(
+                            ""%08llu.snapshot"",
+                            static_cast<unsigned long long>(next_file_index_)));
           next_file_index_++;
           return snapshot_data_filename;
         }
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",interleave_dataset_op.cc,"@@ -62,8 +62,10 @@ class InterleaveDatasetOp::Dataset : public DatasetBase {
         output_types_(output_types),
         output_shapes_(output_shapes),
         traceme_metadata_(
-            {{""block_length"", strings::Printf(""%lld"", block_length)},
-             {""cycle_length"", strings::Printf(""%lld"", cycle_length)}}) {
+            {{""block_length"",
+              strings::Printf(""%lld"", static_cast<long long>(block_length))},
+             {""cycle_length"",
+              strings::Printf(""%lld"", static_cast<long long>(cycle_length))}}) {
     input_->Ref();
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",padded_batch_dataset_op.cc,"@@ -61,7 +61,8 @@ class PaddedBatchDatasetOp::Dataset : public DatasetBase {
         input_(input),
         op_version_(op_version),
         traceme_metadata_(
-            {{""batch_size"", strings::Printf(""%lld"", batch_size)},
+            {{""batch_size"",
+              strings::Printf(""%lld"", static_cast<long long>(batch_size))},
              {""drop_remainder"", drop_remainder ? ""true"" : ""false""}}) {
     input_->Ref();
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_interleave_dataset_op.cc,"@@ -172,8 +172,10 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
         traceme_metadata_(
             {{""autotune"",
               num_parallel_calls == model::kAutotune ? ""true"" : ""false""},
-             {""block_length"", strings::Printf(""%lld"", block_length)},
-             {""cycle_length"", strings::Printf(""%lld"", cycle_length)},
+             {""block_length"",
+              strings::Printf(""%lld"", static_cast<long long>(block_length))},
+             {""cycle_length"",
+              strings::Printf(""%lld"", static_cast<long long>(cycle_length))},
              {""deterministic"",
               deterministic.IsNondeterministic() ? ""false"" : ""true""}}) {
     input_->Ref();
@@ -467,8 +469,9 @@ class ParallelInterleaveDatasetOp::Dataset : public DatasetBase {
         mu_->unlock();
       }
       auto result = dataset()->traceme_metadata_;
-      result.push_back(
-          std::make_pair(""parallelism"", strings::Printf(""%lld"", parallelism)));
+      result.push_back(std::make_pair(
+          ""parallelism"",
+          strings::Printf(""%lld"", static_cast<long long>(parallelism))));
       return result;
     }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",parallel_map_dataset_op.cc,"@@ -471,8 +471,9 @@ class ParallelMapIterator : public DatasetBaseIterator {
     result.push_back(std::make_pair(""autotune"", autotune_ ? ""true"" : ""false""));
     result.push_back(
         std::make_pair(""deterministic"", deterministic_ ? ""true"" : ""false""));
-    result.push_back(
-        std::make_pair(""parallelism"", strings::Printf(""%lld"", parallelism)));
+    result.push_back(std::make_pair(
+        ""parallelism"",
+        strings::Printf(""%lld"", static_cast<long long>(parallelism))));
     return result;
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",prefetch_dataset_op.cc,"@@ -278,11 +278,13 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
         mu_->unlock();
       }
       data::TraceMeMetadata result;
-      result.push_back(
-          std::make_pair(""buffer_limit"", strings::Printf(""%lld"", limit)));
+      result.push_back(std::make_pair(
+          ""buffer_limit"",
+          strings::Printf(""%lld"", static_cast<long long>(limit))));
       if (dataset()->slack_period_ > 0) {
-        result.push_back(
-            std::make_pair(""slack"", strings::Printf(""%lld"", slack_us_.load())));
+        result.push_back(std::make_pair(
+            ""slack"",
+            strings::Printf(""%lld"", static_cast<long long>(slack_us_.load()))));
       }
       return result;
     }
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shard_dataset_op.cc,"@@ -48,8 +48,9 @@ class ShardDatasetOp::Dataset : public DatasetBase {
         input_(input),
         require_non_empty_(require_non_empty),
         traceme_metadata_(
-            {{""index"", strings::Printf(""%lld"", index)},
-             {""num_shards"", strings::Printf(""%lld"", num_shards)}}) {
+            {{""index"", strings::Printf(""%lld"", static_cast<long long>(index))},
+             {""num_shards"",
+              strings::Printf(""%lld"", static_cast<long long>(num_shards))}}) {
     input_->Ref();
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",shuffle_dataset_op.cc,"@@ -108,7 +108,8 @@ class ShuffleDatasetOpBase::ShuffleDatasetBase : public DatasetBase {
         buffer_size_(buffer_size),
         count_(count),
         traceme_metadata_(
-            {{""buffer_size"", strings::Printf(""%lld"", buffer_size)}}) {
+            {{""buffer_size"",
+              strings::Printf(""%lld"", static_cast<long long>(buffer_size))}}) {
     input_->Ref();
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",window_dataset_op.cc,"@@ -54,9 +54,12 @@ class WindowDatasetOp::Dataset : public DatasetBase {
         output_dtypes_(input_->output_dtypes().size(), {DT_VARIANT}),
         output_shapes_(input_->output_shapes().size(), TensorShape({})),
         traceme_metadata_(
-            {{""window_size"", strings::Printf(""%lld"", window_size)},
-             {""window_shift"", strings::Printf(""%lld"", window_shift)},
-             {""window_stride"", strings::Printf(""%lld"", window_stride)}}) {
+            {{""window_size"",
+              strings::Printf(""%lld"", static_cast<long long>(window_size))},
+             {""window_shift"",
+              strings::Printf(""%lld"", static_cast<long long>(window_shift))},
+             {""window_stride"", strings::Printf(""%lld"", static_cast<long long>(
+                                                           window_stride))}}) {
     input_->Ref();
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",math_grad.cc,"@@ -78,7 +78,7 @@ REGISTER_OP_GRADIENT(""Reciprocal"", InvGrad);
 Status SquareGrad(const AttrSlice& attrs, FunctionDef* g) {
   // clang-format off
   return GradForUnaryCwise(g, {
-      FDH::Const(""c"", 2LL),
+      FDH::Const(""c"", int64{2}),
       {{""two""}, ""Cast"", {""c""}, {{""SrcT"", DT_INT64}, {""DstT"", ""$T""}}},
       {{""x2""}, ""Mul"", {""x"", ""two""}, {}, {""dy""}},  // x * 2
       {{""dx""}, ""Mul"", {""dy"", ""x2""}},              // dy * (x * 2)
@@ -619,7 +619,7 @@ REGISTER_OP_GRADIENT(""Xdivy"", XdivyGrad);
 Status SquaredDifferenceGrad(const AttrSlice& attrs, FunctionDef* g) {
   // clang-format off
   return GradForBinaryCwise(g, {
-      FDH::Const(""c"", 2LL),
+      FDH::Const(""c"", int64{2}),
       {{""two""}, ""Cast"", {""c""}, {{""SrcT"", DT_INT64}, {""DstT"", ""$T""}}},
       {{""x_sub_y""}, ""Sub"", {""x"", ""y""}},
       {{""two_x_sub_y""}, ""Mul"", {""two"", ""x_sub_y""}},  // 2 * (x - y)
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",curl_http_request.cc,"@@ -141,7 +141,8 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env)
   // TODO(b/74351157): Enable HTTP/2.
 
   // Set up the progress meter.
-  CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, 0ULL));
+  CHECK_CURL_OK(
+      libcurl_->curl_easy_setopt(curl_, CURLOPT_NOPROGRESS, uint64{0}));
   CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFODATA, this));
   CHECK_CURL_OK(libcurl_->curl_easy_setopt(curl_, CURLOPT_XFERINFOFUNCTION,
                                            &CurlHttpRequest::ProgressCallback));
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",env.cc,"@@ -400,7 +400,7 @@ bool Env::CreateUniqueFileName(string* prefix, const string& suffix) {
 #else
   int32 pid = static_cast<int32>(getpid());
 #endif
-  uint64 now_microsec = NowMicros();
+  long long now_microsec = NowMicros();  // NOLINT
 
   *prefix += strings::Printf(""%s-%x-%d-%llx"", port::Hostname().c_str(), tid,
                              pid, now_microsec);
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",numbers.cc,"@@ -439,7 +439,7 @@ string HumanReadableNum(int64 value) {
     value = -value;
   }
   if (value < 1000) {
-    Appendf(&s, ""%lld"", value);
+    Appendf(&s, ""%lld"", static_cast<long long>(value));
   } else if (value >= static_cast<int64>(1e15)) {
     // Number bigger than 1E15; use that notation.
     Appendf(&s, ""%0.3G"", static_cast<double>(value));
@@ -472,7 +472,7 @@ string HumanReadableNumBytes(int64 num_bytes) {
     // No fractions for bytes.
     char buf[8];  // Longest possible string is '-XXXXB'
     snprintf(buf, sizeof(buf), ""%s%lldB"", neg_str,
-             static_cast<int64>(num_bytes));
+             static_cast<long long>(num_bytes));
     return string(buf);
   }
 
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",debug_events_writer_test.cc,"@@ -68,8 +68,9 @@ class DebugEventsWriterTest : public ::testing::Test {
   }
 
   void SetUp() override {
-    dump_root_ = io::JoinPath(testing::TmpDir(),
-                              strings::Printf(""%010lld"", env()->NowMicros()));
+    dump_root_ = io::JoinPath(
+        testing::TmpDir(),
+        strings::Printf(""%010lld"", static_cast<long long>(env()->NowMicros())));
   }
 
   void TearDown() override {
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",events_writer.cc,"@@ -66,7 +66,7 @@ Status EventsWriter::InitIfNeeded() {
 
   filename_ =
       strings::Printf(""%s.out.tfevents.%010lld.%s%s"", file_prefix_.c_str(),
-                      static_cast<int64>(time_in_seconds),
+                      static_cast<long long>(time_in_seconds),
                       port::Hostname().c_str(), file_suffix_.c_str());
 
   // Reset recordio_writer (which has a reference to recordio_file_) so final
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",session_ref.cc,"@@ -53,7 +53,8 @@ struct RunCounter {
 };
 
 std::string SessionToHandle(Session* session) {
-  return strings::Printf(""%llu"", reinterpret_cast<uint64>(session));
+  return strings::Printf(""%llu"", static_cast<unsigned long long>(
+                                     reinterpret_cast<uintptr_t>(session)));
 }
 
 // The Session interface has many methods of the form:
",0,test
8f597046dc30c14b5413813d02c0e0aed399c177,tensorflow/tensorflow,"Use explicit primitive types with strings::Printf

PiperOrigin-RevId: 299753695
Change-Id: Iecbd08903b22442c210c3d404946077535a6089f",cuda_dnn.cc,"@@ -2620,8 +2620,8 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionForwardAlgorithm(
     bool specify_workspace_limit = scratch_allocator != nullptr;
     auto memory_limit_bytes =
         specify_workspace_limit
-            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll)
-            : 0ll;
+            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0})
+            : int64{0};
     SE_ASSIGN_OR_RETURN(cudnnConvolutionFwdAlgo_t algo,
                         GetCudnnConvolutionForwardAlgo(
                             cudnn, input_nd, filter, conv, output_nd,
@@ -2673,8 +2673,8 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardDataAlgorithm(
     bool specify_workspace_limit = scratch_allocator != nullptr;
     auto memory_limit_bytes =
         specify_workspace_limit
-            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll)
-            : 0ll;
+            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0})
+            : int64{0};
     SE_ASSIGN_OR_RETURN(cudnnConvolutionBwdDataAlgo_t algo,
                         GetCudnnConvolutionBackwardDataAlgo(
                             cudnn, input_nd, filter, conv, output_nd,
@@ -2725,8 +2725,8 @@ port::StatusOr<dnn::AlgorithmDesc> GetCudnnConvolutionBackwardFilterAlgorithm(
     bool specify_workspace_limit = scratch_allocator != nullptr;
     auto memory_limit_bytes =
         specify_workspace_limit
-            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), 0ll)
-            : 0ll;
+            ? std::max(scratch_allocator->GetMemoryLimitInBytes(), int64{0})
+            : int64{0};
     SE_ASSIGN_OR_RETURN(cudnnConvolutionBwdFilterAlgo_t algo,
                         GetCudnnConvolutionBackwardFilterAlgo(
                             cudnn, input_nd, filter, conv, output_nd,
",0,test
f600f4d82de7feded78087b4edf7295eea64dae3,tensorflow/tensorflow,"TensorFlow: change cuda-diagnostics to search for so.1
Change: 115010103",cuda_diagnostics.cc,"@@ -165,7 +165,7 @@ port::StatusOr<DriverVersion> Diagnostician::FindDsoVersion() {
   // DSO and yields its version number into the callback data, when found.
   auto iterate_phdr =
       [](struct dl_phdr_info *info, size_t size, void *data) -> int {
-    if (strstr(info->dlpi_name, ""libcuda.so"")) {
+    if (strstr(info->dlpi_name, ""libcuda.so.1"")) {
       VLOG(1) << ""found DLL info with name: "" << info->dlpi_name;
       char resolved_path[PATH_MAX] = {0};
       if (realpath(info->dlpi_name, resolved_path) == nullptr) {
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",backprop.py,"@@ -88,14 +88,14 @@ def _magic_gradient_function(op_name, attr_tuple, num_inputs, num_outputs,
 
   Args:
     op_name: the name of the op to be differentiated.
-    attr_tuple: the attrs, as a tuple
-    num_inputs: the number of inputs to the op
-    num_outputs: the number of outputs of the op
+    attr_tuple: the attrs, as a tuple.
+    num_inputs: the number of inputs to the op.
+    num_outputs: the number of outputs of the op.
     *tensors: a list of tensors, composed of, in order, the inputs, the outputs,
       and the gradients with respect to the outputs.
 
   Returns:
-    the gradients with respect to the inputs of the function, as a list.
+    The gradients with respect to the inputs of the function, as a list.
   """"""
   inputs = tensors[:num_inputs]
   outputs = tensors[num_inputs:num_inputs + num_outputs]
@@ -232,9 +232,9 @@ def implicit_val_and_grad(f):
     ag_core.active_progenitors.remove(start_node)
     if not ag_core.isnode(end_node):
       raise ValueError(
-          ""Target not part of a computation being traced. %s"" % end_node)
+          ""Target not part of a computation being traced. %s."" % end_node)
     if start_node not in end_node.progenitors:
-      raise ValueError(""Target not derived from source. %s %s"" %
+      raise ValueError(""Target not derived from source. %s %s."" %
                        (end_node.progenitors, repr(start_node)))
     output_gradients = kwds.get(""output_gradients"", None)
     if output_gradients is None:
@@ -282,7 +282,7 @@ def _get_arg_spec(f, params):
     return params
   else:
     raise ValueError(
-        ""params must be all strings or all integers; got %s"" % params)
+        ""params must be all strings or all integers; got %s."" % params)
 
 
 def gradients_function(f, params=None):
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",context.py,"@@ -286,8 +286,8 @@ class Context(object):
         it is unset.
       `attrs` contains the attributes of the operation as a `tuple` of
         alternating attribute names and attribute values.
-      `inputs` is the `list` of input `tfe.Tensor`(s) to the op.
-      `outputs` is the `list` of output `tfe.Tensor`(s) from the op.
+      `inputs` is the `list` of input `Tensor`(s) to the op.
+      `outputs` is the `list` of output `Tensor`(s) from the op.
        Return value(s) from the callback are ignored.
     """"""
     # TODO(cais): (b/64674139) Allow access to function-internal operations.
@@ -314,7 +314,7 @@ def _initialize_context():
 
 
 def context():
-  """"""Returns a singleton Context object.""""""
+  """"""Returns a singleton context object.""""""
   if _context is None:
     _initialize_context()
   return _context
@@ -373,7 +373,7 @@ def device(name):
   ```python
   with tfe.device('gpu:0'):
     with tfe.device('cpu:0'):
-      shape = tfe.Tensor([], dtype=tf.int32)
+      shape = Tensor([], dtype=tf.int32)
     x = ops.truncated_normal(shape, tf.float32)
   ```
   will ensure that the `shape` Tensor is on CPU but the `truncated_normal`
@@ -390,13 +390,13 @@ def device(name):
 
 
 def run(main=None, argv=None):
-  """"""Runs the program with an optional 'main' function and 'argv' list.
+  """"""Runs the program with an optional main function and argv list.
 
   The program will run with eager execution enabled.
 
   Args:
-    main: the main function to run
-    argv: the arguments to pass to it
+    main: the main function to run.
+    argv: the arguments to pass to it.
   """"""
   enable_eager_execution()
   app.run(main, argv)
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",custom_gradient.py,"@@ -38,7 +38,7 @@ def custom_gradient(f):
   """"""Decorator to define a function with a custom gradient.
 
   The input function is expected to return the tuple
-    (results, gradient_function)
+    (results, gradient_function).
 
   The output function will return results while possibly recording the
   gradient_function and inputs in the tape.
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",execute.py,"@@ -153,9 +153,10 @@ def make_shape(v, arg_name):
   try:
     shape = tensor_shape.as_shape(v)
   except TypeError as e:
-    raise TypeError(""Error converting %s to a TensorShape: %s"" % (arg_name, e))
+    raise TypeError(""Error converting %s to a TensorShape: %s."" % (arg_name, e))
   except ValueError as e:
-    raise ValueError(""Error converting %s to a TensorShape: %s"" % (arg_name, e))
+    raise ValueError(""Error converting %s to a TensorShape: %s."" % (arg_name,
+                                                                    e))
   if shape.ndims is None:
     return None
   else:
@@ -171,7 +172,7 @@ def make_tensor(v, arg_name):
     text_format.Merge(v, pb)
     return pb
   raise TypeError(
-      ""Don't know how to convert %s to a TensorProto for argument '%s'"" %
+      ""Don't know how to convert %s to a TensorProto for argument '%s'."" %
       (repr(v), arg_name))
 
 
@@ -217,7 +218,7 @@ def args_to_mixed_eager_tensors(lists):
   for l in lists[1:]:
     if len(l) != len(lists[0]):
       raise ValueError(
-          ""Expected list arguments to be the same length: %d != %d (%r vs. %r)""
+          ""Expected list arguments to be the same length: %d != %d (%r vs. %r).""
           % (len(lists[0]), len(l), lists[0], l))
     lists_ret.append([])
 
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",execution_callbacks.py,"@@ -228,8 +228,8 @@ def add_execution_callback(callback):
         it is unset.
       `attrs` contains the attributes of the operation as a `tuple` of
         alternating attribute name and attribute value.
-      `inputs` is the `list` of input `tfe.Tensor`(s) to the op.
-      `outputs` is the `list` of output `tfe.Tensor`(s) from the op.
+      `inputs` is the `list` of input `Tensor`(s) to the op.
+      `outputs` is the `list` of output `Tensor`(s) from the op.
        Return value(s) from the callback are ignored.
   """"""
   context.get_default_context().add_post_execution_callback(callback)
@@ -246,8 +246,8 @@ def seterr(inf_or_nan=None):
   Example:
   ``` python
   tfe.seterr(inf_or_nan=""raise"")
-  a = tfe.Tensor(10.0)
-  b = tfe.Tensor(0.0)
+  a = Tensor(10.0)
+  b = Tensor(0.0)
   c = a / b  # <-- Raises InfOrNanError.
 
   tfe.seterr(inf_or_nan=""ignore"")
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",function.py,"@@ -41,7 +41,7 @@ from tensorflow.python.util import nest
 # Thread-local storage for tfe Tensors which are referenced while evaluating a
 # graph-mode function.
 _scoped_captures = threading.local()
-# _scoped_captures.tensors is either None or a map from tfe.Tensor id to a pair
+# _scoped_captures.tensors is either None or a map from Tensor id to a pair
 # of a tfe tensor and its corresponding placeholder to pass as a function
 # argument. The value should be None unless we're in function definition
 # context.
@@ -62,7 +62,7 @@ def _convert_to_graph_tensor(value, dtype=None, name=None, as_ref=False):
   """"""Captures a Tensor while building a graph mode function.
 
   Arguments:
-    value: A tfe.Tensor object
+    value: A Tensor object.
     dtype: The datatype of the value produced by the node in the graph.
     name:  Name of the node in the graph.
     as_ref: Ignored (required by register_tensor_conversion_function).
@@ -482,12 +482,12 @@ def defun(func):
   func must be a Python function that constructs a TensorFlow graph,
   typically using functions in the tensorflow module.
 
-  Arguments to func can be either tfe.Tensor objects or Python
+  Arguments to func can be either Tensor objects or Python
   objects. Non-Tensor python objects are treated as constants, and new function
   definitions are created internally based on their values.
 
-  func must return a tf.Tensor (NOT a tfe.Tensor) or a list of tf.Tensor (NOT a
-  tfe.Tensor). TODO(apassos) make the wrapped tfe ops return tf.Tensors when in
+  func must return a tf.Tensor (NOT a Tensor) or a list of tf.Tensor (NOT a
+  Tensor). TODO(apassos) make the wrapped tfe ops return tf.Tensors when in
   graph mode.
 
   TODO(apassos): deal with captured global state. Deal with control flow.
@@ -497,6 +497,6 @@ def defun(func):
 
   Returns:
      A callable that will execute the compiled function (and return zero
-     or more tfe.Tensor objects)
+     or more Tensor objects).
   """"""
   return named_defun(func, func.__name__)
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",ops_test.py,"@@ -99,7 +99,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   # with tfe.device('/gpu:0'):
   #   ...  # code here
   #   with tfe.device('/cpu:0'):
-  #     shape = tfe.Tensor(...)
+  #     shape = Tensor(...)
   #   y = tfe.ops.random_uniform(.., shape)
   #
   # Without the CPU device block tfe.ops.random_uniform would fail since the
@@ -108,7 +108,7 @@ class TargetTest(test_util.TensorFlowTestCase):
   # After this change, we simplify the code:
   #
   # with tfe.device('/gpu:0'):
-  #   y = tfe.ops.random_uniform(, tfe.Tensor(...))
+  #   y = tfe.ops.random_uniform(, Tensor(...))
   #
   # The approximation is not exact since if there are GPU kernels which do not
   # require host memory for int32 tensors, there will be a discrepancy between
",0,train
cd377811d118f408945c97c9aead6f7dbc8322cb,tensorflow/tensorflow,"Comment and error message consistency cleanup.

PiperOrigin-RevId: 168422582",pywrap_tfe.h,"@@ -64,7 +64,7 @@ PyObject* TFE_Py_RegisterExceptionClass(PyObject* e);
 // class registered via TFE_Py_RegisterExceptionClass) and returns -1.
 int TFE_Py_MayBeRaiseException(TF_Status* status);
 
-// Returns the string associated with the passed-in python object/
+// Returns the string associated with the passed-in python object.
 char* TFE_GetPyThonString(PyObject* o);
 
 #endif  // TENSORFLOW_PYTHON_EAGER_PYWRAP_TFE_H_
",0,train
568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs

PiperOrigin-RevId: 229780959",generate_examples.py,"@@ -1424,6 +1424,36 @@ def make_conv_tests(zip_path):
   make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
 
 
+# Note: This is a regression test for a bug (b/122651451) that Toco incorrectly
+# erases the reduction indices array while it's shared with other ops.
+def make_l2norm_shared_epsilon_tests(zip_path):
+  """"""Regression test for a bug (b/122651451).""""""
+
+  # Chose a set of parameters
+  test_parameters = [{
+      ""input_shape"": [[5, 7]],
+      ""dim"": [1],
+      ""epsilon"": [1e-8],
+  }]
+
+  def build_graph(parameters):
+    input_tensor = tf.placeholder(
+        dtype=tf.float32, name=""input"", shape=parameters[""input_shape""])
+    epsilon = tf.constant(parameters[""epsilon""])
+    out1 = tf.nn.l2_normalize(input_tensor, parameters[""dim""], epsilon=epsilon)
+    out2 = tf.nn.l2_normalize(input_tensor, parameters[""dim""], epsilon=epsilon)
+    out = out1 + out2
+    return [input_tensor], [out]
+
+  def build_inputs(parameters, sess, inputs, outputs):
+    input_values = create_tensor_data(
+        np.float32, parameters[""input_shape""], min_value=-4, max_value=10)
+    return [input_values], sess.run(
+        outputs, feed_dict=dict(zip(inputs, [input_values])))
+
+  make_zip_of_tests(zip_path, test_parameters, build_graph, build_inputs)
+
+
 # Note: This is a regression test for a bug (b/112436267) that Toco incorrectly
 # fuses weights when multiple Conv2D/FULLY_CONNECTED ops share the same constant
 # weight tensor.
",0,train
568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs

PiperOrigin-RevId: 229780959",identify_l2_normalization.cc,"@@ -151,20 +151,12 @@ std::vector<std::unique_ptr<Operator>>::iterator FindOperator(
 
   // Erase the subgraph that is now replaced by L2Normalization
   model->operators.erase(FindOperator(model, square_op));
-  model->EraseArray(sum_op->inputs[0]);
-  if (sum_op->inputs.size() > 1) {
-    model->EraseArray(sum_op->inputs[1]);
-  }
-  model->operators.erase(FindOperator(model, sum_op));
+  DeleteOpAndArraysIfUnused(model, sum_op);
   if (add_op) {
-    model->EraseArray(add_op->inputs[0]);
-    model->EraseArray(add_op->inputs[1]);
-    model->operators.erase(FindOperator(model, add_op));
+    DeleteOpAndArraysIfUnused(model, add_op);
   }
-  model->EraseArray(sqrt_or_rsqrt_op->inputs[0]);
-  model->operators.erase(FindOperator(model, sqrt_or_rsqrt_op));
-  model->EraseArray(div_or_mul_op->inputs[1]);
-  model->operators.erase(FindOperator(model, div_or_mul_op));
+  DeleteOpAndArraysIfUnused(model, sqrt_or_rsqrt_op);
+  DeleteOpAndArraysIfUnused(model, div_or_mul_op);
   *modified = true;
   return ::tensorflow::Status::OK();
 }
",0,train
568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs

PiperOrigin-RevId: 229780959",tooling_util.cc,"@@ -173,7 +173,7 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model) {
   return false;
 }
 
-void DeleteOpAndArraysIfUnused(Model* model, Operator* op) {
+void DeleteOpAndArraysIfUnused(Model* model, const Operator* op) {
   for (const string& array_name : op->inputs) {
     DeleteArrayIfUsedOnce(array_name, model);
   }
",0,train
568b9e56038d2d2fe8927f9e0b538bf5e49116f6,tensorflow/tensorflow,"Fix Toco IdentifyL2Normalization bugs

PiperOrigin-RevId: 229780959",tooling_util.h,"@@ -72,7 +72,7 @@ bool DeleteArrayIfUsedOnce(const string& array_name, Model* model);
 
 // Deletes the op and any of its input and output arrays if they are unused
 // after the op has been deleted.
-void DeleteOpAndArraysIfUnused(Model* model, Operator* op);
+void DeleteOpAndArraysIfUnused(Model* model, const Operator* op);
 
 std::vector<std::unique_ptr<Operator>>::const_iterator FindOpWithOutput(
     const Model& model, const string& array_name);
",0,train
6fd13a50e5c48eb385cd0a5431cd6ca966fc4152,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-09-11

PiperOrigin-RevId: 396085135
Change-Id: I5219fbf4293c5aac0c04fa426b5c712ac7c77fbc",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 9, 10)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 9, 11)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
f59a82f2b08dca1641d5766fdd2234d3b665a862,tensorflow/tensorflow,"Replacing the current inner Cholesky decomposition loop with a rolled While loop version.

This allows much larger Cholesky decompositions (and thus matrix inversions) than previously possible on TPU, because the rolled While loops mean XLA compilation no longer times out.

While there is a minor runtime performance decrease (now 25ms vs 15ms for a 500x500 matrix), compilation is significantly faster (12.8s vs 55.2s for a 500x500 matrix).

PiperOrigin-RevId: 193114816",cholesky.cc,"@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the ""License"");
 you may not use this file except in compliance with the License.
@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/compiler/tf2xla/lib/batch_dot.h""
 #include ""tensorflow/compiler/tf2xla/lib/triangular_solve.h""
 #include ""tensorflow/compiler/tf2xla/lib/util.h""
+#include ""tensorflow/compiler/tf2xla/lib/while_loop.h""
 #include ""tensorflow/compiler/xla/literal_util.h""
 #include ""tensorflow/compiler/xla/shape_util.h""
 #include ""tensorflow/compiler/xla/status_macros.h""
@@ -31,68 +32,122 @@ namespace tensorflow {
 
 namespace {
 
+// The Cholesky–Banachiewicz algorithm. See
+// https://en.wikipedia.org/wiki/Cholesky_decomposition#The_Cholesky–Banachiewicz_and_Cholesky–Crout_algorithms
+// for a description.
+//
 // def cholesky_unblocked(a):
 //   assert len(a.shape) == 2 and a.shape[-2] == a.shape[-1]
 //   n = a.shape[-2]
 //   l = np.zeros_like(a)
 //   for j in xrange(n):
-//     r = l[..., j, :j]
-//     l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(r, r))
-//     l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j],
-//         np.transpose(r))) / l[..., j, j]
+//     row = l[..., j, :j]
+//     row_t = np.swapaxes(row, -1, -2)
+//     l[..., j, j] = np.sqrt(a[..., j, j] - np.dot(row, row_t))
+//     l[..., j+1:, j] = (a[..., j+1:, j] - np.dot(l[..., j+1:, :j], row_t)) /
+//                       l[..., j, j]
 //   return l
 xla::StatusOr<xla::ComputationDataHandle> CholeskyUnblocked(
     xla::ComputationBuilder* builder, const xla::ComputationDataHandle& a) {
-  TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::Shape> shape, builder->GetShape(a));
-  xla::ComputationDataHandle l = Zeros(builder, *shape);
-  const int64 n = xla::ShapeUtil::GetDimension(*shape, -2);
-  for (int j = 0; j < n; ++j) {
-    // Picture of block structure:
-    // ...   \
-    //        \
-    // -- r -- d
-    //         |\
-    //    B    c \
-    //         |  \
-    //         |  ...
-    //
-    //         ^
-    //      column j
-    TF_ASSIGN_OR_RETURN(auto d,
-                        SliceInMinorDims(builder, a, {j, j}, {j + 1, j + 1}));
-    TF_ASSIGN_OR_RETURN(auto c,
-                        SliceInMinorDims(builder, a, {j + 1, j}, {n, j + 1}));
-    xla::ComputationDataHandle new_d_squared = d;
-    xla::ComputationDataHandle br;
-    if (j > 0) {
-      TF_ASSIGN_OR_RETURN(auto r,
-                          SliceInMinorDims(builder, l, {j, 0}, {j + 1, j}));
-      TF_ASSIGN_OR_RETURN(auto b,
-                          SliceInMinorDims(builder, l, {j + 1, 0}, {n, j}));
-      TF_ASSIGN_OR_RETURN(auto r_squared,
-                          BatchDot(builder, r, r, /*transpose_x=*/false,
-                                   /*transpose_y=*/true, /*conjugate_x=*/false,
-                                   /*conjugate_y=*/false));
-      new_d_squared = builder->Sub(new_d_squared, r_squared);
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<xla::Shape> a_shape,
+                      builder->GetShape(a));
+  const int n_dims = xla::ShapeUtil::Rank(*a_shape);
+  const int64 n = xla::ShapeUtil::GetDimension(*a_shape, -1);
+  gtl::ArraySlice<int64> major_dims(xla::AsInt64Slice(a_shape->dimensions()),
+                                    /*pos=*/0,
+                                    /*len=*/n_dims - 2);
 
-      TF_ASSIGN_OR_RETURN(br, BatchDot(builder, b, r, /*transpose_x=*/false,
-                                       /*transpose_y=*/true,
-                                       /*conjugate_x=*/false,
-                                       /*conjugate_y=*/false));
-    }
-    auto new_d_inv = builder->Pow(
-        new_d_squared, FloatLiteral(builder, shape->element_type(), -0.5));
-    auto new_d = builder->Mul(new_d_inv, new_d_squared);
-    TF_ASSIGN_OR_RETURN(l, UpdateSliceInMinorDims(builder, l, new_d, {j, j}));
+  xla::ComputationDataHandle l = Zeros(builder, *a_shape);
 
-    if (j > 0) {
-      c = builder->Sub(c, br);
+  // Construct the for loop body to iterate over rows.
+  auto body_fn = [&](xla::ComputationDataHandle i,
+                     gtl::ArraySlice<xla::ComputationDataHandle> loop_vars,
+                     xla::ComputationBuilder* body_builder)
+      -> xla::StatusOr<std::vector<xla::ComputationDataHandle>> {
+    xla::Shape col_shape;
+    xla::Shape row_shape;
+    for (int64 d : major_dims) {
+      row_shape.add_dimensions(d);
+      col_shape.add_dimensions(d);
     }
-    auto new_c = builder->Mul(c, new_d_inv);
-    TF_ASSIGN_OR_RETURN(l,
-                        UpdateSliceInMinorDims(builder, l, new_c, {j + 1, j}));
-  }
-  return l;
+    row_shape.add_dimensions(1);
+    row_shape.add_dimensions(n);
+    row_shape.set_element_type(a_shape->element_type());
+    auto mask_zeros_row = Zeros(body_builder, row_shape);
+
+    col_shape.add_dimensions(n);
+    col_shape.add_dimensions(1);
+    col_shape.set_element_type(a_shape->element_type());
+    auto mask_zeros_col = Zeros(body_builder, col_shape);
+
+    std::vector<int32> mask_vector(n);
+    std::iota(mask_vector.begin(), mask_vector.end(), 0);
+    auto mask_range = body_builder->ConstantR1<int32>(mask_vector);
+    auto mask_range_row = body_builder->Broadcast(
+        body_builder->Reshape(mask_range, {0}, {1, n}), major_dims);
+    auto mask_range_col = body_builder->Broadcast(
+        body_builder->Reshape(mask_range, {0}, {n, 1}), major_dims);
+    auto body_a = loop_vars[0];
+    auto body_l = loop_vars[1];
+
+    // row = l[..., i, :i]
+    // select the whole i-th row, then mask out all columns past i-1
+    auto zero = body_builder->ConstantR0<int32>(0);
+    TF_ASSIGN_OR_RETURN(auto l_i, DynamicSliceInMinorDims(body_builder, body_l,
+                                                          {i, zero}, {1, n}));
+    auto row = body_builder->Select(body_builder->Ge(mask_range_row, i),
+                                    mask_zeros_row, l_i);
+    // a[..., i, i]
+    TF_ASSIGN_OR_RETURN(auto a_ii, DynamicSliceInMinorDims(body_builder, body_a,
+                                                           {i, i}, {1, 1}));
+    // np.dot(row, np.swapaxes(row, -1, -2))
+    xla::ComputationDataHandle diag_dot;
+    TF_ASSIGN_OR_RETURN(diag_dot, BatchDot(body_builder, row, row,
+                                           /*transpose_x=*/false,
+                                           /*transpose_y=*/true));
+    // l[..., i, i] = np.sqrt(a[..., i, i] - np.dot(row,
+    //                                              np.swapaxes(row, -1, -2)))
+    auto l_ii = body_builder->Pow(
+        body_builder->Sub(a_ii, diag_dot),
+        FloatLiteral(body_builder, a_shape->element_type(), 0.5));
+
+    // a[..., i+1:, i]
+    auto ip1 = body_builder->Add(i, body_builder->ConstantR0<int32>(1));
+    // select the whole i-th column, then mask out all rows above i+1
+    TF_ASSIGN_OR_RETURN(
+        auto a_0i, DynamicSliceInMinorDims(body_builder, body_a, {i}, {1}));
+    auto a_ip1i = body_builder->Select(body_builder->Le(mask_range_col, i),
+                                       mask_zeros_col, a_0i);
+
+    // l[..., i+1:, i] = (a[..., i+1:, i] - np.dot(l[..., i+1:, :i], r.T)) /
+    //                   l[..., i, i]
+    // The columns in [i, n] are zeroed out in `row`, so we just have to
+    // zero out rows above i+1 after the BatchDot. np.dot(l[..., :, :i],
+    // r.T)
+    TF_ASSIGN_OR_RETURN(auto dot, BatchDot(body_builder, body_l, row,
+                                           /*transpose_x=*/false,
+                                           /*transpose_y=*/true));
+    // np.dot(l[..., i+1:, :i], r.T)
+    auto dot_ip1 = body_builder->Select(body_builder->Le(mask_range_col, i),
+                                        mask_zeros_col, dot);
+
+    auto col_update =
+        body_builder->Div(body_builder->Sub(a_ip1i, dot_ip1), l_ii);
+    TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims(
+                                    body_builder, body_l, col_update, {i}));
+    // Assign the diagonal after the rest of the column because otherwise the
+    // column assign will wrap around and overwrite the diagonal assign.
+    TF_ASSIGN_OR_RETURN(body_l, DynamicUpdateSliceInMinorDims(
+                                    body_builder, body_l, l_ii, {i, i}));
+
+    return std::vector<xla::ComputationDataHandle>{body_a, body_l};
+  };
+
+  TF_ASSIGN_OR_RETURN(
+      auto cholesky_while,
+      XlaForEachIndex(n, xla::S32, body_fn, {a, l}, ""unblocked"", builder));
+
+  return cholesky_while[1];
 }
 
 }  // namespace
",0,train
f59a82f2b08dca1641d5766fdd2234d3b665a862,tensorflow/tensorflow,"Replacing the current inner Cholesky decomposition loop with a rolled While loop version.

This allows much larger Cholesky decompositions (and thus matrix inversions) than previously possible on TPU, because the rolled While loops mean XLA compilation no longer times out.

While there is a minor runtime performance decrease (now 25ms vs 15ms for a 500x500 matrix), compilation is significantly faster (12.8s vs 55.2s for a 500x500 matrix).

PiperOrigin-RevId: 193114816",cholesky.h,"@@ -1,4 +1,4 @@
-/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the ""License"");
 you may not use this file except in compliance with the License.
@@ -29,7 +29,7 @@ namespace tensorflow {
 // the block size to use.
 // TODO(phawkins): check for negative values on the diagonal and return an
 // error, instead of silently yielding NaNs.
-// TODO(mattjj): handle the complex Hermitian case
+// TODO(znado): handle the complex Hermitian case
 xla::StatusOr<xla::ComputationDataHandle> Cholesky(
     xla::ComputationBuilder* builder, xla::ComputationDataHandle a,
     int64 block_size = 256);
",0,train
d8168396f11ad34939819b8e866668ad375998c1,tensorflow/tensorflow,"Exclude a test failing on Windows with CMake.

PiperOrigin-RevId: 172130104",batch_dataset_op_test.py,"@@ -22,7 +22,7 @@ import math
 import numpy as np
 
 from tensorflow.contrib.data.python.ops import batching
-from tensorflow.python.data.ops import dataset_ops
+from tensorflow.contrib.data.python.ops import dataset_ops
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
",0,train
747ca958fa6c51ffd81616867e7f043216cbe4ad,tensorflow/tensorflow,"Trace CUPTI synchronization events.

 - cuEventSynchronize
 - cuStreamWaitEvent
 - cuStreamSynchronize
 - cuCtxSynchronize

PiperOrigin-RevId: 353984518
Change-Id: Ib3a5a1f50993248ae71f32baa54390fe8b29d298",cupti_tracer.cc,"@@ -776,6 +776,36 @@ void AddMemsetActivityEvent(CuptiTraceCollector *collector,
   collector->AddEvent(std::move(event));
 }
 
+void AddSynchronizationActivityEvent(
+    CuptiTraceCollector *collector, const CUpti_ActivitySynchronization *sync) {
+  CuptiTracerEvent event{};
+  event.type = CuptiTracerEventType::Generic;
+  event.source = CuptiTracerEventSource::Activity;
+  switch (sync->type) {
+    case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_EVENT_SYNCHRONIZE:
+      event.name = ""cuEventSynchronize"";
+      break;
+    case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_WAIT_EVENT:
+      event.name = ""cuStreamWaitEvent"";
+      break;
+    case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_STREAM_SYNCHRONIZE:
+      event.name = ""cuStreamSynchronize"";
+      break;
+    case CUPTI_ACTIVITY_SYNCHRONIZATION_TYPE_CONTEXT_SYNCHRONIZE:
+      event.name = ""cuCtxSynchronize"";
+      break;
+    default:
+      event.name = ""unknown synchronization event"";
+      break;
+  }
+  event.start_time_ns = sync->start;
+  event.end_time_ns = std::max(sync->end, sync->start + 1);
+  event.correlation_id = sync->correlationId;
+  event.context_id = sync->contextId;
+  VLOG(5) << ""Cuda activity "" << event.name;
+  collector->AddEvent(std::move(event));
+}
+
 // This hook uses cupti activity api to measure device side activities.
 class CuptiDriverApiHookWithActivityApi : public CuptiDriverApiHook {
  public:
@@ -1901,6 +1931,11 @@ Status CuptiTracer::ProcessActivityBuffer(CUcontext context, uint32_t stream_id,
           AddMemsetActivityEvent(
               collector_, reinterpret_cast<CUpti_ActivityMemset *>(record));
           break;
+        case CUPTI_ACTIVITY_KIND_SYNCHRONIZATION:
+          AddSynchronizationActivityEvent(
+              collector_,
+              reinterpret_cast<CUpti_ActivitySynchronization *>(record));
+          break;
         default:
           LOG(ERROR) << ""Activity type "" << record->kind << "" not supported."";
           break;
",0,train
80406a561aa3226cf06c8ddfc2ff528ca7fa0ff7,tensorflow/tensorflow,"Fix a typo

PiperOrigin-RevId: 249281178",lstm_test.cc,"@@ -1957,14 +1957,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   LayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/false, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
@@ -2052,14 +2052,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   HybridLayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/false, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
@@ -2147,14 +2147,14 @@ TEST_F(NoCifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   HybridLayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/false, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
@@ -2289,14 +2289,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   LayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/true, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
@@ -2379,14 +2379,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   HybridLayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/true, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
@@ -2470,14 +2470,14 @@ TEST_F(CifgPeepholeProjectionNoClippingLayerNormLstmTest,
   const int n_input = 5;
   const int n_cell = 4;
   const int n_output = 3;
-  const float ceil_clip = 0.0;
+  const float cell_clip = 0.0;
   const float proj_clip = 0.0;
 
   HybridLayerNormLSTMOpModel layer_norm_lstm(
       n_batch, n_input, n_cell, n_output,
       /*use_cifg=*/true, /*use_peephole=*/true,
       /*use_projection_weights=*/true,
-      /*use_projection_bias=*/false, ceil_clip, proj_clip,
+      /*use_projection_bias=*/false, cell_clip, proj_clip,
       {
           {n_batch, n_input},  // input tensor
 
",0,train
bcf47bd5ed73e500f9d16d503023665d9921133b,tensorflow/tensorflow,"Fix SeparableConv2D, which did not work when a stride > 1 was
used with the channels_first data_format.

PiperOrigin-RevId: 165028212",convolutional.py,"@@ -975,11 +975,15 @@ class SeparableConv2D(Conv2D):
 
   def call(self, inputs):
     # Apply the actual ops.
+    if self.data_format == 'channels_last':
+      strides = (1,) + self.strides + (1,)
+    else:
+      strides = (1, 1) + self.strides
     outputs = nn.separable_conv2d(
         inputs,
         self.depthwise_kernel,
         self.pointwise_kernel,
-        strides=(1,) + self.strides + (1,),
+        strides=strides,
         padding=self.padding.upper(),
         rate=self.dilation_rate,
         data_format=utils.convert_data_format(self.data_format, ndim=4))
",0,test
bcf47bd5ed73e500f9d16d503023665d9921133b,tensorflow/tensorflow,"Fix SeparableConv2D, which did not work when a stride > 1 was
used with the channels_first data_format.

PiperOrigin-RevId: 165028212",convolutional_test.py,"@@ -439,6 +439,31 @@ class SeparableConv2DTest(test.TestCase):
     self.assertListEqual(output.get_shape().as_list(),
                          [5, height / 2, width, 32])
 
+  def testCreateSeparableConvWithStridesChannelsFirst(self):
+    data_format = 'channels_first'
+    height, width = 6, 8
+    # Test strides tuple
+    images = random_ops.random_uniform((5, 3, height, width), seed=1)
+    layer = conv_layers.SeparableConv2D(
+        32, [3, 3], strides=(2, 2), padding='same', data_format=data_format)
+    output = layer.apply(images)
+    self.assertListEqual(output.get_shape().as_list(),
+                         [5, 32, height / 2, width / 2])
+
+    # Test strides integer
+    layer = conv_layers.SeparableConv2D(32, [3, 3], strides=2, padding='same',
+                                        data_format=data_format)
+    output = layer.apply(images)
+    self.assertListEqual(output.get_shape().as_list(),
+                         [5, 32, height / 2, width / 2])
+
+    # Test unequal strides
+    layer = conv_layers.SeparableConv2D(
+        32, [3, 3], strides=(2, 1), padding='same', data_format=data_format)
+    output = layer.apply(images)
+    self.assertListEqual(output.get_shape().as_list(),
+                         [5, 32, height / 2, width])
+
   def testFunctionalConv2DReuse(self):
     height, width = 7, 9
     images = random_ops.random_uniform((5, height, width, 3), seed=1)
",0,test
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,direct_session.cc,"@@ -94,7 +94,7 @@ string GetRendezvousKey(const string& tensor_name,
 //
 // 2) Recv nodes always complete immediately: The inputs are sent into
 //    the local rendezvous before we start the executor, so the
-//    corresonding recvs will not block.
+//    corresponding recvs will not block.
 //
 // Based on these assumptions, we can use the same thread pool for
 // both ""non-blocking"" and ""blocking"" OpKernels on Android.
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,direct_session_test.cc,"@@ -94,7 +94,7 @@ TEST_F(DirectSessionMinusAXTest, RunSimpleNetwork) {
   ASSERT_OK(s);
 
   ASSERT_EQ(1, outputs.size());
-  // The first output should be initiailzed and have the correct
+  // The first output should be initialized and have the correct
   // output.
   auto mat = outputs[0].matrix<float>();
   ASSERT_TRUE(outputs[0].IsInitialized());
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,executor.cc,"@@ -374,7 +374,7 @@ Status ExecutorImpl::InferAllocAttr(
   return s;
 }
 
-// The state associated with one invokation of ExecutorImpl::Run.
+// The state associated with one invocation of ExecutorImpl::Run.
 // ExecutorState dispatches nodes when they become ready and keeps
 // track of how many predecessors of a node have not done (pending_).
 class ExecutorState {
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,function.cc,"@@ -430,7 +430,7 @@ Status FunctionLibraryRuntimeImpl::InstantiateSymbolicGradient(
   const auto& func = f->func();
   const FunctionDef* fdef = lib_def_->Find(func.name());
   if (fdef == nullptr) {
-    // f is a primitve op.
+    // f is a primitive op.
     gradient::Creator creator;
     TF_RETURN_IF_ERROR(gradient::GetOpGradientCreator(func.name(), &creator));
     if (creator == nullptr) {
@@ -1100,7 +1100,7 @@ class SymbolicGradientHelper {
 
   // 'ready' keeps track of nodes that have been completely
   // backpropped. Initially, for every output y of the function f, we
-  // add dy as an input of the the gradient function.
+  // add dy as an input of the gradient function.
   std::deque<Node*> ready_;
 
   // Makes a copy of fbody_ in gbody_.
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,function.h,"@@ -90,7 +90,7 @@ bool RemoveListArrayConverter(Graph* g);
 // multiple times by calling ExpandInlineFunctions a few times.
 bool ExpandInlineFunctions(FunctionLibraryRuntime* lib, Graph* graph);
 
-// Applies graph rewrite optimzation such as inlining, dead code
+// Applies graph rewrite optimization such as inlining, dead code
 // removal, etc.
 //
 // **g is a graph constructed based on the runtime library 'lib'.
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_allocator_retry.h,"@@ -32,7 +32,7 @@ class GPUAllocatorRetry {
   // then wait up to 'max_millis_to_wait' milliseconds, retrying each
   // time a call to DeallocateRaw() is detected, until either a good
   // pointer is returned or the deadline is exhausted.  If the
-  // deadline is exahusted, try one more time with 'verbose_failure'
+  // deadline is exhausted, try one more time with 'verbose_failure'
   // set to true.  The value returned is either the first good pointer
   // obtained from 'alloc_func' or nullptr.
   void* AllocateRaw(std::function<void*(size_t alignment, size_t num_bytes,
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_bfc_allocator.cc,"@@ -420,7 +420,7 @@ void GPUBFCAllocator::DumpMemoryLog(size_t num_bytes) {
     }
   }
 
-  // Next show the the chunks that are in use, and also summarize their
+  // Next show the chunks that are in use, and also summarize their
   // number by size.
   std::map<size_t, int> in_use_by_size;
   for (auto& it : ptr_to_chunk_map_) {
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_event_mgr_test.cc,"@@ -199,7 +199,7 @@ TEST(EventMgr, StreamSwitchingFlushesImmediately) {
   EXPECT_GT(initial_live_bytes, live_tensor_bytes);
 }
 
-TEST(EventMgr, ManySmallTensorsSeperateCallsFlushed) {
+TEST(EventMgr, ManySmallTensorsSeparateCallsFlushed) {
   auto stream_exec = GPUMachineManager()->ExecutorForDevice(0).ValueOrDie();
   EventMgr em(stream_exec, GPUOptions());
   TEST_EventMgrHelper th(&em);
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_region_allocator.cc,"@@ -279,7 +279,7 @@ void GPURegionAllocator::CheckForMemoryLeaks() {
 }
 
 // Since there's no merging of chunks once allocated, we want to
-// maximize their reusablity (which argues for fewer, larger sizes),
+// maximize their reusability (which argues for fewer, larger sizes),
 // while minimizing waste (which argues for tight-fitting sizes).
 //
 // The smallest unit of allocation is 256 bytes.
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_stream_util.cc,"@@ -61,7 +61,7 @@ Status AssignStreams(const Graph* graph, const AssignStreamsOpts& opts,
       }
     }
   }
-  // We perform stream assigmnent assuming a large number of
+  // We perform stream assignment assuming a large number of
   // stream IDs and then map these down to the required number of streams
   // using simple round-robin.
   // Stream Assignment strategy:
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,gpu_util.h,"@@ -36,7 +36,7 @@ class GPUUtil {
   // ""tensor"" is GPU-local.  ""dev"" is the hosting GPU.
   // ""device_context"" should be the context of the GPU ""_Send"" op
   // which provides the Tensor.
-  // Sets all necessasry fields of ""proto"" by transferring value
+  // Sets all necessary fields of ""proto"" by transferring value
   // bytes from GPU to CPU RAM. ""is_dead"" indicates that the
   // tensor is dead with an uninit value.
   static void SetProtoFromGPU(const Tensor& tensor, Device* dev,
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,pool_allocator.cc,"@@ -47,7 +47,7 @@ PoolAllocator::PoolAllocator(size_t pool_size_limit, bool auto_resize,
 PoolAllocator::~PoolAllocator() { Clear(); }
 
 namespace {
-// Pools contain Chunks allocatated from the underlying Allocator.
+// Pools contain Chunks allocated from the underlying Allocator.
 // Chunk alignment is always on kPoolAlignment boundaries.  Each Chunk
 // begins with a descriptor (ChunkPrefix) that gives its size and a
 // pointer to itself.  The pointer returned to the user is just past
@@ -56,7 +56,7 @@ namespace {
 // pointer and also re-write the ChunkPrefix.chunk_ptr value
 // immediately before it.  This way the Chunk address and size can be
 // recovered from the returned user pointer, regardless of alignment.
-// Note that this deferencing of the pointers means that we cannot
+// Note that this dereferencing of the pointers means that we cannot
 // handle GPU memory, only CPU memory.
 struct ChunkPrefix {
   size_t num_bytes;
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,process_state.cc,"@@ -46,7 +46,7 @@ const bool FLAGS_brain_gpu_region_allocator_reset_to_nan = false;
 const bool FLAGS_brain_gpu_use_bfc_allocator = true;
 
 // If true, record attributes of memory allocations and
-// dyanmically check for appropriate use of registered memory.
+// dynamically check for appropriate use of registered memory.
 // Should only be true for debugging or diagnosis of
 // performance issues.
 bool FLAGS_brain_gpu_record_mem_types = false;
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,process_state.h,"@@ -67,7 +67,7 @@ class ProcessState {
   MemDesc PtrType(const void* ptr);
 
   // Returns the one CPUAllocator used for the given numa_node.
-  // TEMPORY: ignores numa_node.
+  // TEMPORARY: ignores numa_node.
   Allocator* GetCPUAllocator(int numa_node);
 
   // Returns the one GPU allocator used for the indexed GPU.
@@ -80,7 +80,7 @@ class ProcessState {
   // used on that first call is used.
   //
   // ""Allocator type"" describes the type of algorithm to use for the
-  // underlying allocator.  REQURES: Must be a valid type (see
+  // underlying allocator.  REQUIRES: Must be a valid type (see
   // config.proto for the list of supported strings.).
   //
   // REQUIRES: gpu_id must be a valid ordinal for a GPU available in the
@@ -98,7 +98,7 @@ class ProcessState {
   // interface to be used for network device memory registration.
   // ""bus_id"" is platform-specific.  On many platforms it
   // should be 0.  On machines with multiple PCIe buses, it should be
-  // the index of one of the PCIe buses.  If the the bus_id is invalid,
+  // the index of one of the PCIe buses.  If the bus_id is invalid,
   // results are undefined.
   typedef std::function<void(void*, size_t)> AllocVisitor;
   void AddGPUAllocVisitor(int bus_id, AllocVisitor visitor);
",0,train
621820d41adedc6dc80f37b30c556a5cbe5c3601,tensorflow/tensorflow,Fix typos in a testcase name and comments of core/common_runtime module.,simple_placer.cc,"@@ -37,7 +37,7 @@ namespace {
 // types in 'supported_device_types' and returns the *first* subset of devices
 // that match.
 //
-// For example, if suported_device_types contains {GPU, CPU} and
+// For example, if supported_device_types contains {GPU, CPU} and
 // 'devices' contains CPU and GPU devices, the returned vector will
 // include *only* GPU devices, since that is higher in the priority
 // order in 'supported_device_types'.
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,builtin_ops.h,"@@ -172,7 +172,6 @@ typedef enum {
   kTfLiteBuiltinVarHandle = 142,
   kTfLiteBuiltinReadVariable = 143,
   kTfLiteBuiltinAssignVariable = 144,
-  kTfLiteBuiltinTable = 145,
 } TfLiteBuiltinOperator;
 
 #ifdef __cplusplus
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,flatbuffer_conversions.cc,"@@ -844,7 +844,6 @@ TfLiteStatus ParseOpDataTfLite(const Operator* op, BuiltinOperator op_type,
     case BuiltinOperator_HASHTABLE_SIZE:
     case BuiltinOperator_READ_VARIABLE:
     case BuiltinOperator_ASSIGN_VARIABLE:
-    case BuiltinOperator_TABLE:
       return kTfLiteOk;
     case BuiltinOperator_PLACEHOLDER_FOR_GREATER_OP_CODES:
       return kTfLiteError;
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,builtin_op_kernels.h,"@@ -159,7 +159,6 @@ TfLiteRegistration* Register_STRIDED_SLICE();
 TfLiteRegistration* Register_SUB();
 TfLiteRegistration* Register_SUM();
 TfLiteRegistration* Register_SVDF();
-TfLiteRegistration* Register_TABLE();
 TfLiteRegistration* Register_TANH();
 TfLiteRegistration* Register_TILE();
 TfLiteRegistration* Register_TOPK_V2();
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,register.cc,"@@ -28,6 +28,7 @@ TfLiteRegistration* Register_NUMERIC_VERIFY();
 TfLiteRegistration* Register_AUDIO_SPECTROGRAM();
 TfLiteRegistration* Register_MFCC();
 TfLiteRegistration* Register_DETECTION_POSTPROCESS();
+TfLiteRegistration* Register_TABLE();
 
 }  // namespace custom
 
@@ -325,7 +326,6 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_VAR_HANDLE, Register_VAR_HANDLE());
   AddBuiltin(BuiltinOperator_READ_VARIABLE, Register_READ_VARIABLE());
   AddBuiltin(BuiltinOperator_ASSIGN_VARIABLE, Register_ASSIGN_VARIABLE());
-  AddBuiltin(BuiltinOperator_TABLE, Register_TABLE());
   AddCustom(""NumericVerify"", tflite::ops::custom::Register_NUMERIC_VERIFY());
   // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
   // custom ops aren't always included by default.
@@ -334,6 +334,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
             tflite::ops::custom::Register_AUDIO_SPECTROGRAM());
   AddCustom(""TFLite_Detection_PostProcess"",
             tflite::ops::custom::Register_DETECTION_POSTPROCESS());
+  AddCustom(""Table"", tflite::ops::custom::Register_TABLE());
   // By definition, all of the ops added above are not user-defined ops,
   // since they are supported by BuiltinOpResolver.
   may_directly_contain_user_defined_ops_ = false;
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,register_ref.cc,"@@ -29,6 +29,7 @@ TfLiteRegistration* Register_NUMERIC_VERIFY_REF();
 TfLiteRegistration* Register_AUDIO_SPECTROGRAM();
 TfLiteRegistration* Register_MFCC();
 TfLiteRegistration* Register_DETECTION_POSTPROCESS();
+TfLiteRegistration* Register_TABLE();
 
 }  // namespace custom
 
@@ -163,7 +164,6 @@ TfLiteRegistration* Register_IMAG();
 TfLiteRegistration* Register_REAL();
 TfLiteRegistration* Register_COMPLEX_ABS();
 TfLiteRegistration* Register_CONV_3D_TRANSPOSE_REF();
-TfLiteRegistration* Register_TABLE();
 
 namespace {
 
@@ -475,7 +475,6 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() {
   AddBuiltin(BuiltinOperator_COMPLEX_ABS, Register_COMPLEX_ABS());
   AddBuiltin(BuiltinOperator_CONV_3D_TRANSPOSE,
              Register_CONV_3D_TRANSPOSE_REF());
-  AddBuiltin(BuiltinOperator_TABLE, Register_TABLE());
   AddCustom(""NumericVerify"",
             tflite::ops::custom::Register_NUMERIC_VERIFY_REF());
   // TODO(andrewharp, ahentz): Move these somewhere more appropriate so that
@@ -485,6 +484,7 @@ BuiltinRefOpResolver::BuiltinRefOpResolver() {
             tflite::ops::custom::Register_AUDIO_SPECTROGRAM());
   AddCustom(""TFLite_Detection_PostProcess"",
             tflite::ops::custom::Register_DETECTION_POSTPROCESS());
+  AddCustom(""Table"", tflite::ops::custom::Register_TABLE());
 }
 
 }  // namespace builtin
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,table.cc,"@@ -20,7 +20,7 @@ limitations under the License.
 
 namespace tflite {
 namespace ops {
-namespace builtin {
+namespace custom {
 namespace table {
 
 constexpr int kInputTensor = 0;
@@ -124,6 +124,6 @@ TfLiteRegistration* Register_TABLE() {
   return &r;
 }
 
-}  // namespace builtin
+}  // namespace custom
 }  // namespace ops
 }  // namespace tflite
\ No newline at end of file
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,table_test.cc,"@@ -20,6 +20,11 @@ limitations under the License.
 #include ""tensorflow/lite/schema/schema_generated.h""
 
 namespace tflite {
+namespace ops {
+namespace custom {
+
+TfLiteRegistration* Register_TABLE();
+
 namespace {
 
 using ::testing::ElementsAreArray;
@@ -31,8 +36,7 @@ class TableOpModel : public SingleOpModel {
     input_ = AddInput(input);
     table_ = AddInput(table);
     output_ = AddOutput(output);
-    SetBuiltinOp(BuiltinOperator_TABLE, BuiltinOptions_TableOptions,
-                 CreateSubOptions(builder_).Union());
+    SetCustomOp(""Table"", {}, Register_TABLE);
     BuildInterpreter({GetShape(input_), GetShape(table_)});
   }
 
@@ -139,4 +143,6 @@ TEST(TableOpTest, Int16ToInt8WithExpLUT) {
 }
 
 }  // namespace
-}  // namespace tflite
\ No newline at end of file
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,schema_generated.h,"@@ -385,9 +385,6 @@ struct ReadVariableOptionsT;
 struct AssignVariableOptions;
 struct AssignVariableOptionsT;
 
-struct TableOptions;
-struct TableOptionsT;
-
 struct OperatorCode;
 struct OperatorCodeT;
 
@@ -856,12 +853,11 @@ enum BuiltinOperator {
   BuiltinOperator_VAR_HANDLE = 142,
   BuiltinOperator_READ_VARIABLE = 143,
   BuiltinOperator_ASSIGN_VARIABLE = 144,
-  BuiltinOperator_TABLE = 145,
   BuiltinOperator_MIN = BuiltinOperator_ADD,
-  BuiltinOperator_MAX = BuiltinOperator_TABLE
+  BuiltinOperator_MAX = BuiltinOperator_ASSIGN_VARIABLE
 };
 
-inline const BuiltinOperator (&EnumValuesBuiltinOperator())[146] {
+inline const BuiltinOperator (&EnumValuesBuiltinOperator())[145] {
   static const BuiltinOperator values[] = {
     BuiltinOperator_ADD,
     BuiltinOperator_AVERAGE_POOL_2D,
@@ -1007,14 +1003,13 @@ inline const BuiltinOperator (&EnumValuesBuiltinOperator())[146] {
     BuiltinOperator_CONV_3D_TRANSPOSE,
     BuiltinOperator_VAR_HANDLE,
     BuiltinOperator_READ_VARIABLE,
-    BuiltinOperator_ASSIGN_VARIABLE,
-    BuiltinOperator_TABLE
+    BuiltinOperator_ASSIGN_VARIABLE
   };
   return values;
 }
 
 inline const char * const *EnumNamesBuiltinOperator() {
-  static const char * const names[147] = {
+  static const char * const names[146] = {
     ""ADD"",
     ""AVERAGE_POOL_2D"",
     ""CONCATENATION"",
@@ -1160,14 +1155,13 @@ inline const char * const *EnumNamesBuiltinOperator() {
     ""VAR_HANDLE"",
     ""READ_VARIABLE"",
     ""ASSIGN_VARIABLE"",
-    ""TABLE"",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameBuiltinOperator(BuiltinOperator e) {
-  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_TABLE)) return """";
+  if (flatbuffers::IsOutRange(e, BuiltinOperator_ADD, BuiltinOperator_ASSIGN_VARIABLE)) return """";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOperator()[index];
 }
@@ -1287,12 +1281,11 @@ enum BuiltinOptions {
   BuiltinOptions_VarHandleOptions = 111,
   BuiltinOptions_ReadVariableOptions = 112,
   BuiltinOptions_AssignVariableOptions = 113,
-  BuiltinOptions_TableOptions = 114,
   BuiltinOptions_MIN = BuiltinOptions_NONE,
-  BuiltinOptions_MAX = BuiltinOptions_TableOptions
+  BuiltinOptions_MAX = BuiltinOptions_AssignVariableOptions
 };
 
-inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115] {
+inline const BuiltinOptions (&EnumValuesBuiltinOptions())[114] {
   static const BuiltinOptions values[] = {
     BuiltinOptions_NONE,
     BuiltinOptions_Conv2DOptions,
@@ -1407,14 +1400,13 @@ inline const BuiltinOptions (&EnumValuesBuiltinOptions())[115] {
     BuiltinOptions_HashtableSizeOptions,
     BuiltinOptions_VarHandleOptions,
     BuiltinOptions_ReadVariableOptions,
-    BuiltinOptions_AssignVariableOptions,
-    BuiltinOptions_TableOptions
+    BuiltinOptions_AssignVariableOptions
   };
   return values;
 }
 
 inline const char * const *EnumNamesBuiltinOptions() {
-  static const char * const names[116] = {
+  static const char * const names[115] = {
     ""NONE"",
     ""Conv2DOptions"",
     ""DepthwiseConv2DOptions"",
@@ -1529,14 +1521,13 @@ inline const char * const *EnumNamesBuiltinOptions() {
     ""VarHandleOptions"",
     ""ReadVariableOptions"",
     ""AssignVariableOptions"",
-    ""TableOptions"",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameBuiltinOptions(BuiltinOptions e) {
-  if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_TableOptions)) return """";
+  if (flatbuffers::IsOutRange(e, BuiltinOptions_NONE, BuiltinOptions_AssignVariableOptions)) return """";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesBuiltinOptions()[index];
 }
@@ -1997,10 +1988,6 @@ template<> struct BuiltinOptionsTraits<tflite::AssignVariableOptions> {
   static const BuiltinOptions enum_value = BuiltinOptions_AssignVariableOptions;
 };
 
-template<> struct BuiltinOptionsTraits<tflite::TableOptions> {
-  static const BuiltinOptions enum_value = BuiltinOptions_TableOptions;
-};
-
 struct BuiltinOptionsUnion {
   BuiltinOptions type;
   void *value;
@@ -2937,14 +2924,6 @@ struct BuiltinOptionsUnion {
     return type == BuiltinOptions_AssignVariableOptions ?
       reinterpret_cast<const tflite::AssignVariableOptionsT *>(value) : nullptr;
   }
-  tflite::TableOptionsT *AsTableOptions() {
-    return type == BuiltinOptions_TableOptions ?
-      reinterpret_cast<tflite::TableOptionsT *>(value) : nullptr;
-  }
-  const tflite::TableOptionsT *AsTableOptions() const {
-    return type == BuiltinOptions_TableOptions ?
-      reinterpret_cast<const tflite::TableOptionsT *>(value) : nullptr;
-  }
 };
 
 bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *obj, BuiltinOptions type);
@@ -10361,46 +10340,6 @@ inline flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(
 
 flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(flatbuffers::FlatBufferBuilder &_fbb, const AssignVariableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
 
-struct TableOptionsT : public flatbuffers::NativeTable {
-  typedef TableOptions TableType;
-  TableOptionsT() {
-  }
-};
-
-struct TableOptions FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
-  typedef TableOptionsT NativeTableType;
-  bool Verify(flatbuffers::Verifier &verifier) const {
-    return VerifyTableStart(verifier) &&
-           verifier.EndTable();
-  }
-  TableOptionsT *UnPack(const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  void UnPackTo(TableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver = nullptr) const;
-  static flatbuffers::Offset<TableOptions> Pack(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-};
-
-struct TableOptionsBuilder {
-  flatbuffers::FlatBufferBuilder &fbb_;
-  flatbuffers::uoffset_t start_;
-  explicit TableOptionsBuilder(flatbuffers::FlatBufferBuilder &_fbb)
-        : fbb_(_fbb) {
-    start_ = fbb_.StartTable();
-  }
-  TableOptionsBuilder &operator=(const TableOptionsBuilder &);
-  flatbuffers::Offset<TableOptions> Finish() {
-    const auto end = fbb_.EndTable(start_);
-    auto o = flatbuffers::Offset<TableOptions>(end);
-    return o;
-  }
-};
-
-inline flatbuffers::Offset<TableOptions> CreateTableOptions(
-    flatbuffers::FlatBufferBuilder &_fbb) {
-  TableOptionsBuilder builder_(_fbb);
-  return builder_.Finish();
-}
-
-flatbuffers::Offset<TableOptions> CreateTableOptions(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher = nullptr);
-
 struct OperatorCodeT : public flatbuffers::NativeTable {
   typedef OperatorCode TableType;
   int8_t deprecated_builtin_code;
@@ -10890,9 +10829,6 @@ struct Operator FLATBUFFERS_FINAL_CLASS : private flatbuffers::Table {
   const tflite::AssignVariableOptions *builtin_options_as_AssignVariableOptions() const {
     return builtin_options_type() == tflite::BuiltinOptions_AssignVariableOptions ? static_cast<const tflite::AssignVariableOptions *>(builtin_options()) : nullptr;
   }
-  const tflite::TableOptions *builtin_options_as_TableOptions() const {
-    return builtin_options_type() == tflite::BuiltinOptions_TableOptions ? static_cast<const tflite::TableOptions *>(builtin_options()) : nullptr;
-  }
   const flatbuffers::Vector<uint8_t> *custom_options() const {
     return GetPointer<const flatbuffers::Vector<uint8_t> *>(VT_CUSTOM_OPTIONS);
   }
@@ -11381,10 +11317,6 @@ template<> inline const tflite::AssignVariableOptions *Operator::builtin_options
   return builtin_options_as_AssignVariableOptions();
 }
 
-template<> inline const tflite::TableOptions *Operator::builtin_options_as<tflite::TableOptions>() const {
-  return builtin_options_as_TableOptions();
-}
-
 struct OperatorBuilder {
   flatbuffers::FlatBufferBuilder &fbb_;
   flatbuffers::uoffset_t start_;
@@ -15405,29 +15337,6 @@ inline flatbuffers::Offset<AssignVariableOptions> CreateAssignVariableOptions(fl
       _fbb);
 }
 
-inline TableOptionsT *TableOptions::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
-  auto _o = new TableOptionsT();
-  UnPackTo(_o, _resolver);
-  return _o;
-}
-
-inline void TableOptions::UnPackTo(TableOptionsT *_o, const flatbuffers::resolver_function_t *_resolver) const {
-  (void)_o;
-  (void)_resolver;
-}
-
-inline flatbuffers::Offset<TableOptions> TableOptions::Pack(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT* _o, const flatbuffers::rehasher_function_t *_rehasher) {
-  return CreateTableOptions(_fbb, _o, _rehasher);
-}
-
-inline flatbuffers::Offset<TableOptions> CreateTableOptions(flatbuffers::FlatBufferBuilder &_fbb, const TableOptionsT *_o, const flatbuffers::rehasher_function_t *_rehasher) {
-  (void)_rehasher;
-  (void)_o;
-  struct _VectorArgs { flatbuffers::FlatBufferBuilder *__fbb; const TableOptionsT* __o; const flatbuffers::rehasher_function_t *__rehasher; } _va = { &_fbb, _o, _rehasher}; (void)_va;
-  return tflite::CreateTableOptions(
-      _fbb);
-}
-
 inline OperatorCodeT *OperatorCode::UnPack(const flatbuffers::resolver_function_t *_resolver) const {
   auto _o = new OperatorCodeT();
   UnPackTo(_o, _resolver);
@@ -16358,10 +16267,6 @@ inline bool VerifyBuiltinOptions(flatbuffers::Verifier &verifier, const void *ob
       auto ptr = reinterpret_cast<const tflite::AssignVariableOptions *>(obj);
       return verifier.VerifyTable(ptr);
     }
-    case BuiltinOptions_TableOptions: {
-      auto ptr = reinterpret_cast<const tflite::TableOptions *>(obj);
-      return verifier.VerifyTable(ptr);
-    }
     default: return true;
   }
 }
@@ -16832,10 +16737,6 @@ inline void *BuiltinOptionsUnion::UnPack(const void *obj, BuiltinOptions type, c
       auto ptr = reinterpret_cast<const tflite::AssignVariableOptions *>(obj);
       return ptr->UnPack(resolver);
     }
-    case BuiltinOptions_TableOptions: {
-      auto ptr = reinterpret_cast<const tflite::TableOptions *>(obj);
-      return ptr->UnPack(resolver);
-    }
     default: return nullptr;
   }
 }
@@ -17294,10 +17195,6 @@ inline flatbuffers::Offset<void> BuiltinOptionsUnion::Pack(flatbuffers::FlatBuff
       auto ptr = reinterpret_cast<const tflite::AssignVariableOptionsT *>(value);
       return CreateAssignVariableOptions(_fbb, ptr, _rehasher).Union();
     }
-    case BuiltinOptions_TableOptions: {
-      auto ptr = reinterpret_cast<const tflite::TableOptionsT *>(value);
-      return CreateTableOptions(_fbb, ptr, _rehasher).Union();
-    }
     default: return 0;
   }
 }
@@ -17756,10 +17653,6 @@ inline BuiltinOptionsUnion::BuiltinOptionsUnion(const BuiltinOptionsUnion &u) FL
       value = new tflite::AssignVariableOptionsT(*reinterpret_cast<tflite::AssignVariableOptionsT *>(u.value));
       break;
     }
-    case BuiltinOptions_TableOptions: {
-      value = new tflite::TableOptionsT(*reinterpret_cast<tflite::TableOptionsT *>(u.value));
-      break;
-    }
     default:
       break;
   }
@@ -18332,11 +18225,6 @@ inline void BuiltinOptionsUnion::Reset() {
       delete ptr;
       break;
     }
-    case BuiltinOptions_TableOptions: {
-      auto ptr = reinterpret_cast<tflite::TableOptionsT *>(value);
-      delete ptr;
-      break;
-    }
     default: break;
   }
   value = nullptr;
",0,train
b6549e400f5bb43cf36be2bbe61c26c5dce2582c,tensorflow/tensorflow,Move the TABLE operator from builtin op to custom op,runtime_version.cc,"@@ -357,7 +357,6 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
               {{BuiltinOperator_VAR_HANDLE, 1}, ""2.6.0""},
               {{BuiltinOperator_READ_VARIABLE, 1}, ""2.6.0""},
               {{BuiltinOperator_ASSIGN_VARIABLE, 1}, ""2.6.0""},
-              {{BuiltinOperator_TABLE, 1}, ""2.6.0""},
           });
 
   std::pair<BuiltinOperator, int> version_key = {op_code, op_version};
",0,train
d784a48aea16c150e618d5aaf42b320d108dd20d,tensorflow/tensorflow,"Fix bug causing inconsistent AST when collections of lambdas are involved in template expansions.

PiperOrigin-RevId: 233964704",templates.py,"@@ -92,6 +92,14 @@ class ContextAdjuster(gast.NodeTransformer):
     return self.generic_visit(node)
 
   def visit_comprehension(self, node):
+    # We may be able to override some of these, but for now it's simpler
+    # to just assert that they're set.
+    self._ctx_override = None
+    return self.generic_visit(node)
+
+  def visit_Lambda(self, node):
+    # We may be able to override some of these, but for now it's simpler
+    # to just assert that they're set.
     self._ctx_override = None
     return self.generic_visit(node)
 
",0,train
d784a48aea16c150e618d5aaf42b320d108dd20d,tensorflow/tensorflow,"Fix bug causing inconsistent AST when collections of lambdas are involved in template expansions.

PiperOrigin-RevId: 233964704",templates_test.py,"@@ -248,6 +248,16 @@ class TemplatesTest(test.TestCase):
     self.assertIsInstance(arg_node.generators[0].target.ctx, gast.Store)
     self.assertIsInstance(arg_node.elt.ctx, gast.Load)
 
+  def test_lambda_in_function_call(self):
+    template = """"""
+      a = foo(arg)
+    """"""
+    source = parser.parse_expression('[lambda i: i]')
+    node = templates.replace(template, arg=source)
+    lambda_arg = node[0].value.args[0].elts[0]
+    self.assertIsInstance(lambda_arg.args.args[0].ctx, gast.Param)
+    self.assertIsInstance(lambda_arg.body.ctx, gast.Load)
+
 
 if __name__ == '__main__':
   test.main()
",0,train
9065899e9252d5d9472b45d5a3dbecfb8b039117,tensorflow/tensorflow,"LocalResponseNormalization: We're spending about half of the time in this function converting to and from double. Do the computation in the float domain; I don't think there is any risk of numerical instability here.

PiperOrigin-RevId: 320588654
Change-Id: Ia641f6359b5966aa669de037d355292a25c08bed",optimized_ops.h,"@@ -3815,6 +3815,7 @@ inline void LocalResponseNormalization(
   const int double_range = op_params.range * 2;
   Eigen::VectorXf padded_square(data_in.rows() + double_range);
   padded_square.setZero();
+  const float bias = op_params.bias;
   for (int r = 0; r < data_in.cols(); ++r) {
     // Do local response normalization for data_in(:, r)
     // first, compute the square and store them in buffer for repeated use
@@ -3827,7 +3828,7 @@ inline void LocalResponseNormalization(
     }
     for (int i = 0; i < data_in.rows(); ++i) {
       accumulated_scale += padded_square(i + double_range);
-      data_out(i, r) = op_params.bias + accumulated_scale;
+      data_out(i, r) = bias + accumulated_scale;
       accumulated_scale -= padded_square(i);
     }
   }
",0,train
cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures.

PiperOrigin-RevId: 214831837",conv.cc,"@@ -86,6 +86,18 @@ struct OpData {
   bool run_multithreaded_kernel;
 };
 
+inline PaddingType RuntimePaddingType(TfLitePadding padding) {
+  switch (padding) {
+    case TfLitePadding::kTfLitePaddingSame:
+      return PaddingType::kSame;
+    case TfLitePadding::kTfLitePaddingValid:
+      return PaddingType::kValid;
+    case TfLitePadding::kTfLitePaddingUnknown:
+    default:
+      return PaddingType::kNone;
+  }
+}
+
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   // This is a builtin op, so we don't use the contents in 'buffer', if any.
   // Instead, we allocate a new object to use as scratch space for im2col, and
@@ -487,18 +499,18 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
   } else {
     effective_kernel_type = kernel_type;
   }
+  ConvParams op_params;
+  op_params.padding_type = RuntimePaddingType(params->padding);
+  op_params.padding_values.width = data->padding.width;
+  op_params.padding_values.height = data->padding.height;
+  op_params.stride_width = params->stride_width;
+  op_params.stride_height = params->stride_height;
+  op_params.dilation_width_factor = params->dilation_width_factor;
+  op_params.dilation_height_factor = params->dilation_height_factor;
+  op_params.float_activation_min = output_activation_min;
+  op_params.float_activation_max = output_activation_max;
   switch (effective_kernel_type) {
     case kReference: {
-      ConvParams op_params;
-      op_params.padding_type = PaddingType::kSame;
-      op_params.padding_values.width = data->padding.width;
-      op_params.padding_values.height = data->padding.height;
-      op_params.stride_width = params->stride_width;
-      op_params.stride_height = params->stride_height;
-      op_params.dilation_width_factor = params->dilation_width_factor;
-      op_params.dilation_height_factor = params->dilation_height_factor;
-      op_params.float_activation_min = output_activation_min;
-      op_params.float_activation_max = output_activation_max;
       reference_ops::Conv(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(filter),
                           GetTensorData<float>(filter), GetTensorShape(bias),
@@ -508,16 +520,6 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
       break;
     }
     case kGenericOptimized: {
-      ConvParams op_params;
-      op_params.padding_type = PaddingType::kSame;
-      op_params.padding_values.width = data->padding.width;
-      op_params.padding_values.height = data->padding.height;
-      op_params.stride_width = params->stride_width;
-      op_params.stride_height = params->stride_height;
-      op_params.dilation_width_factor = params->dilation_width_factor;
-      op_params.dilation_height_factor = params->dilation_height_factor;
-      op_params.float_activation_min = output_activation_min;
-      op_params.float_activation_max = output_activation_max;
       optimized_ops::Conv(op_params, GetTensorShape(input),
                           GetTensorData<float>(input), GetTensorShape(filter),
                           GetTensorData<float>(filter), GetTensorShape(bias),
@@ -534,25 +536,21 @@ void EvalFloat(TfLiteContext* context, TfLiteNode* node,
         filter_data = GetTensorData<float>(filter);
       }
       multithreaded_ops::Conv(
-          *eigen_support::GetThreadPoolDevice(context),
-          GetTensorData<float>(input), GetTensorDims(input), filter_data,
-          GetTensorDims(filter), GetTensorData<float>(bias),
-          GetTensorDims(bias), params->stride_width, params->stride_height,
-          data->padding.width, data->padding.height, params->padding,
-          output_activation_min, output_activation_max,
-          GetTensorData<float>(output), GetTensorDims(output),
-          GetTensorData<float>(im2col), GetTensorDims(im2col));
+          *eigen_support::GetThreadPoolDevice(context), op_params,
+          GetTensorShape(input), GetTensorData<float>(input),
+          GetTensorShape(filter), filter_data, GetTensorShape(bias),
+          GetTensorData<float>(bias), GetTensorShape(output),
+          GetTensorData<float>(output), GetTensorShape(im2col),
+          GetTensorData<float>(im2col));
       break;
     }
     case kCblasOptimized: {
-      cblas_ops::Conv(GetTensorData<float>(input), GetTensorDims(input),
-                      GetTensorData<float>(filter), GetTensorDims(filter),
-                      GetTensorData<float>(bias), GetTensorDims(bias),
-                      params->stride_width, params->stride_height,
-                      data->padding.width, data->padding.height,
-                      output_activation_min, output_activation_max,
-                      GetTensorData<float>(output), GetTensorDims(output),
-                      GetTensorData<float>(im2col), GetTensorDims(im2col));
+      cblas_ops::Conv(op_params, GetTensorShape(input),
+                      GetTensorData<float>(input), GetTensorShape(filter),
+                      GetTensorData<float>(filter), GetTensorShape(bias),
+                      GetTensorData<float>(bias), GetTensorShape(output),
+                      GetTensorData<float>(output), GetTensorShape(im2col),
+                      GetTensorData<float>(im2col));
       break;
     }
   }
",0,test
cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures.

PiperOrigin-RevId: 214831837",cblas_conv.h,"@@ -31,20 +31,29 @@ limitations under the License.
 namespace tflite {
 namespace cblas_ops {
 
-inline void Conv(const float* input_data, const Dims<4>& input_dims,
-                 const float* filter_data, const Dims<4>& filter_dims,
-                 const float* bias_data, const Dims<4>& bias_dims,
-                 int stride_width, int stride_height, int pad_width,
-                 int pad_height, float output_activation_min,
-                 float output_activation_max, float* output_data,
-                 const Dims<4>& output_dims, float* im2col_data,
-                 const Dims<4>& im2col_dims) {
+inline void Conv(const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const int dilation_width_factor = params.dilation_width_factor;
+  const int dilation_height_factor = params.dilation_height_factor;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
   gemmlowp::ScopedProfilingLabel label(""Conv/cblas"");
 
   const float* gemm_input_data = nullptr;
-  const Dims<4>* gemm_input_dims = nullptr;
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
+  const RuntimeShape* gemm_input_shape = nullptr;
+  const int filter_width = filter_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
   const bool need_im2col = stride_width != 1 || stride_height != 1 ||
                            filter_width != 1 || filter_height != 1;
   if (need_im2col) {
@@ -55,18 +64,17 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
     op_params.padding_values.height = pad_height;
     op_params.stride_width = stride_width;
     op_params.stride_height = stride_height;
-    op_params.dilation_width_factor = 1;
-    op_params.dilation_height_factor = 1;
+    op_params.dilation_width_factor = dilation_width_factor;
+    op_params.dilation_height_factor = dilation_height_factor;
     optimized_ops::Im2col(op_params, filter_height, filter_width, 0,
-                          DimsToShape(input_dims), input_data,
-                          DimsToShape(im2col_dims), im2col_data);
+                          input_shape, input_data, im2col_shape, im2col_data);
 
     gemm_input_data = im2col_data;
-    gemm_input_dims = &im2col_dims;
+    gemm_input_shape = &im2col_shape;
   } else {
     TFLITE_DCHECK(!im2col_data);
     gemm_input_data = input_data;
-    gemm_input_dims = &input_dims;
+    gemm_input_shape = &input_shape;
   }
 
   // The following code computes matrix multiplication c = a * transpose(b)
@@ -78,10 +86,10 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
   const float* a = gemm_input_data;
   const float* b = filter_data;
   float* c = output_data;
-  int m = gemm_input_dims->sizes[1] * gemm_input_dims->sizes[2] *
-          gemm_input_dims->sizes[3];
-  int n = output_dims.sizes[0];
-  int k = gemm_input_dims->sizes[0];
+  const int gemm_input_dims = gemm_input_shape->DimensionsCount();
+  int m = FlatSizeSkipDim(*gemm_input_shape, gemm_input_dims - 1);
+  int n = output_shape.Dims(3);
+  int k = gemm_input_shape->Dims(gemm_input_dims - 1);
   // The stride of matrix a, b and c respectively.
   int stride_a = k;
   int stride_b = k;
@@ -91,8 +99,8 @@ inline void Conv(const float* input_data, const Dims<4>& input_dims,
               stride_a, b, stride_b, 0.0f, c, stride_c);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      output_activation_min, output_activation_max, DimsToShape(bias_dims),
-      bias_data, DimsToShape(output_dims), output_data);
+      output_activation_min, output_activation_max, bias_shape, bias_data,
+      output_shape, output_data);
 }
 
 }  // namespace cblas_ops
",0,test
cc83067469bc30bba55932c587f31ef68f15792f,tensorflow/tensorflow,"Migrate a few conv kernels to use new kernel signatures.

PiperOrigin-RevId: 214831837",multithreaded_conv.h,"@@ -69,13 +69,13 @@ struct MatMulConvFunctor {
 template <class T>
 class EigenTensorConvFunctor {
  private:
-  Eigen::PaddingType TfLitePadding2EigenPadding(TfLitePadding padding) {
+  Eigen::PaddingType RuntimePadding2EigenPadding(PaddingType padding) {
     switch (padding) {
-      case kTfLitePaddingValid:
+      case PaddingType::kValid:
         return Eigen::PADDING_VALID;
-      case kTfLitePaddingSame:
+      case PaddingType::kSame:
         return Eigen::PADDING_SAME;
-      case kTfLitePaddingUnknown:
+      case PaddingType::kNone:
         assert(false);  // should never get here.
         return Eigen::PADDING_VALID;
     }
@@ -89,7 +89,7 @@ class EigenTensorConvFunctor {
                   int input_width, int input_depth, const T* filter_data,
                   int filter_height, int filter_width, int filter_count,
                   int stride_rows, int stride_cols, int pad_width,
-                  int pad_height, TfLitePadding padding, T* output_data,
+                  int pad_height, PaddingType padding, T* output_data,
                   int output_height, int output_width) {
     const bool is_1x1_kernel = (filter_height == 1 && filter_width == 1 &&
                                 stride_rows == 1 && stride_cols == 1);
@@ -127,28 +127,38 @@ class EigenTensorConvFunctor {
                               input_depth, filter_count);
       output.device(device) =
           Eigen::SpatialConvolution(input, filter, stride_cols, stride_rows,
-                                    TfLitePadding2EigenPadding(padding));
+                                    RuntimePadding2EigenPadding(padding));
     }
   }
 };
 
-inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data,
-                 const Dims<4>& input_dims, const float* filter_data,
-                 const Dims<4>& filter_dims, const float* bias_data,
-                 const Dims<4>& bias_dims, int stride_width, int stride_height,
-                 int pad_width, int pad_height, TfLitePadding padding,
-                 float output_activation_min, float output_activation_max,
-                 float* output_data, const Dims<4>& output_dims,
-                 float* im2col_data, const Dims<4>& im2col_dims) {
-  const int batches = MatchingArraySize(input_dims, 3, output_dims, 3);
-  const int input_depth = MatchingArraySize(input_dims, 0, filter_dims, 0);
-  const int output_depth = MatchingArraySize(filter_dims, 3, output_dims, 0);
-  const int input_height = ArraySize(input_dims, 2);
-  const int input_width = ArraySize(input_dims, 1);
-  const int filter_height = ArraySize(filter_dims, 2);
-  const int filter_width = ArraySize(filter_dims, 1);
-  const int output_height = ArraySize(output_dims, 2);
-  const int output_width = ArraySize(output_dims, 1);
+inline void Conv(const Eigen::ThreadPoolDevice& device,
+                 const ConvParams& params, const RuntimeShape& input_shape,
+                 const float* input_data, const RuntimeShape& filter_shape,
+                 const float* filter_data, const RuntimeShape& bias_shape,
+                 const float* bias_data, const RuntimeShape& output_shape,
+                 float* output_data, const RuntimeShape& im2col_shape,
+                 float* im2col_data) {
+  const int stride_width = params.stride_width;
+  const int stride_height = params.stride_height;
+  const PaddingType padding = params.padding_type;
+  const int pad_width = params.padding_values.width;
+  const int pad_height = params.padding_values.height;
+  const float output_activation_min = params.float_activation_min;
+  const float output_activation_max = params.float_activation_max;
+  TFLITE_DCHECK_EQ(input_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(filter_shape.DimensionsCount(), 4);
+  TFLITE_DCHECK_EQ(output_shape.DimensionsCount(), 4);
+
+  const int batches = MatchingDim(input_shape, 0, output_shape, 0);
+  const int input_depth = MatchingDim(input_shape, 3, filter_shape, 3);
+  const int output_depth = MatchingDim(filter_shape, 0, output_shape, 3);
+  const int input_height = input_shape.Dims(1);
+  const int input_width = input_shape.Dims(2);
+  const int filter_height = filter_shape.Dims(1);
+  const int filter_width = filter_shape.Dims(2);
+  const int output_height = output_shape.Dims(1);
+  const int output_width = output_shape.Dims(2);
   EigenTensorConvFunctor<float> conv_functor;
   conv_functor(device, input_data, im2col_data, batches, input_height,
                input_width, input_depth, filter_data, filter_height,
@@ -157,8 +167,8 @@ inline void Conv(const Eigen::ThreadPoolDevice& device, const float* input_data,
                output_width);
 
   optimized_ops::AddBiasAndEvalActivationFunction(
-      output_activation_min, output_activation_max, DimsToShape(bias_dims),
-      bias_data, DimsToShape(output_dims), output_data);
+      output_activation_min, output_activation_max, bias_shape, bias_data,
+      output_shape, output_data);
 }
 
 }  // namespace multithreaded_ops
",0,test
04ea2096ade2cf323312cb1a1ff008c667994e24,tensorflow/tensorflow,"Use tf.lite as the py_module name. Made the necessary changes to the api generator to accommodate `dots` in the py_module name

PiperOrigin-RevId: 329537229
Change-Id: If695ba06a0252b0094eafa629c43c5d65c344d13",build_py_api_docs.py,"@@ -55,7 +55,7 @@ FLAGS = flags.FLAGS
 def main(_):
   doc_generator = generate_lib.DocGenerator(
       root_title='TensorFlow Lite',
-      py_modules=[('lite', tf.lite)],
+      py_modules=[('tf.lite', tf.lite)],
       base_dir=str(pathlib.Path(tf.__file__).parent),
       code_url_prefix=FLAGS.code_url_prefix,
       search_hints=FLAGS.search_hints,
",0,train
ddf9a100370bfbb27a2801202bb33a13ca4b9999,tensorflow/tensorflow,address comments,stream_executor_internal.h,"@@ -37,6 +37,9 @@ port::Status InitStreamExecutorPlugin(void* dso_handle);
 // testing).
 port::Status InitStreamExecutorPlugin(SEInitPluginFn init_fn);
 
+// This file implements core stream executor base classes in terms of
+// the C API defined in stream_executor.h. A class ""CSomething"" represents a
+// ""Something"" that can be manipulated via calls in the C interface.
 class CPlatform : public Platform {
  public:
   explicit CPlatform(SP_Platform platform,
",0,train
89b81679155ce4e0b25af28440ae4d0906e69e8e,tensorflow/tensorflow,"Make `maybe_set_static_shape` a no-op when `shape` is a python constant.
`maybe_set_static_shape` is only meant to handle cases that C++ shape inference cannot, which is when shape is a tensor that has a path to a captured placeholder inside a FuncGraph. So this change does not break any use-cases we care about.
This fixes an issue with creating spurious constants in the Graph which are unused after shape inference.

PiperOrigin-RevId: 263666943",tensor_util.py,"@@ -964,10 +964,40 @@ def shape_tensor(shape):  # pylint: disable=invalid-name
   return ops.convert_to_tensor(shape, dtype=dtype, name=""shape"")
 
 
+# DO NOT USE: For testing only.
+_ENABLE_MAYBE_SET_STATIC_SHAPE = True
+
+
 def maybe_set_static_shape(tensor, shape):  # pylint: disable=invalid-name
-  if (not context.executing_eagerly() and
+  """"""Sets the shape of `tensor` to the `shape`'s constant value, if inferrable.
+
+  This is a temporary workaround to fix shape inference across functional op
+  boundaries. E.g.
+
+  ```python
+  shape = tf.constant([3])
+  @tf.function
+  def f():
+    u = tf.random_uniform(shape)
+    return u
+  ```
+
+  If we were to rely solely on C++ shape inference, the shape of `u` inside
+  `f` would be unknown because C++ shape inference is not aware of the outer
+  graph and all it sees is a Placeholder node when backtracing the captured
+  tensor for `shape`. `maybe_set_static_shape` computes the static shape value
+  of `shape` by traversing the `FuncGraph` boundaries and sets the correct
+  shape.
+
+  A longer term solution would be to fix C++ shape inference.
+
+  Args:
+    tensor: A tensor.
+    shape: A shape tensor.
+  """"""
+  if (_ENABLE_MAYBE_SET_STATIC_SHAPE and not context.executing_eagerly() and
       ops.get_default_graph().building_function and
-      not tensor.shape.is_fully_defined()):
+      not tensor.shape.is_fully_defined() and is_tensor(shape)):
     shape = shape_tensor(shape)
     const_shape = constant_value_as_shape(shape)
     tensor.set_shape(const_shape)
",0,train
89b81679155ce4e0b25af28440ae4d0906e69e8e,tensorflow/tensorflow,"Make `maybe_set_static_shape` a no-op when `shape` is a python constant.
`maybe_set_static_shape` is only meant to handle cases that C++ shape inference cannot, which is when shape is a tensor that has a path to a captured placeholder inside a FuncGraph. So this change does not break any use-cases we care about.
This fixes an issue with creating spurious constants in the Graph which are unused after shape inference.

PiperOrigin-RevId: 263666943",tensor_util_test.py,"@@ -18,11 +18,13 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
 import sys
 import numpy as np
 
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import func_graph
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.framework import test_util
@@ -1080,6 +1082,52 @@ class ConstantValueAsShapeTest(test.TestCase):
       c_val = tensor_util.constant_value_as_shape(tf_val)
 
 
+class MaybeSetStaticShapeTest(test.TestCase):
+
+  @contextlib.contextmanager
+  def disableSetStaticShape(self):
+    flag_old = tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE
+    tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE = False
+    try:
+      yield
+    finally:
+      tensor_util._ENABLE_MAYBE_SET_STATIC_SHAPE = flag_old
+
+  @test_util.run_deprecated_v1
+  def testMaybeSetStaticShape(self):
+    shape = constant_op.constant([2, 5], dtype=dtypes.int32)
+
+    def reshape():
+      v = array_ops.zeros([10])
+      return array_ops.reshape(v, shape)
+
+    with self.disableSetStaticShape():
+      graph_without_shape_propagation = func_graph.func_graph_from_py_func(
+          ""without_shape_propagation"", reshape, [], {})
+    graph_with_shape_propagation = func_graph.func_graph_from_py_func(
+        ""with_shape_propagation"", reshape, [], {})
+    self.assertCountEqual(
+        [op.type for op in graph_without_shape_propagation.get_operations()],
+        [op.type for op in graph_with_shape_propagation.get_operations()])
+
+  @test_util.run_deprecated_v1
+  def testMaybeSetStaticShapeScalarShape(self):
+
+    def reshape():
+      v = array_ops.placeholder(dtypes.float32)
+      t = array_ops.reshape(v, [-1])
+      return t
+
+    with self.disableSetStaticShape():
+      graph_without_shape_propagation = func_graph.func_graph_from_py_func(
+          ""without_shape_propagation"", reshape, [], {})
+    graph_with_shape_propagation = func_graph.func_graph_from_py_func(
+        ""with_shape_propagation"", reshape, [], {})
+    self.assertCountEqual(
+        [op.type for op in graph_without_shape_propagation.get_operations()],
+        [op.type for op in graph_with_shape_propagation.get_operations()])
+
+
 class ShapeTensorTest(test_util.TensorFlowTestCase):
 
   @test_util.run_in_graph_and_eager_modes
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compiled_model.cc,"@@ -533,6 +533,9 @@ NodeDescriptor FuseChain(const FusionSequence& chain) {
       absl::Substitute(non_linkable.task->shader_source, function_code + ""$0"",
                        buffer_declarations + ""$1"", call_code);
   fused_descriptor->AddDstTensor("""", {});
+  fused_descriptor->src_tensors_names = non_linkable.task->src_tensors_names;
+  fused_descriptor->dst_tensors_names = non_linkable.task->dst_tensors_names;
+  fused_descriptor->tensors_as_args = non_linkable.task->tensors_as_args;
   fused_descriptor->resize_function = non_linkable.task->resize_function;
   node_desc.dst_tensors_ids = {fused_id};
   node_desc.task = fused_descriptor;
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task.cc,"@@ -114,6 +114,9 @@ absl::Status ComputeTask::CompileWithDevice(id<MTLDevice> device,
   }
   resize_function_ = desc.task->resize_function;
   program_ = program;
+  src_tensors_names_ = desc.task->src_tensors_names;
+  dst_tensors_names_ = desc.task->dst_tensors_names;
+  tensors_as_args_ = desc.task->tensors_as_args;
   return absl::OkStatus();
 }
 
@@ -228,10 +231,22 @@ std::vector<ValueId> ComputeTask::GetInputIds() const {
 
 void ComputeTask::SetSrcTensor(const MetalSpatialTensor& tensor, int index) {
   input_buffers_[index].metal_handle = tensor.GetBufferHandle();
+  if (tensors_as_args_) {
+    auto name = src_tensors_names_[index];
+    // extracting tensor_name from ""device FLT4* tensor_name_buffer"";
+    name = name.substr(13, name.size() - 20);
+    auto status = metal_args_.SetObjectRef(name, tensor);
+  }
 }
 
 void ComputeTask::SetDstTensor(const MetalSpatialTensor& tensor, int index) {
   output_buffers_[index].metal_handle = tensor.GetBufferHandle();
+  if (tensors_as_args_) {
+    auto name = dst_tensors_names_[index];
+    // extracting tensor_name from ""device FLT4* tensor_name_buffer"";
+    name = name.substr(13, name.size() - 20);
+    auto status = metal_args_.SetObjectRef(name, tensor);
+  }
 }
 
 void ComputeTask::SetDescription(const std::string& description) {
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task.h,"@@ -94,6 +94,9 @@ class ComputeTask {
   DispatchParamsFunction resize_function_;
   std::string description_;
   MetalArguments metal_args_;
+  std::vector<std::string> src_tensors_names_;
+  std::vector<std::string> dst_tensors_names_;
+  bool tensors_as_args_;
 };
 
 }  // namespace metal
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task_descriptor.cc,"@@ -62,12 +62,24 @@ ComputeTaskDescriptor::ComputeTaskDescriptor(const OperationDef& def)
 
 void ComputeTaskDescriptor::AddSrcTensor(const std::string& tensor_name,
                                          const TensorDescriptor& desc) {
-  src_tensors_names.push_back(""device FLT4* "" + tensor_name);
+  if (tensors_as_args) {
+    src_tensors_names.push_back(""device FLT4* "" + tensor_name + ""_buffer"");
+    auto desc_new = absl::make_unique<TensorDescriptor>(desc);
+    args.AddObjectRef(tensor_name, AccessType::READ, std::move(desc_new));
+  } else {
+    src_tensors_names.push_back(""device FLT4* "" + tensor_name);
+  }
 }
 
 void ComputeTaskDescriptor::AddDstTensor(const std::string& tensor_name,
                                          const TensorDescriptor& desc) {
-  dst_tensors_names.push_back(""device FLT4* "" + tensor_name);
+  if (tensors_as_args) {
+    dst_tensors_names.push_back(""device FLT4* "" + tensor_name + ""_buffer"");
+    auto desc_new = absl::make_unique<TensorDescriptor>(desc);
+    args.AddObjectRef(tensor_name, AccessType::WRITE, std::move(desc_new));
+  } else {
+    dst_tensors_names.push_back(""device FLT4* "" + tensor_name);
+  }
 }
 
 }  // namespace metal
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",compute_task_descriptor.h,"@@ -65,6 +65,10 @@ struct ComputeTaskDescriptor {
   ComputeTaskDescriptor(const ComputeTaskDescriptor&) = delete;
   ComputeTaskDescriptor& operator=(const ComputeTaskDescriptor&) = delete;
 
+  // temporary
+  bool tensors_as_args =
+      false;  // must be true if input/output tensors are used through args.tensor
+
   OperationDef definition;
   Arguments args;
   bool is_linkable = false;
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",fully_connected.cc,"@@ -62,26 +62,28 @@ std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels,
     code << R""(
   float summa = 0.0f;
   threadgroup FLT4 local_vector[32];
-  for (int j = 0; j < $0; ++j) {
-    local_vector[tid_index] = j * 32 + tid_index >= args.src_slices ?
-      FLT4(0.0f) : src_tensor[j * 32 + tid_index];
-    $1(mem_flags::mem_threadgroup);
+  for (int j = 0; j < args.src_depth_sub_groups; ++j) {
+    local_vector[tid_index] = j * 32 + tid_index >= args.src_tensor.Slices() ?
+      FLT4(0.0f) : args.src_tensor.Read(0, 0, j * 32 + tid_index);
+    $0(mem_flags::mem_threadgroup);
     for (uint i = 0, counter = j * 32 + tid.y * 8; i < 8; ++i, ++counter) {
       summa += dot(local_vector[tid.y * 8 + i], args.weights.Read(counter * args.dst_channels_alignedx8 + ugid.x));
     }
-    $1(mem_flags::mem_none);
+    $0(mem_flags::mem_none);
   }
   )"";
   } else {
     code << R""(
   float summa = 0.0f;
-  uint counter = ugid.y * $0;
-  for (uint i = 0; i < $0; ++i, ++counter) {
+  int counter = int(ugid.y) * args.src_depth_sub_groups;
+  for (int i = 0; i < args.src_depth_sub_groups; ++i, ++counter) {
     )"";
     if (src_depth % 4 != 0) {
-      code << ""    if (counter >= args.src_slices) continue;"" << std::endl;
+      code << ""    if (counter >= args.src_tensor.Slices()) continue;""
+           << std::endl;
     }
-    code << ""    summa += dot(src_tensor[counter], args.weights.Read(counter * ""
+    code << ""    summa += dot(args.src_tensor.Read(0, 0, counter), ""
+            ""args.weights.Read(counter * ""
             ""args.dst_channels_alignedx8 + ugid.x));""
          << std::endl;
     code << ""  }"" << std::endl;
@@ -90,27 +92,25 @@ std::string GetFullyConnectedCode(const GpuInfo& gpu_info, int src_channels,
 
   threadgroup float temp[8][4];
   temp[tid.x][tid.y] = summa;
-  $1(mem_flags::mem_threadgroup);
+  $0(mem_flags::mem_threadgroup);
   if (tid.y == 0) {
     summa += temp[tid.x][1];
     summa += temp[tid.x][2];
     summa += temp[tid.x][3];
     temp[tid.x][0] = summa;
   }
-  $1(mem_flags::mem_threadgroup);
-  if (tid.y == 0 && tid.x % 4 == 0 && ugid.x < args.dst_channels) {
-    const int linear_index = ugid.x / 4;
+  $0(mem_flags::mem_threadgroup);
+  const int linear_index = ugid.x / 4;
+  if (tid.y == 0 && tid.x % 4 == 0 && linear_index < args.dst_tensor.Slices()) {
     FLT4 value = FLT4(temp[tid.x][0], temp[tid.x + 1][0], temp[tid.x + 2][0], temp[tid.x + 3][0]) +
       args.bias.Read(linear_index);
     uint3 gid = uint3(0u, 0u, uint(linear_index));
     $$2
-    dst_tensor[linear_index] = value;
+    args.dst_tensor.Write(value, 0, 0, linear_index);
   }
 }
   )"";
-  const int src_depth_sub_groups = shared_memory ? DivideRoundUp(src_depth, 32)
-                                                 : DivideRoundUp(src_depth, 4);
-  return absl::Substitute(code.str(), src_depth_sub_groups, barrier);
+  return absl::Substitute(code.str(), barrier);
 }
 }  // namespace
 
@@ -118,19 +118,21 @@ ComputeTaskDescriptor FullyConnected(const OperationDef& definition,
                                      const FullyConnectedAttributes& attr,
                                      const GpuInfo& gpu_info) {
   ComputeTaskDescriptor desc(definition);
+  desc.tensors_as_args = true;
   desc.shader_source = GetFullyConnectedCode(gpu_info, attr.weights.shape.i,
                                              attr.weights.shape.o);
 
-  desc.args.AddInt(""dst_channels"", attr.weights.shape.o);
-  desc.args.AddInt(""src_slices"", DivideRoundUp(attr.weights.shape.i, 4));
+  bool shared_memory = gpu_info.IsApple() &&
+                       gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal();
+  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
+  const int src_depth_sub_groups = shared_memory ? DivideRoundUp(src_depth, 32)
+                                                 : DivideRoundUp(src_depth, 4);
   desc.args.AddInt(""dst_channels_alignedx8"", AlignByN(attr.weights.shape.o, 8));
+  desc.args.AddInt(""src_depth_sub_groups"", src_depth_sub_groups);
 
   desc.AddSrcTensor(""src_tensor"", definition.src_tensors[0]);
   desc.AddDstTensor(""dst_tensor"", definition.dst_tensors[0]);
 
-  bool shared_memory = gpu_info.IsApple() &&
-                       gpu_info.apple_info.IsLocalMemoryPreferredOverGlobal();
-  const int src_depth = DivideRoundUp(attr.weights.shape.i, 4);
   const int src_depth_aligned = AlignByN(src_depth, shared_memory ? 32 : 4);
   const int dst_channels_aligned = AlignByN(attr.weights.shape.o, 8);
 
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",metal_arguments.cc,"@@ -141,6 +141,7 @@ absl::Status MetalArguments::Init(id<MTLDevice> device, int buffer_offset,
   RETURN_IF_ERROR(AddObjectArgs(args));
   RETURN_IF_ERROR(ResolveSelectorsPass(*args, {}, code));
   RETURN_IF_ERROR(SetObjectsResources(*args));
+  object_refs_ = std::move(args->object_refs_);
   args->GetActiveArguments(kArgsPrefix, *code);
   std::string struct_desc = ""struct uniforms_buffer {\n"";
   int pos = 0;
@@ -229,6 +230,25 @@ absl::Status MetalArguments::SetHalf(const std::string& name, half value) {
       ""No support of half uniforms in Metal backend"");
 }
 
+absl::Status MetalArguments::SetObjectRef(const std::string& name,
+                                          const GPUObject& object) {
+  auto it = object_refs_.find(name);
+  if (it == object_refs_.end()) {
+    return absl::NotFoundError(
+        absl::StrCat(""No object ref with name - "", name));
+  }
+  GPUResourcesWithValue resources;
+  RETURN_IF_ERROR(object.GetGPUResources(it->second.get(), &resources));
+  for (const auto& r : resources.ints) {
+    RETURN_IF_ERROR(SetInt(absl::StrCat(name, ""_"", r.first), r.second));
+  }
+  for (const auto& r : resources.floats) {
+    RETURN_IF_ERROR(SetFloat(absl::StrCat(name, ""_"", r.first), r.second));
+  }
+  return absl::OkStatus();
+  // return SetGPUResources(name, resources);
+}
+
 void MetalArguments::Encode(id<MTLComputeCommandEncoder> encoder,
                             int buffer_offset) const {
   for (auto& b : buffers_) {
@@ -258,7 +278,14 @@ absl::Status MetalArguments::AddObjectArgs(Arguments* args) {
     AddGPUResources(t.first, t.second->GetGPUResources(), args);
   }
   for (auto& t : args->object_refs_) {
-    AddGPUResources(t.first, t.second->GetGPUResources(), args);
+    auto resources = t.second->GetGPUResources();
+    for (const auto& r : resources.ints) {
+      args->AddInt(absl::StrCat(t.first, ""_"", r));
+    }
+    for (const auto& r : resources.floats) {
+      args->AddFloat(absl::StrCat(t.first, ""_"", r));
+    }
+    // AddGPUResources(t.first, t.second->GetGPUResources(), args);
   }
   return absl::OkStatus();
 }
",0,train
50ae55b2eb72527f94bdeb68881fa19a60c9c975,tensorflow/tensorflow,"Using Tensor objects in FullyConnected kernel instead of raw buffers.

PiperOrigin-RevId: 348653113
Change-Id: I8e383cebc28d1218e6d4507a593f7e568b077120",metal_arguments.h,"@@ -46,6 +46,7 @@ class MetalArguments : public ArgumentsBinder {
   absl::Status SetInt(const std::string& name, int value) override;
   absl::Status SetFloat(const std::string& name, float value) override;
   absl::Status SetHalf(const std::string& name, half value) override;
+  absl::Status SetObjectRef(const std::string& name, const GPUObject& object);
 
   void Encode(id<MTLComputeCommandEncoder> encoder, int buffer_offset) const;
 
",0,train
d5bd4932569106da79a7ac7ba7a9e7cf0141ab06,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 226560588",gather_op_test.py,"@@ -421,6 +421,12 @@ class GatherTest(test.TestCase, parameterized.TestCase):
     # expected result.
     expected = self._batchNumpyGather(params, indices, axis, batch_dims)
 
+    # On Windows, we get an exception if we pass in the transformed numpy
+    # arrays (""Failed to convert numpy ndarray to a Tensor (Unsupported
+    # feed type).""); so convert them back to lists before calling tf.gather.
+    params = params.tolist()
+    indices = indices.tolist()
+
     result = array_ops.gather(params, indices, axis=axis, batch_dims=batch_dims)
     self.assertAllEqual(output_shape, result.shape.as_list())
     self.assertAllEqual(expected, result)
",0,train
a7c8a73a9f6d3a13fad085d6cff79cf96cfd5b18,tensorflow/tensorflow,"[xla::gpu] fuse bias addition for bf16 gemms.

PiperOrigin-RevId: 441133520",gemm_rewriter.cc,"@@ -132,29 +132,48 @@ class GemmRewriterVisitor : public DfsHloRewriteVisitor {
               m::AddAnyOrder(
                   m::Op(&existing_gemm).WithCustomCallTarget(kGemmCallTarget),
                   m::Op(&bias)))) {
-      // Do not fuse bias into S32 GEMM, as for this datatype cuBLAS only
-      // supports fixed values for alpha/beta.
-      if (existing_gemm->shape().element_type() == S32) {
-        return Status::OK();
-      }
-      auto config =
-          existing_gemm->backend_config<GemmBackendConfig>().ValueOrDie();
-      if (config.beta() == 0 && bias->user_count() == 1 &&
-          existing_gemm->user_count() == 1 &&
-          bias->shape() == existing_gemm->shape()) {
-        config.set_beta(1.0);
-        CHECK_EQ(existing_gemm->operand_count(), 2);
-        std::unique_ptr<HloInstruction> gemm_call =
-            HloInstruction::CreateCustomCall(
-                instr->shape(),
-                {existing_gemm->mutable_operand(0),
-                 existing_gemm->mutable_operand(1), bias},
-                kGemmCallTarget);
-        TF_RETURN_IF_ERROR(gemm_call->set_backend_config(config));
-        TF_RETURN_IF_ERROR(SetName(instr->GetModule(), gemm_call.get()));
-        TF_RETURN_IF_ERROR(
-            ReplaceWithNewInstruction(instr, std::move(gemm_call)));
-      }
+      return FuseBiasedGemm(instr, bias, existing_gemm);
+    }
+    return Status::OK();
+  }
+
+  Status HandleConvert(HloInstruction *instr) override {
+    HloInstruction *bias, *existing_gemm;
+    if (Match(
+            instr,
+            m::Convert(m::AddAnyOrder(
+                           m::Convert(m::Op(&existing_gemm)
+                                          .WithCustomCallTarget(kGemmCallTarget)
+                                          .WithElementType(BF16)),
+                           m::Convert(m::Op(&bias).WithElementType(BF16))))
+                .WithElementType(BF16))) {
+      return FuseBiasedGemm(instr, bias, existing_gemm);
+    }
+    return Status::OK();
+  }
+
+  Status FuseBiasedGemm(HloInstruction *instr, HloInstruction *bias,
+                        HloInstruction *existing_gemm) {
+    // Do not fuse bias into S32 GEMM, as for this datatype cuBLAS only
+    // supports fixed values for alpha/beta.
+    if (existing_gemm->shape().element_type() == S32) {
+      return Status::OK();
+    }
+    auto config =
+        existing_gemm->backend_config<GemmBackendConfig>().ValueOrDie();
+    if (config.beta() == 0 && bias->user_count() == 1 &&
+        existing_gemm->user_count() == 1 &&
+        bias->shape() == existing_gemm->shape()) {
+      config.set_beta(1.0);
+      CHECK_EQ(existing_gemm->operand_count(), 2);
+      std::unique_ptr<HloInstruction> gemm_call =
+          existing_gemm->CloneWithNewOperands(
+              instr->shape(), {existing_gemm->mutable_operand(0),
+                               existing_gemm->mutable_operand(1), bias});
+      TF_RETURN_IF_ERROR(gemm_call->set_backend_config(config));
+      TF_RETURN_IF_ERROR(SetName(instr->GetModule(), gemm_call.get()));
+      TF_RETURN_IF_ERROR(
+          ReplaceWithNewInstruction(instr, std::move(gemm_call)));
     }
     return Status::OK();
   }
",0,train
a7c8a73a9f6d3a13fad085d6cff79cf96cfd5b18,tensorflow/tensorflow,"[xla::gpu] fuse bias addition for bf16 gemms.

PiperOrigin-RevId: 441133520",gemm_rewrite_test.cc,"@@ -576,6 +576,30 @@ ENTRY int8gemm {
                       /*print_operand_shape=*/true);
   }
 }
+
+TEST_F(GemmRewriteTest, BF16GemmWithBias) {
+  const char* hlo_text = R""(
+HloModule BF16GemmWithBias
+
+ENTRY BF16GemmWithBias {
+  x = bf16[8,8]{1,0} parameter(0)
+  y = bf16[8,8]{1,0} parameter(1)
+  dot.5 = bf16[8,8]{1,0} dot(x, y), lhs_contracting_dims={1}, rhs_contracting_dims={0}
+  bias = bf16[8,8]{1,0} parameter(2)
+  ROOT add.6 = bf16[8,8]{1,0} add(dot.5, bias)
+}
+  )"";
+
+  EXPECT_TRUE(RunAndCompare(hlo_text, ErrorSpec{1e-3, 1e-3}));
+  MatchOptimizedHlo(hlo_text,
+                    R""(
+; CHECK-LABEL: ENTRY %BF16GemmWithBias (x: bf16[8,8], y: bf16[8,8], bias: bf16[8,8]) -> bf16[8,8] {
+; CHECK-NEXT:    [[INSTR_0:%[^ ]+]] = bf16[8,8]{1,0} parameter(0)
+; CHECK-NEXT:    [[INSTR_1:%[^ ]+]] = bf16[8,8]{1,0} parameter(1)
+; CHECK-NEXT:    [[INSTR_2:%[^ ]+]] = bf16[8,8]{1,0} parameter(2)
+; CHECK-NEXT:    ROOT [[INSTR_3:%[^ ]+]] = bf16[8,8]{1,0} custom-call([[INSTR_0]], [[INSTR_1]], [[INSTR_2]]), custom_call_target=""__cublas$gemm"", backend_config=""{\""alpha_real\"":1,\""alpha_imag\"":0,\""beta\"":1,\""dot_dimension_numbers\"":{\""lhs_contracting_dimensions\"":[\""1\""],\""rhs_contracting_dimensions\"":[\""0\""],\""lhs_batch_dimensions\"":[],\""rhs_batch_dimensions\"":[]},\""batch_size\"":\""1\"",\""lhs_stride\"":\""64\"",\""rhs_stride\"":\""64\"",\""selected_algorithm\"":\""{{-?[0-9]+}}\""}""
+      )"");
+}
 }  // namespace
 }  // namespace gpu
 }  // namespace xla
",0,train
a455319208888e72af34fc3021122803a53a047d,tensorflow/tensorflow,"Automated g4 rollback of changelist 201217989

PiperOrigin-RevId: 201257755",arithmetic_optimizer.cc,"@@ -2519,14 +2519,14 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage {
                              bool* modified) {
     const auto& t =
         ctx().graph_properties->GetInputProperties(input->name())[i];
-    const auto& c =
-        ctx().graph_properties->GetInputProperties(input->name())[j];
-    for (int k = 0; k < c.shape().dim_size(); ++k) {
-      // Skip if c shape is not fully determined.
-      if (c.shape().dim(k).size() < 0) {
+    for (int k = 0; k < t.shape().dim_size(); ++k) {
+      // Skip if t shape is not fully determined.
+      if (t.shape().dim(k).size() < 0) {
         return Status::OK();
       }
     }
+    const auto& c =
+        ctx().graph_properties->GetInputProperties(input->name())[j];
     TensorShapeProto broadcast_shape;
     if (!ShapeAfterBroadcast(t.shape(), c.shape(), &broadcast_shape)) {
       return errors::InvalidArgument(""Cannot get broadcast shape for: "",
@@ -2537,15 +2537,15 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage {
       // broadcast.
       return Status::OK();
     }
-    if (TensorShape::IsValid(c.shape()) && c.has_value()) {
-      Tensor constant(c.dtype(), c.shape());
-      if (!constant.FromProto(c.value())) {
+    if (TensorShape::IsValid(t.shape()) && t.has_value()) {
+      Tensor tensor(t.dtype(), t.shape());
+      if (!tensor.FromProto(t.value())) {
         return errors::InvalidArgument(""Cannot parse tensor from proto: "",
                                        t.value().DebugString());
       }
       complex128 element;
-      for (int k = 0; k < constant.NumElements(); ++k) {
-        if (!GetElement(constant, k, &element)) {
+      for (int k = 0; k < tensor.NumElements(); ++k) {
+        if (!GetElement(tensor, k, &element)) {
           // input data type is not supported by log1p. Skip.
           return Status::OK();
         }
@@ -2558,8 +2558,8 @@ class ConvertLog1pStage : public ArithmeticOptimizerStage {
       TF_RETURN_IF_ERROR(GetInputNode(input->input(i), &x));
       TF_RETURN_IF_ERROR(GetInputNode(input->input(j), &y));
       node->set_op(""Log1p"");
-      node->set_input(0, x->name());
-      node->add_input(AsControlDependency(y->name()));
+      node->set_input(0, y->name());
+      node->add_input(AsControlDependency(x->name()));
       ForwardControlDependencies(node, {input});
 
       AddToOptimizationQueue(node);
",0,test
6311992aa95f5e9ea88f400a404328071f8b6bea,tensorflow/tensorflow,"MultiProcessRunner: Register faulthandler so that subprocesses' stack traces can be dumped at timeout. Set a default timeout in seconds so timeouts are handled for regular medium-size test targets.

PiperOrigin-RevId: 304674677
Change-Id: I516315c6d2d4f951fc394c6bf2fb6f029096ed74",multi_process_runner.py,"@@ -36,13 +36,6 @@ from tensorflow.python.distribute import multi_process_lib
 from tensorflow.python.eager import context
 from tensorflow.python.platform import test
 
-# pylint: disable=g-import-not-at-top
-try:
-  # `faulthandler` is not available in py2.
-  import faulthandler
-except ImportError:
-  faulthandler = None
-
 # _ProcessStatusInfo contains process status information. When is_successful
 # attribute is True, the subprocess has ended successfully, or if False, the
 # exception stack trace info is stored in exc_info to pass on to parent process
@@ -312,7 +305,7 @@ class MultiProcessRunner(object):
         break
     return list_to_return
 
-  def join(self, timeout=250):
+  def join(self, timeout=None):
     """"""Joins all the processes with timeout.
 
     Args:
@@ -343,9 +336,6 @@ class MultiProcessRunner(object):
           if self._all_forced_terminated:
             break
           if time.time() - start_time > timeout:
-            # Send SIGTERM signal to subprocesses to dump their current
-            # stack trace.
-            self.terminate_all(sig=signal.SIGTERM)
             # If none of those did, report timeout to user.
             raise RuntimeError('One or more subprocesses timed out. '
                                'Number of outstanding subprocesses '
@@ -384,7 +374,7 @@ class MultiProcessRunner(object):
     _resource(PARENT_TO_SUB_QUEUE).put('terminate {} {}'.format(
         task_type, task_id))
 
-  def terminate_all(self, sig=signal.SIGKILL):
+  def terminate_all(self):
     """"""Terminates all subprocesses.""""""
     subprocess_infos = []
 
@@ -398,7 +388,7 @@ class MultiProcessRunner(object):
     for subprocess_info in subprocess_infos:
       logging.info('Parent process is now killing PID: %d', subprocess_info.pid)
       try:
-        os.kill(subprocess_info.pid, sig)
+        os.kill(subprocess_info.pid, signal.SIGKILL)
       except ProcessLookupError:
         # TODO(rchao): Remove subprocess info from the queue once a subprocess
         # is terminated.
@@ -459,14 +449,11 @@ class _Subprocess(object):
                *arg, **kwargs):
     """"""The wrapper function that actually gets run in child process(es).""""""
 
-    if faulthandler is not None:
-      faulthandler.enable()
-      faulthandler.register(signal.SIGTERM, chain=True)
-
     pid = os.getpid()
     logging.info('Subprocess with PID %d (%s, %d) is now being started.', pid,
                  task_type, task_id)
     _resource(SUBPROCESS_INFO_QUEUE).put(_SubprocessInfo(pid=pid))
+
     # Assign sys.stdout and sys.stderr as duplicates of `pipe_w` so print() and
     # logging.*() write directly to `pipe_w`. Unfortunately since we cannot
     # prepend task_type and task_id information to the streamed logs we will
@@ -557,7 +544,7 @@ def run(proc_func,
         grpc_fail_fast=None,
         stream_stdout=True,
         list_stdout=False,
-        timeout=250,
+        timeout=None,
         args=None,
         kwargs=None):  # pylint: disable=g-doc-args
   """"""Runs functions in local child processes.
",0,train
92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e`) is not visible outside the except block itself.

PiperOrigin-RevId: 293801105
Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",activity.py,"@@ -57,6 +57,8 @@ class Scope(object):
       the terminology of the Python 3 reference documentation, True roughly
       represents an actual scope, whereas False represents an ordinary code
       block.
+    isolated_names: Set[qual_names.QN], identifiers that are isolated to this
+      scope (even if the scope is not isolated).
     read: Set[qual_names.QN], identifiers read in this scope.
     modified: Set[qual_names.QN], identifiers modified in this scope.
     deleted: Set[qual_names.QN], identifiers deleted in this scope.
@@ -99,6 +101,8 @@ class Scope(object):
     self.parent = parent
     self.isolated = isolated
 
+    self.isolated_names = set()
+
     self.read = set()
     self.modified = set()
     self.deleted = set()
@@ -136,6 +140,7 @@ class Scope(object):
     if self.parent is not None:
       assert other.parent is not None
       self.parent.copy_from(other.parent)
+    self.isolated_names = copy.copy(other.isolated_names)
     self.modified = copy.copy(other.modified)
     self.read = copy.copy(other.read)
     self.deleted = copy.copy(other.deleted)
@@ -158,6 +163,7 @@ class Scope(object):
     if self.parent is not None:
       assert other.parent is not None
       self.parent.merge_from(other.parent)
+    self.isolated_names.update(other.isolated_names)
     self.read.update(other.read)
     self.modified.update(other.modified)
     self.bound.update(other.deleted)
@@ -170,9 +176,9 @@ class Scope(object):
     if self.parent is not None:
       assert not self.parent.is_final
       if not self.isolated:
-        self.parent.read.update(self.read)
-        self.parent.modified.update(self.modified)
-        self.parent.bound.update(self.bound)
+        self.parent.read.update(self.read - self.isolated_names)
+        self.parent.modified.update(self.modified - self.isolated_names)
+        self.parent.bound.update(self.bound - self.isolated_names)
         self.parent.globals.update(self.globals)
       else:
         # TODO(mdan): This is not accurate.
@@ -537,6 +543,16 @@ class ActivityAnalyzer(transformer.Base):
                                           (node.orelse, NodeAnno.ORELSE_SCOPE)))
     return node
 
+  def visit_ExceptHandler(self, node):
+    self._enter_scope(False)
+    # try/except oddity: as expected, it leaks any names you defined inside the
+    # except block, but not the name of the exception variable.
+    if node.name is not None:
+      self.scope.isolated_names.add(anno.getanno(node.name, anno.Basic.QN))
+    node = self.generic_visit(node)
+    self._exit_scope()
+    return node
+
 
 def resolve(node, context, parent_scope=None):
   return ActivityAnalyzer(context, parent_scope).visit(node)
",0,train
92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e`) is not visible outside the except block itself.

PiperOrigin-RevId: 293801105
Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",activity_test.py,"@@ -403,6 +403,32 @@ class ActivityAnalyzerTest(ActivityAnalyzerTestBase):
     self.assertScopeIs(
         anno.getanno(node.body[0], anno.Static.SCOPE), ('b',), ())
 
+  def test_except_exposes_names(self):
+
+    def test_fn(a, b, c):  # pylint: disable=unused-argument
+      try:
+        pass
+      except:  # pylint: disable=bare-except
+        b = c
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('c',), ('b',))
+
+  def test_except_hides_exception_var_name(self):
+
+    def test_fn(a, b, c):  # pylint: disable=unused-argument
+      try:
+        pass
+      except a as e:
+        b = e
+
+    node, _ = self._parse_and_analyze(test_fn)
+    fn_node = node
+    self.assertScopeIs(
+        anno.getanno(fn_node, NodeAnno.BODY_SCOPE), ('a',), ('b',))
+
   def test_aug_assign(self):
 
     def test_fn(a, b):
",0,train
92b66929070da69a40a0bfc78dfc1ac10bbf26d8,tensorflow/tensorflow,"Handle Python corner case: the name of a variable caught in an except block (i.e. `e` in `except Name as e`) is not visible outside the except block itself.

PiperOrigin-RevId: 293801105
Change-Id: I76faf7b0957170c18f739f531c44123b4dca3835",liveness_test.py,"@@ -245,6 +245,23 @@ class LivenessAnalyzerTest(LivenessAnalyzerTestBase):
     self.assertHasLiveIn(fn_body[0].body[0], ('b', 'c'))
     self.assertHasLiveIn(fn_body[1], ('x',))
 
+  def test_live_out_except_variable(self):
+
+    def test_fn(x, a):
+      try:
+        pass
+      except a as b:
+        raise b
+      return x
+
+    node = self._parse_and_analyze(test_fn)
+    fn_body = node.body
+
+    # Note: 'a' is not live because there is no raise statement inside the
+    # try, and we discount the possibility of other code in the try block
+    # raising an error.
+    self.assertHasLiveIn(fn_body[0], ('b', 'x'))
+
   def test_live_in_return_statement(self):
 
     def test_fn(x, a, b, c):  # pylint:disable=unused-argument
",0,train
3e0f9502b76778bb714de0317cfe2bdf256257ed,tensorflow/tensorflow,"Update docs about the Android version requirement for using nnapi execution priority.

PiperOrigin-RevId: 319464023
Change-Id: I8ac9522caf6ab7db361b17c022659a2729ced286",nnapi_delegate_provider.cc,"@@ -62,7 +62,7 @@ std::vector<Flag> NnapiDelegateProvider::CreateFlags(ToolParams* params) const {
     CreateFlag<std::string>(""nnapi_execution_priority"", params,
                             ""The model execution priority in nnapi, and it ""
                             ""should be one of the following: default, low, ""
-                            ""medium, high.""),
+                            ""medium and high. This requires Android 11+.""),
     CreateFlag<std::string>(
         ""nnapi_accelerator_name"", params,
         ""the name of the nnapi accelerator to use (requires Android Q+)""),
",0,train
d99a5dd275b2e9fc54e9d40830c3d8b3622b79c7,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-02-08

PiperOrigin-RevId: 293967672
Change-Id: I2bf6a86c4360d8cc5c8429e96539ab1a44ed2972",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 8)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
0642f8155f9d1391471067e7d97fee39521d3c44,tensorflow/tensorflow,"Narrow down argmin/argmax contract to always return the smallest index for ties

Currently we get this behavior consistently across TF/XLA:CPU/XLA:GPU/XLA:TPU, and it also matches Numpy semantics.

PiperOrigin-RevId: 312528188
Change-Id: I16901ff67052182fe374235f8c7521cbdf047779",math_ops.py,"@@ -256,7 +256,7 @@ def argmax(input,
 def argmax_v2(input, axis=None, output_type=dtypes.int64, name=None):
   """"""Returns the index with the largest value across axes of a tensor.
 
-  Note that in case of ties the identity of the return value is not guaranteed.
+  In case of ties, returns the smallest index.
 
   For example:
 
@@ -269,6 +269,9 @@ def argmax_v2(input, axis=None, output_type=dtypes.int64, name=None):
   <tf.Tensor: shape=(5,), dtype=int64, numpy=array([2, 2, 0, 2, 2])>
   >>> tf.math.argmax(B, 1)
   <tf.Tensor: shape=(3,), dtype=int64, numpy=array([2, 2, 1])>
+  >>> C = tf.constant([0, 0, 0, 0])
+  >>> tf.math.argmax(C) # Returns smallest index in case of ties
+  <tf.Tensor: shape=(), dtype=int64, numpy=0>
 
   Args:
     input: A `Tensor`.
@@ -307,7 +310,7 @@ def argmin(input,
 def argmin_v2(input, axis=None, output_type=dtypes.int64, name=None):
   """"""Returns the index with the smallest value across axes of a tensor.
 
-  Note that in case of ties the identity of the return value is not guaranteed.
+  Returns the smallest index in case of ties.
 
   Args:
     input: A `Tensor`. Must be one of the following types: `float32`, `float64`,
",0,train
48aef32dcd356fa6bae490fa1c853b9b2cdd4846,tensorflow/tensorflow,removing redundant semicolon,toco_from_protos_test.py,"@@ -50,7 +50,7 @@ class TocoFromProtosTest(googletest.TestCase):
     toco_flags.output_format = toco_flags_pb2.TFLITE
     toco_flags.inference_input_type = types_pb2.FLOAT
     toco_flags.inference_type = types_pb2.FLOAT
-    toco_flags.allow_custom_ops = True;
+    toco_flags.allow_custom_ops = True
     model_flags = model_flags_pb2.ModelFlags()
     input_array = model_flags.input_arrays.add()
     input_array.name = TensorName(in_tensor)
",0,test
74fb47ccd26da99e57a14fccf7561e7ba7bcb000,tensorflow/tensorflow,"Add a new pass for promoting VarHandle ops to TF saved model arguments

PiperOrigin-RevId: 315275908
Change-Id: Icbc5c032bd9474d279fecf48267665025a53c1bf",passes.h,"@@ -95,11 +95,9 @@ std::unique_ptr<OperationPass<ModuleOp>> CreateResourceDeviceInferencePass();
 // of their aliasing output arguments.
 std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass();
 
-// Creates a pass that promotes tf.VarHandleOp to to resource arguments of where
-// resource names are `tf_saved_model.bound_input` symbol argument attributes
-// for all functions.
-std::unique_ptr<OperationPass<ModuleOp>>
-CreatePromoteVarHandlesToSavedModelArgsPass();
+// Creates a pass that promotes tf.VarHandleOp to resource arguments for all
+// functions.
+std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass();
 
 // Creates a pass that converts readonly reference variables to the
 // corresponding resource variables.
",0,train
74fb47ccd26da99e57a14fccf7561e7ba7bcb000,tensorflow/tensorflow,"Add a new pass for promoting VarHandle ops to TF saved model arguments

PiperOrigin-RevId: 315275908
Change-Id: Icbc5c032bd9474d279fecf48267665025a53c1bf",promote_resources_to_args.cc,"@@ -389,18 +389,15 @@ void PromoteResourcesToArgsPass::runOnOperation() {
     return signalPassFailure();
 }
 
-// This pass is for promoting Varhandle ops to tf_saved_model.bound_input
-// attributes, which are required for TensorFlowSavedModelDialect.
-class PromoteVarHandlesToSavedModelArgsPass
-    : public PassWrapper<PromoteVarHandlesToSavedModelArgsPass,
-                         OperationPass<ModuleOp>> {
+class PromoteVarHandlesToArgsPass
+    : public PassWrapper<PromoteVarHandlesToArgsPass, OperationPass<ModuleOp>> {
  public:
   void runOnOperation() override;
 };
 
-void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() {
+void PromoteVarHandlesToArgsPass::runOnOperation() {
   ModuleOp module = getOperation();
-
+  MLIRContext* context = module.getContext();
   for (auto function : module.getOps<FuncOp>()) {
     if (failed(CheckSingleBlockFunction(function))) return signalPassFailure();
 
@@ -409,15 +406,13 @@ void PromoteVarHandlesToSavedModelArgsPass::runOnOperation() {
                                  &var_handle_shared_names);
 
     // Add resource names for each `tf.VarHandleOp` that were promoted to
-    // saved model arguments.
+    // resource arguments.
     const int var_handle_args_offset =
         function.getNumArguments() - var_handle_shared_names.size();
-    for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names)) {
-      auto symbol_ref =
-          SymbolRefAttr::get(var_name_and_index.value(), &getContext());
+    for (auto var_name_and_index : llvm::enumerate(var_handle_shared_names))
       function.setArgAttr(var_name_and_index.index() + var_handle_args_offset,
-                          ""tf_saved_model.bound_input"", symbol_ref);
-    }
+                          kResourceNameArgAttr,
+                          StringAttr::get(var_name_and_index.value(), context));
   }
 }
 
@@ -427,19 +422,17 @@ std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteResourcesToArgsPass() {
   return std::make_unique<PromoteResourcesToArgsPass>();
 }
 
-std::unique_ptr<OperationPass<ModuleOp>>
-CreatePromoteVarHandlesToSavedModelArgsPass() {
-  return std::make_unique<PromoteVarHandlesToSavedModelArgsPass>();
+std::unique_ptr<OperationPass<ModuleOp>> CreatePromoteVarHandlesToArgsPass() {
+  return std::make_unique<PromoteVarHandlesToArgsPass>();
 }
 
 static PassRegistration<PromoteResourcesToArgsPass> pass(
     ""tf-promote-resources-to-args"",
     ""Promote resources reads/writes to function inputs/outputs."");
 
-static PassRegistration<PromoteVarHandlesToSavedModelArgsPass> saved_model_pass(
-    ""tf-saved-model-promote-var-handles-to-args"",
-    ""Promote tf.VarHandleOps to function arguments in a format of ""
-    ""TensorFlowSavedModelDialect."");
+static PassRegistration<PromoteVarHandlesToArgsPass> var_handle_pass(
+    ""tf-promote-var-handles-to-args"",
+    ""Promote tf.VarHandleOps to function arguments."");
 
 }  // namespace TF
 }  // namespace mlir
",0,train
56a86ce36e09fdedeb84b5ebfa8f83f7778edf4a,tensorflow/tensorflow,Add CreateDir,hadoop_filesystem.cc,"@@ -525,6 +525,21 @@ void DeleteFile(const TF_Filesystem* filesystem, const char* path,
     TF_SetStatus(status, TF_OK, """");
 }
 
+void CreateDir(const TF_Filesystem* filesystem, const char* path,
+               TF_Status* status) {
+  auto libhdfs = static_cast<LibHDFS*>(filesystem->plugin_filesystem);
+  auto fs = Connect(libhdfs, path, status);
+  if (TF_GetCode(status) != TF_OK) return;
+
+  std::string scheme, namenode, hdfs_path;
+  ParseHadoopPath(path, &scheme, &namenode, &hdfs_path);
+
+  if (libhdfs->hdfsCreateDirectory(fs, hdfs_path.c_str()) != 0)
+    TF_SetStatusFromIOError(status, errno, path);
+  else
+    TF_SetStatus(status, TF_OK, """");
+}
+
 // TODO(vnvo2409): Implement later
 
 }  // namespace tf_hadoop_filesystem
",0,train
2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast

We bufferize dynamic broadcasts into a memref reinterpret cast that yields a
memref with affine map. This clashes with the return type of the function that
doesn't support affine maps. Insert a copy for this special case.

This is still a bit of a hack, but I prefer not to invest too much as a
different representation for dynamic broadcasts is on the horizon.

PiperOrigin-RevId: 405473962
Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",rewriters.h,"@@ -16,6 +16,7 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REWRITERS_H_
 #define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_MHLO_TRANSFORMS_REWRITERS_H_
 
+#include <functional>
 #include <memory>
 
 #include ""mlir/IR/MLIRContext.h""
@@ -64,12 +65,15 @@ void populateHLOToLHLOConversionPattern(MLIRContext *context,
 
 // Collection of rewrite patterns for lowering of HLO to memref dialect.
 // These patterns generally assume that the HLO operation are aliasing their
-// input memrefs. If enforce_identity_map is set to true, copies will be
+// input memrefs. If enforce_identity_map returns true for an op, copies will be
 // inserted when the lowering would otherwise lead to a memref with a
 // non-identity map.
 void populateHLOToMemrefConversionPattern(
     BufferizeTypeConverter *converter, RemoveSignTypeConverter *sign_converter,
-    OwningRewritePatternList *patterns, bool enforce_identity_map = true);
+    OwningRewritePatternList *patterns,
+    std::function<bool(Operation *)> enforce_identity_map = [](Operation *) {
+      return true;
+    });
 
 // Collection of rewrite patterns for lowering of HLO to Linalg dialect.
 void populateHLOToLinalgConversionPattern(MLIRContext *context,
",0,train
2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast

We bufferize dynamic broadcasts into a memref reinterpret cast that yields a
memref with affine map. This clashes with the return type of the function that
doesn't support affine maps. Insert a copy for this special case.

This is still a bit of a hack, but I prefer not to invest too much as a
different representation for dynamic broadcasts is on the horizon.

PiperOrigin-RevId: 405473962
Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",hlo_legalize_to_memref.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 // This file implements logic for lowering HLO dialect to LHLO dialect.
 
+#include <functional>
 #include <utility>
 
 #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.h""
@@ -159,10 +160,10 @@ class HloToMemrefDynamicBroadcastInDimOpConverter
  public:
   HloToMemrefDynamicBroadcastInDimOpConverter(
       TypeConverter& converter, RemoveSignTypeConverter* sign_converter,
-      MLIRContext* ctx, bool enforce_identity_maps)
+      MLIRContext* ctx, std::function<bool(Operation*)> enforce_identity_maps)
       : BaseOpConversion<mhlo::DynamicBroadcastInDimOp>(converter,
                                                         sign_converter, ctx),
-        enforce_identity_maps_(enforce_identity_maps) {}
+        enforce_identity_maps_(std::move(enforce_identity_maps)) {}
 
   Value signlessRewrite(mhlo::DynamicBroadcastInDimOp op,
                         ArrayRef<Value> operands, Type op_result_type,
@@ -171,7 +172,7 @@ class HloToMemrefDynamicBroadcastInDimOpConverter
     if (!result_type) return {};
     Value result = InsertDynamicMemrefCastOp(op, operands.front(), &rewriter);
 
-    if (enforce_identity_maps_) {
+    if (enforce_identity_maps_(op)) {
       result = CreateCopy(op, result, &rewriter);
     }
 
@@ -295,7 +296,7 @@ class HloToMemrefDynamicBroadcastInDimOpConverter
     return copy;
   }
 
-  bool enforce_identity_maps_;
+  std::function<bool(Operation*)> enforce_identity_maps_;
 };
 
 struct HloLegalizeToMemrefPass
@@ -331,10 +332,11 @@ struct HloLegalizeToMemrefPass
 
 void populateHLOToMemrefConversionPattern(
     BufferizeTypeConverter* converter, RemoveSignTypeConverter* sign_converter,
-    OwningRewritePatternList* patterns, bool enforce_identity_maps) {
+    OwningRewritePatternList* patterns,
+    std::function<bool(Operation*)> enforce_identity_maps) {
   MLIRContext* context = patterns->getContext();
   patterns->insert<HloToMemrefDynamicBroadcastInDimOpConverter>(
-      *converter, sign_converter, context, enforce_identity_maps);
+      *converter, sign_converter, context, std::move(enforce_identity_maps));
   patterns->insert<HloToMemrefDynamicReshapeConverter,
                    HloToMemrefReshapeUnrankedConverter>(
       *converter, sign_converter, context);
",0,train
2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast

We bufferize dynamic broadcasts into a memref reinterpret cast that yields a
memref with affine map. This clashes with the return type of the function that
doesn't support affine maps. Insert a copy for this special case.

This is still a bit of a hack, but I prefer not to invest too much as a
different representation for dynamic broaddcasts is on the horizon.

PiperOrigin-RevId: 405473962
Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",tf_broadcast_to_test.py,"@@ -0,0 +1,49 @@
+# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Tests for Tensorflow -> CPURT compilation.""""""
+
+import numpy as np
+
+from tensorflow.compiler.mlir.tfrt.jit.python_binding import tf_cpurt
+from tensorflow.python.platform import test
+
+cpurt = tf_cpurt.TfCpurtExecutor()
+
+
+class TfBroadcastToTest(test.TestCase):
+
+  def test_broadcast_return(self):
+    mlir_function = """"""
+      func @test(%arg0: tensor<?xf32>, %arg1: tensor<2xi32>)
+           -> (tensor<?x?xf32>, tensor<?x?xf32>) {
+        %1 = ""tf.BroadcastTo""(%arg0, %arg1)
+             : (tensor<?xf32>, tensor<2xi32>) -> tensor<?x?xf32>
+        %2 = ""tf.Add""(%1, %1)
+             : (tensor<?x?xf32>, tensor<?x?xf32>) -> tensor<?x?xf32>
+        return %1, %2 : tensor<?x?xf32>, tensor<?x?xf32>
+      }""""""
+
+    compiled = cpurt.compile(mlir_function, 'test')
+
+    arg0 = np.random.uniform(0, 10.0, size=1).astype(np.float32)
+    arg1 = np.random.uniform(0, 10, size=2).astype(np.int32)
+
+    [res1, res2] = cpurt.execute(compiled, [arg0, arg1])
+    np.testing.assert_allclose(res1, np.broadcast_to(arg0, arg1), atol=0.0)
+    np.testing.assert_allclose(res2, np.broadcast_to(arg0, arg1) * 2, atol=0.0)
+
+
+if __name__ == '__main__':
+  test.main()
",0,train
2f25e2d448f55e8218974d4cf7acbe703a17ddae,tensorflow/tensorflow,"[tfrt:jit] Insert a copy when returning a dynamic broadcast

We bufferize dynamic broadcasts into a memref reinterpret cast that yields a
memref with affine map. This clashes with the return type of the function that
doesn't support affine maps. Insert a copy for this special case.

This is still a bit of a hack, but I prefer not to invest too much as a
different representation for dynamic broadcasts is on the horizon.

PiperOrigin-RevId: 405473962
Change-Id: I81b67e9a0d17c83b0a0673f83fda09b015b10a72",bufferize_pass.cc,"@@ -136,7 +136,12 @@ struct ComputeOpAndFuncBufferizePass
     // Configure bufferize pattern for functions and lhlo.
     mhlo::populateHLOToMemrefConversionPattern(
         &converter, &remove_sign_converter, &patterns,
-        /*enforce_identity_map=*/false);
+        /*enforce_identity_map=*/[](Operation* op) {
+          // Insert a copy if the broadcast escapes.
+          return llvm::any_of(op->getUsers(), [](Operation* user) {
+            return isa<mlir::ReturnOp>(user);
+          });
+        });
     populateFuncOpTypeConversionPattern(patterns, converter);
     populateCallOpTypeConversionPattern(patterns, converter);
     populateBranchOpInterfaceTypeConversionPattern(patterns, converter);
",0,train
7e148a6e17c45487f68fbe025346babf7bbd14f2,tensorflow/tensorflow,Fix build of TensorFlow Lite,c_api.cc,"@@ -77,7 +77,7 @@ void TFL_InterpreterOptionsSetNumThreads(TFL_InterpreterOptions* options,
   options->num_threads = num_threads;
 }
 
-TFL_CAPI_EXPORT extern void TFL_InterpreterOptionsSetErrorReporter(
+void TFL_InterpreterOptionsSetErrorReporter(
     TFL_InterpreterOptions* options,
     void (*reporter)(void* user_data, const char* format, va_list args),
     void* user_data) {
",0,train
ea135aee180e47abe3ed45cf5a3be75f18955d20,tensorflow/tensorflow,"Fix issue with return value of evaluate() in models that add custom metrics via overriding train_step.

PiperOrigin-RevId: 330022551
Change-Id: Ie928e792d678a1142c0f27dbdb2a7b4c39ee3974",training.py,"@@ -1366,13 +1366,7 @@ class Model(base_layer.Layer, version_utils.ModelVersionSelector):
       if return_dict:
         return logs
       else:
-        results = []
-        for name in self.metrics_names:
-          if name in logs:
-            results.append(logs[name])
-        for key in sorted(logs.keys()):
-          if key not in self.metrics_names:
-            results.append(logs[key])
+        results = [logs.get(name, None) for name in self.metrics_names]
         if len(results) == 1:
           return results[0]
         return results
",0,test
ea135aee180e47abe3ed45cf5a3be75f18955d20,tensorflow/tensorflow,"Fix issue with return value of evaluate() in models that add custom metrics via overriding train_step.

PiperOrigin-RevId: 330022551
Change-Id: Ie928e792d678a1142c0f27dbdb2a7b4c39ee3974",training_test.py,"@@ -1618,64 +1618,6 @@ class TrainingTest(keras_parameterized.TestCase):
     model.evaluate(x, batch_size=batch_size)
     model.predict(x, batch_size=batch_size)
 
-  @keras_parameterized.run_all_keras_modes(
-      always_skip_v1=True)
-  @parameterized.named_parameters(
-      ('custom_metrics', False, True),
-      ('compiled_metrics', True, False),
-      ('both_compiled_and_custom_metrics', True, True))
-  def test_evaluate_with_custom_test_step(
-      self, use_compiled_metrics, use_custom_metrics):
-
-    class MyModel(training_module.Model):
-
-      def test_step(self, data):
-        x, y = data
-        pred = self(x)
-        metrics = {}
-        if use_compiled_metrics:
-          self.compiled_metrics.update_state(y, pred)
-          self.compiled_loss(y, pred)
-          for metric in self.metrics:
-            metrics[metric.name] = metric.result()
-        if use_custom_metrics:
-          custom_metrics = {
-              'mean': math_ops.reduce_mean(pred),
-              'sum': math_ops.reduce_sum(pred)
-          }
-          metrics.update(custom_metrics)
-        return metrics
-
-    inputs = layers_module.Input((2,))
-    outputs = layers_module.Dense(3)(inputs)
-    model = MyModel(inputs, outputs)
-    if use_compiled_metrics:
-      model.compile('adam', 'mse', metrics=['mae', 'mape'],
-                    run_eagerly=testing_utils.should_run_eagerly())
-    else:
-      model.compile('adam', 'mse',
-                    run_eagerly=testing_utils.should_run_eagerly())
-    x = np.random.random((4, 2))
-    y = np.random.random((4, 3))
-    results_list = model.evaluate(x, y)
-    results_dict = model.evaluate(x, y, return_dict=True)
-    self.assertLen(results_list, len(results_dict))
-    if use_compiled_metrics and use_custom_metrics:
-      self.assertLen(results_list, 5)
-      self.assertEqual(results_list,
-                       [results_dict['loss'],
-                        results_dict['mae'], results_dict['mape'],
-                        results_dict['mean'], results_dict['sum']])
-    if use_compiled_metrics and not use_custom_metrics:
-      self.assertLen(results_list, 3)
-      self.assertEqual(results_list,
-                       [results_dict['loss'],
-                        results_dict['mae'], results_dict['mape']])
-    if not use_compiled_metrics and use_custom_metrics:
-      self.assertLen(results_list, 2)
-      self.assertEqual(results_list,
-                       [results_dict['mean'], results_dict['sum']])
-
 
 class TestExceptionsAndWarnings(keras_parameterized.TestCase):
 
",0,test
8c933654194da1588e37d0088f701ff8f157764a,tensorflow/tensorflow,"Do not restore previous context in ScopedActivateContext destructor.

PiperOrigin-RevId: 239643428",cuda_driver.cc,"@@ -207,8 +207,11 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) {
   if (FLAGS_gpuexec_cuda_sync_around_driver_calls) SynchronizeOrDie();
 
   auto* tls = &tls_data.get();
-  tls->depth++;
-  if (tls->id == cuda_context->id()) {
+  if (tls->depth++ > 0) {
+    CHECK(tls->id == cuda_context->id())
+        << ""Trying to activate a CUDA context in the current thread which is ""
+           ""different than an existing instance of ScopedActivateContext."";
+
     if (kVerifyGpuContext) {
       CHECK_EQ(CurrentContext(), cuda_context->context());
     }
@@ -219,8 +222,6 @@ ScopedActivateContext::ScopedActivateContext(GpuContext* cuda_context) {
   VLOG(3) << ""ScopedActivateContext switching context from "" << tls->id
           << "" to "" << cuda_context->id();
 
-  to_restore_ = (tls->depth == 1 ? nullptr : tls->context);
-
   // Set the context and update thread local.
   CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(cuda_context->context()));
   tls->id = cuda_context->id();
@@ -241,15 +242,6 @@ ScopedActivateContext::~ScopedActivateContext() {
 
   tls->depth--;
   DCHECK_GE(tls->depth, 0);
-  if (to_restore_ == nullptr) {
-    // Leave context, tls->id, and tls->context set.
-    return;
-  }
-
-  // Set context and update thread local.
-  CHECK_EQ(CUDA_SUCCESS, cuCtxSetCurrent(to_restore_->context()));
-  tls->id = to_restore_->id();
-  tls->context = to_restore_;
 }
 
 namespace {
@@ -964,10 +956,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
   }
 }
 
-/* static */ port::Status GpuDriver::RecordEvent(GpuContext* context,
-                                                 CUevent event,
+/* static */ port::Status GpuDriver::RecordEvent(GpuContext*, CUevent event,
                                                  CUstream stream) {
-  ScopedActivateContext activated{context};
   CUresult res = cuEventRecord(event, stream);
   switch (res) {
     case CUDA_SUCCESS:
@@ -986,9 +976,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
   }
 }
 
-/* static */ port::StatusOr<CUresult> GpuDriver::QueryEvent(GpuContext* context,
+/* static */ port::StatusOr<CUresult> GpuDriver::QueryEvent(GpuContext*,
                                                             CUevent event) {
-  ScopedActivateContext activated{context};
   CUresult res = cuEventQuery(event);
   if (res != CUDA_SUCCESS && res != CUDA_ERROR_NOT_READY) {
     return port::Status(
@@ -1020,9 +1009,8 @@ GpuDriver::ContextGetSharedMemConfig(GpuContext* context) {
   return true;
 }
 
-/* static */ bool GpuDriver::WaitStreamOnEvent(GpuContext* context,
-                                               CUstream stream, CUevent event) {
-  ScopedActivateContext activation(context);
+/* static */ bool GpuDriver::WaitStreamOnEvent(GpuContext*, CUstream stream,
+                                               CUevent event) {
   CUresult res = cuStreamWaitEvent(stream, event, 0 /* = flags */);
   if (res != CUDA_SUCCESS) {
     LOG(ERROR) << ""could not wait stream on event: "" << ToString(res);
",0,train
8c933654194da1588e37d0088f701ff8f157764a,tensorflow/tensorflow,"Do not restore previous context in ScopedActivateContext destructor.

PiperOrigin-RevId: 239643428",gpu_driver.h,"@@ -512,11 +512,8 @@ class ScopedActivateContext {
   explicit ScopedActivateContext(GpuContext* context);
 
   // Checks that the context has remained activated for the duration of the
-  // scope.
+  // scope. Does not restore the previously active context!
   ~ScopedActivateContext();
-
- private:
-  GpuContext* to_restore_ = nullptr;
 };
 
 }  // namespace gpu
",0,train
f5c2e5d968d371c0855c6d7b2cc4f050615d4bc4,tensorflow/tensorflow,"Fix issue with gradients of resource variables in cond.

PiperOrigin-RevId: 192369091",control_flow_grad.py,"@@ -20,6 +20,7 @@ from __future__ import print_function
 
 from six.moves import xrange  # pylint: disable=redefined-builtin
 
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.ops import control_flow_ops
@@ -74,6 +75,11 @@ def _SwitchGrad(op, *grad):
     # At this point, we have created zero_grad guarded by the right switch.
     # Unfortunately, we may still get None here for not trainable data types.
     if zero_grad is None:
+      # For resource variables we get None always on the other branch, so bypass
+      # this.
+      if op.inputs[0].dtype == dtypes.resource:
+        return merge(
+            [grad[op_ctxt.branch]] * 2, name=""cond_resource_grad"")[0], None
       return None, None
     return merge(grad, name=""cond_grad"")[0], None
   else:
",0,train
f5c2e5d968d371c0855c6d7b2cc4f050615d4bc4,tensorflow/tensorflow,"Fix issue with gradients of resource variables in cond.

PiperOrigin-RevId: 192369091",gradients_test.py,"@@ -44,6 +44,7 @@ from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_grad  # pylint: disable=unused-import
+from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import state_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_grad  # pylint: disable=unused-import
 from tensorflow.python.ops import tensor_array_ops
@@ -810,5 +811,29 @@ class OnlyRealGradientsTest(test_util.TensorFlowTestCase):
       gradients.gradients(y, x)
 
 
+class ResourceCondTest(test_util.TensorFlowTestCase):
+
+  def testBasic(self):
+    gamma = resource_variable_ops.ResourceVariable(
+        np.random.random((3,)),
+        dtype=""float32"", name=""gamma"")
+
+    inputs = array_ops.ones(shape=(3,), dtype=""float32"")
+
+    def TestFn():
+      output = inputs + gamma
+      return output
+
+    training = array_ops.placeholder_with_default(True, shape=())
+    output = control_flow_ops.cond(
+        training, TestFn, lambda: inputs)
+
+    loss = output
+
+    grads = gradients.gradients(
+        loss, [gamma])
+    self.assertTrue(None not in grads)
+
+
 if __name__ == ""__main__"":
   googletest.main()
",0,train
8898af469c6d00310ce9f2a7ed18e331442a60ba,tensorflow/tensorflow,"Enable test now that the underlying problem has been fixed for TPU.

PiperOrigin-RevId: 217773726",raw_api_test.cc,"@@ -479,8 +479,7 @@ TEST(RawApiTest, CompileWithXlaReturnShapes) {
                                      xla_program_shape.result().layout()));
 }
 
-// Disabled because of failure on TPU (b/117876141)
-TEST(RawApiTest, DISABLED_DotGeneralWithLayoutTest) {
+TEST(RawApiTest, DotGeneralWithLayoutTest) {
   auto layout = xla::LayoutUtil::MakeLayout({0, 1});
 
   xrt::XLAAllocation p0;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",flatbuffer_export.cc,"@@ -167,7 +167,8 @@ static StatusOr<tflite::TensorType> GetTFLiteType(Type type,
       case 32:
         return tflite::TensorType_INT32;
       case 64:
-        return tflite::TensorType_INT64;
+        return itype.isUnsigned() ? tflite::TensorType_UINT64
+                                  : tflite::TensorType_INT64;
     }
   } else if (auto q_uniform_type =
                  type.dyn_cast<mlir::quant::UniformQuantizedType>()) {
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tf_tfl_flatbuffer_helpers.cc,"@@ -119,6 +119,8 @@ DataType ConvertIODataTypeToDataType(toco::IODataType dtype) {
       return DT_INT32;
     case toco::IODataType::INT64:
       return DT_INT64;
+    case toco::IODataType::UINT64:
+      return DT_UINT64;
     case toco::IODataType::STRING:
       return DT_STRING;
     case toco::IODataType::BOOL:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",convert_type.cc,"@@ -57,6 +57,8 @@ mlir::Type ConvertElementType(tflite::TensorType type, mlir::Builder builder) {
       return mlir::ComplexType::get(builder.getF64Type());
     case tflite::TensorType_INT8:
       return builder.getIntegerType(8);
+    case tflite::TensorType_UINT64:
+      return builder.getIntegerType(64, /*isSigned=*/false);
   }
 }
 
@@ -86,6 +88,8 @@ tensorflow::DataType TflTypeToTfType(tflite::TensorType type) {
       return tensorflow::DT_STRING;
     case tflite::TensorType_UINT8:
       return tensorflow::DT_UINT8;
+    case tflite::TensorType_UINT64:
+      return tensorflow::DT_UINT64;
   }
 }
 
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common.c,"@@ -203,6 +203,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
       return ""INT8"";
     case kTfLiteInt64:
       return ""INT64"";
+    case kTfLiteUInt64:
+      return ""UINT64"";
     case kTfLiteBool:
       return ""BOOL"";
     case kTfLiteComplex64:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common.h,"@@ -300,6 +300,7 @@ typedef enum {
   kTfLiteFloat16 = 10,
   kTfLiteFloat64 = 11,
   kTfLiteComplex128 = 12,
+  kTfLiteUInt64 = 13,
 } TfLiteType;
 
 // Return the name of a given type, for error reporting purposes.
@@ -354,6 +355,7 @@ typedef union TfLitePtrUnion {
    * members are deprecated. */
   int32_t* i32;
   int64_t* i64;
+  uint64_t* u64;
   float* f;
   TfLiteFloat16* f16;
   double* f64;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",common_test.cc,"@@ -84,6 +84,7 @@ TEST(Types, TestTypeNames) {
   EXPECT_EQ(type_name(kTfLiteInt16), ""INT16"");
   EXPECT_EQ(type_name(kTfLiteInt32), ""INT32"");
   EXPECT_EQ(type_name(kTfLiteUInt8), ""UINT8"");
+  EXPECT_EQ(type_name(kTfLiteUInt64), ""UINT64"");
   EXPECT_EQ(type_name(kTfLiteInt8), ""INT8"");
   EXPECT_EQ(type_name(kTfLiteInt64), ""INT64"");
   EXPECT_EQ(type_name(kTfLiteBool), ""BOOL"");
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",flatbuffer_conversions.cc,"@@ -859,6 +859,9 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
     case TensorType_INT64:
       *type = kTfLiteInt64;
       return kTfLiteOk;
+    case TensorType_UINT64:
+      *type = kTfLiteUInt64;
+      return kTfLiteOk;
     case TensorType_STRING:
       *type = kTfLiteString;
       return kTfLiteOk;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.cc,"@@ -74,6 +74,8 @@ TF_DataType GetTensorFlowDataType(TfLiteType type) {
       return TF_INT8;
     case kTfLiteInt64:
       return TF_INT64;
+    case kTfLiteUInt64:
+      return TF_UINT64;
     case kTfLiteComplex64:
       return TF_COMPLEX64;
     case kTfLiteComplex128:
@@ -103,6 +105,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) {
       return kTfLiteInt8;
     case TF_INT64:
       return kTfLiteInt64;
+    case TF_UINT64:
+      return kTfLiteUInt64;
     case TF_COMPLEX64:
       return kTfLiteComplex64;
     case TF_COMPLEX128:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util_test.cc,"@@ -115,6 +115,7 @@ TEST(UtilTest, TypeConversionsFromTFLite) {
   EXPECT_EQ(TF_INT32, GetTensorFlowDataType(kTfLiteInt32));
   EXPECT_EQ(TF_UINT8, GetTensorFlowDataType(kTfLiteUInt8));
   EXPECT_EQ(TF_INT64, GetTensorFlowDataType(kTfLiteInt64));
+  EXPECT_EQ(TF_UINT64, GetTensorFlowDataType(kTfLiteUInt64));
   EXPECT_EQ(TF_COMPLEX64, GetTensorFlowDataType(kTfLiteComplex64));
   EXPECT_EQ(TF_COMPLEX128, GetTensorFlowDataType(kTfLiteComplex128));
   EXPECT_EQ(TF_STRING, GetTensorFlowDataType(kTfLiteString));
@@ -129,6 +130,7 @@ TEST(UtilTest, TypeConversionsFromTensorFlow) {
   EXPECT_EQ(kTfLiteInt32, GetTensorFlowLiteType(TF_INT32));
   EXPECT_EQ(kTfLiteUInt8, GetTensorFlowLiteType(TF_UINT8));
   EXPECT_EQ(kTfLiteInt64, GetTensorFlowLiteType(TF_INT64));
+  EXPECT_EQ(kTfLiteUInt64, GetTensorFlowLiteType(TF_UINT64));
   EXPECT_EQ(kTfLiteComplex64, GetTensorFlowLiteType(TF_COMPLEX64));
   EXPECT_EQ(kTfLiteComplex128, GetTensorFlowLiteType(TF_COMPLEX128));
   EXPECT_EQ(kTfLiteString, GetTensorFlowLiteType(TF_STRING));
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",enum_mapping.h,"@@ -74,6 +74,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) {
       return TensorType_INT8;
     case kTfLiteInt64:
       return TensorType_INT64;
+    case kTfLiteUInt64:
+      return TensorType_UINT64;
     case kTfLiteString:
       return TensorType_STRING;
     case kTfLiteBool:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",kernel_util.cc,"@@ -460,6 +460,9 @@ int TfLiteTypeGetSize(TfLiteType type) {
     case kTfLiteInt64:
       TF_LITE_ASSERT_EQ(sizeof(int64_t), 8);
       return 8;
+    case kTfLiteUInt64:
+      TF_LITE_ASSERT_EQ(sizeof(uint64_t), 8);
+      return 8;
     case kTfLiteFloat64:
       TF_LITE_ASSERT_EQ(sizeof(double), 8);
       return 8;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",memory_helpers.cc,"@@ -66,6 +66,9 @@ TfLiteStatus TfLiteTypeSizeOf(TfLiteType type, size_t* size) {
     case kTfLiteInt64:
       *size = sizeof(int64_t);
       break;
+    case kTfLiteUInt64:
+      *size = sizeof(uint64_t);
+      break;
     case kTfLiteBool:
       *size = sizeof(bool);
       break;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",memory_helpers_test.cc,"@@ -139,6 +139,10 @@ TF_LITE_MICRO_TEST(TestTypeSizeOf) {
                           tflite::TfLiteTypeSizeOf(kTfLiteInt64, &size));
   TF_LITE_MICRO_EXPECT_EQ(sizeof(int64_t), size);
 
+  TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
+                          tflite::TfLiteTypeSizeOf(kTfLiteUInt64, &size));
+  TF_LITE_MICRO_EXPECT_EQ(sizeof(uint64_t), size);
+
   TF_LITE_MICRO_EXPECT_EQ(kTfLiteOk,
                           tflite::TfLiteTypeSizeOf(kTfLiteBool, &size));
   TF_LITE_MICRO_EXPECT_EQ(sizeof(bool), size);
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",micro_interpreter.cc,"@@ -192,6 +192,9 @@ void MicroInterpreter::CorrectTensorEndianness(TfLiteEvalTensor* tensorCorr) {
     case TfLiteType::kTfLiteInt64:
       CorrectTensorDataEndianness(tensorCorr->data.i64, tensorSize);
       break;
+    case TfLiteType::kTfLiteUInt64:
+      CorrectTensorDataEndianness(tensorCorr->data.u64, tensorSize);
+      break;
     case TfLiteType::kTfLiteInt32:
       CorrectTensorDataEndianness(tensorCorr->data.i32, tensorSize);
       break;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",optional_debug_tools.cc,"@@ -50,6 +50,8 @@ const char* TensorTypeName(TfLiteType type) {
       return ""kTfLiteInt8"";
     case kTfLiteInt64:
       return ""kTfLiteInt64"";
+    case kTfLiteUInt64:
+      return ""kTfLiteUInt64"";
     case kTfLiteString:
       return ""kTfLiteString"";
     case kTfLiteBool:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",portable_type_to_tflitetype.h,"@@ -69,6 +69,7 @@ MATCH_TYPE_AND_TFLITE_TYPE(std::complex<float>, kTfLiteComplex64);
 MATCH_TYPE_AND_TFLITE_TYPE(std::complex<double>, kTfLiteComplex128);
 MATCH_TYPE_AND_TFLITE_TYPE(TfLiteFloat16, kTfLiteFloat16);
 MATCH_TYPE_AND_TFLITE_TYPE(double, kTfLiteFloat64);
+MATCH_TYPE_AND_TFLITE_TYPE(uint64_t, kTfLiteUInt64);
 
 }  // namespace tflite
 #endif  // TENSORFLOW_LITE_PORTABLE_TYPE_TO_TFLITETYPE_H_
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",numpy.cc,"@@ -50,6 +50,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) {
       return NPY_INT8;
     case kTfLiteInt64:
       return NPY_INT64;
+    case kTfLiteUInt64:
+      return NPY_UINT64;
     case kTfLiteString:
       return NPY_STRING;
     case kTfLiteBool:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",calibration_wrapper.cc,"@@ -79,6 +79,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) {
       return TensorType_INT8;
     case kTfLiteInt64:
       return TensorType_INT64;
+    case kTfLiteUInt64:
+      return TensorType_UINT64;
     case kTfLiteString:
       return TensorType_STRING;
     case kTfLiteBool:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.py,"@@ -56,6 +56,7 @@ _MAP_TF_TO_TFLITE_TYPES = {
     dtypes.int32: _types_pb2.INT32,
     dtypes.uint8: _types_pb2.QUANTIZED_UINT8,
     dtypes.int64: _types_pb2.INT64,
+    dtypes.uint64: _types_pb2.UINT64,
     dtypes.string: _types_pb2.STRING,
     dtypes.bool: _types_pb2.BOOL,
     dtypes.int16: _types_pb2.QUANTIZED_INT16,
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",schema_generated.h,"@@ -395,11 +395,12 @@ enum TensorType {
   TensorType_INT8 = 9,
   TensorType_FLOAT64 = 10,
   TensorType_COMPLEX128 = 11,
+  TensorType_UINT64 = 12,
   TensorType_MIN = TensorType_FLOAT32,
-  TensorType_MAX = TensorType_COMPLEX128
+  TensorType_MAX = TensorType_UINT64
 };
 
-inline const TensorType (&EnumValuesTensorType())[12] {
+inline const TensorType (&EnumValuesTensorType())[13] {
   static const TensorType values[] = {
     TensorType_FLOAT32,
     TensorType_FLOAT16,
@@ -412,13 +413,14 @@ inline const TensorType (&EnumValuesTensorType())[12] {
     TensorType_COMPLEX64,
     TensorType_INT8,
     TensorType_FLOAT64,
-    TensorType_COMPLEX128
+    TensorType_COMPLEX128,
+    TensorType_UINT64
   };
   return values;
 }
 
 inline const char * const *EnumNamesTensorType() {
-  static const char * const names[13] = {
+  static const char * const names[14] = {
     ""FLOAT32"",
     ""FLOAT16"",
     ""INT32"",
@@ -431,13 +433,14 @@ inline const char * const *EnumNamesTensorType() {
     ""INT8"",
     ""FLOAT64"",
     ""COMPLEX128"",
+    ""UINT64"",
     nullptr
   };
   return names;
 }
 
 inline const char *EnumNameTensorType(TensorType e) {
-  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_COMPLEX128)) return """";
+  if (flatbuffers::IsOutRange(e, TensorType_FLOAT32, TensorType_UINT64)) return """";
   const size_t index = static_cast<size_t>(e);
   return EnumNamesTensorType()[index];
 }
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",split.h,"@@ -52,6 +52,7 @@ template <>
 inline std::vector<int> Split(const string& s, const string& delimiter) {
   std::vector<int> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
     fields.push_back(strtol(s.data() + p.first, nullptr, 10));
   }
   return fields;
@@ -61,11 +62,22 @@ template <>
 inline std::vector<int64_t> Split(const string& s, const string& delimiter) {
   std::vector<int64_t> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
     fields.push_back(strtoll(s.data() + p.first, nullptr, 10));
   }
   return fields;
 }
 
+template <>
+inline std::vector<uint64_t> Split(const string& s, const string& delimiter) {
+  std::vector<uint64_t> fields;
+  for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
+    fields.push_back(strtoull(s.data() + p.first, nullptr, 10));
+  }
+  return fields;
+}
+
 template <>
 inline std::vector<float> Split(const string& s, const string& delimiter) {
   std::vector<float> fields;
@@ -79,6 +91,7 @@ template <>
 inline std::vector<uint8_t> Split(const string& s, const string& delimiter) {
   std::vector<uint8_t> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
     fields.push_back(strtol(s.data() + p.first, nullptr, 10));
   }
   return fields;
@@ -88,6 +101,7 @@ template <>
 inline std::vector<int8_t> Split(const string& s, const string& delimiter) {
   std::vector<int8_t> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
     fields.push_back(strtol(s.data() + p.first, nullptr, 10));
   }
   return fields;
@@ -97,6 +111,7 @@ template <>
 inline std::vector<int16_t> Split(const string& s, const string& delimiter) {
   std::vector<int16_t> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
     fields.push_back(strtol(s.data() + p.first, nullptr, 10));
   }
   return fields;
@@ -106,8 +121,9 @@ template <>
 inline std::vector<bool> Split(const string& s, const string& delimiter) {
   std::vector<bool> fields;
   for (const auto& p : SplitToPos(s, delimiter)) {
-    fields.push_back(
-        static_cast<bool>(strtol(s.data() + p.first, nullptr, 10)));
+    // NOLINTNEXTLINE(runtime/deprecated_fn)
+    bool val = static_cast<bool>(strtol(s.data() + p.first, nullptr, 10));
+    fields.push_back(val);
   }
   return fields;
 }
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tflite_driver.cc,"@@ -325,6 +325,8 @@ bool TfLiteDriver::DataExpectation::Check(bool verbose,
       return TypedCheck<int32_t, float>(verbose, tensor);
     case kTfLiteInt64:
       return TypedCheck<int64_t, float>(verbose, tensor);
+    case kTfLiteUInt64:
+      return TypedCheck<uint64_t, float>(verbose, tensor);
     case kTfLiteUInt8:
       return TypedCheck<uint8_t, float>(verbose, tensor);
     case kTfLiteInt8:
@@ -477,6 +479,12 @@ void TfLiteDriver::SetInput(int id, const string& csv_values) {
       SetTensorData(values, tensor->data.raw);
       break;
     }
+    case kTfLiteUInt64: {
+      const auto& values = testing::Split<uint64_t>(csv_values, "","");
+      if (!CheckSizes<uint64_t>(tensor->bytes, values.size())) return;
+      SetTensorData(values, tensor->data.raw);
+      break;
+    }
     case kTfLiteUInt8: {
       const auto& values = testing::Split<uint8_t>(csv_values, "","");
       if (!CheckSizes<uint8_t>(tensor->bytes, values.size())) return;
@@ -554,6 +562,9 @@ void TfLiteDriver::SetExpectation(int id, const string& csv_values) {
     case kTfLiteInt64:
       expected_output_[id]->SetData<int64_t>(csv_values);
       break;
+    case kTfLiteUInt64:
+      expected_output_[id]->SetData<uint64_t>(csv_values);
+      break;
     case kTfLiteUInt8:
       expected_output_[id]->SetData<uint8_t>(csv_values);
       break;
@@ -653,6 +664,8 @@ string TfLiteDriver::ReadOutput(int id) {
       return JoinDefault(tensor->data.i32, num_elements, "","");
     case kTfLiteInt64:
       return JoinDefault(tensor->data.i64, num_elements, "","");
+    case kTfLiteUInt64:
+      return JoinDefault(tensor->data.u64, num_elements, "","");
     case kTfLiteUInt8:
       return Join(tensor->data.uint8, num_elements, "","");
     case kTfLiteInt8:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",operator.cc,"@@ -49,6 +49,7 @@ namespace tflite {
       {ArrayDataType::kInt16, ::tflite::TensorType_INT16},
       {ArrayDataType::kInt32, ::tflite::TensorType_INT32},
       {ArrayDataType::kInt64, ::tflite::TensorType_INT64},
+      {ArrayDataType::kUint64, ::tflite::TensorType_UINT64},
       {ArrayDataType::kString, ::tflite::TensorType_STRING},
       {ArrayDataType::kComplex64, ::tflite::TensorType_COMPLEX64},
       {ArrayDataType::kComplex128, ::tflite::TensorType_COMPLEX128},
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",tooling_util.cc,"@@ -2309,6 +2309,8 @@ ArrayDataType ConvertIODataTypeToArrayDataType(IODataType type) {
       return ArrayDataType::kInt32;
     case INT64:
       return ArrayDataType::kInt64;
+    case UINT64:
+      return ArrayDataType::kUint64;
     case BOOL:
       return ArrayDataType::kBool;
     case STRING:
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",c_api_types.h,"@@ -300,6 +300,7 @@ typedef enum {
   kTfLiteFloat16 = 10,
   kTfLiteFloat64 = 11,
   kTfLiteComplex128 = 12,
+  kTfLiteUInt64 = 13,
 } TfLiteType;
 
 // Return the name of a given type, for error reporting purposes.
@@ -354,6 +355,7 @@ typedef union TfLitePtrUnion {
    * members are deprecated. */
   int32_t* i32;
   int64_t* i64;
+  uint64_t* u64;
   float* f;
   TfLiteFloat16* f16;
   double* f64;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",verifier.cc,"@@ -418,6 +418,9 @@ bool VerifyNumericTensorBuffer(const Tensor& tensor, const Buffer& buffer,
     case TensorType_INT64:
       bytes_required *= sizeof(int64_t);
       break;
+    case TensorType_UINT64:
+      bytes_required *= sizeof(uint64_t);
+      break;
     case TensorType_BOOL:
       bytes_required *= sizeof(bool);
       break;
",0,train
f954b2770d0cfd8244a9cf9d7116fb15bc044118,tensorflow/tensorflow,"Add uint64 tensor support in TFLite

Even though we do not support uint64 op kernels on mobile, it is inevitable to
support uint64 tensors in order to enable TF uint64 ops via flex delegate.

This CL enables the uint64 tensor type in MLIR converter only.

PiperOrigin-RevId: 342939673
Change-Id: I24f422040f82cad7affce4b921361f79e8a51730",util.cc,"@@ -96,6 +96,9 @@ TfLiteStatus GetSizeOfType(TfLiteContext* context, const TfLiteType type,
     case kTfLiteInt64:
       *bytes = sizeof(int64_t);
       break;
+    case kTfLiteUInt64:
+      *bytes = sizeof(uint64_t);
+      break;
     case kTfLiteBool:
       *bytes = sizeof(bool);
       break;
",0,train
d969cd8ff9a337503963c8f4a02f56b7d776171e,tensorflow/tensorflow,"Fix flakiness in the test case.

PiperOrigin-RevId: 226370519",unified_gru_test.py,"@@ -176,8 +176,8 @@ class UnifiedGRUTest(keras_parameterized.TestCase):
       cudnn_model.fit(x_train, y_train)
       y_4 = cudnn_model.predict(x_train)
 
-      self.assertAllClose(y_1, y_3)
-      self.assertAllClose(y_2, y_4)
+      self.assertAllClose(y_1, y_3, rtol=1e-5, atol=1e-5)
+      self.assertAllClose(y_2, y_4, rtol=1e-5, atol=1e-5)
 
   @parameterized.named_parameters(
       # test_name, use_bias, bias_initializer, activation
",0,train
d969cd8ff9a337503963c8f4a02f56b7d776171e,tensorflow/tensorflow,"Fix flakiness in the test case.

PiperOrigin-RevId: 226370519",unified_lstm_test.py,"@@ -332,8 +332,8 @@ class UnifiedLSTMTest(keras_parameterized.TestCase):
       cudnn_model.fit(x_train, y_train)
       y_4 = cudnn_model.predict(x_train)
 
-      self.assertAllClose(y_1, y_3)
-      self.assertAllClose(y_2, y_4)
+      self.assertAllClose(y_1, y_3, rtol=1e-5, atol=1e-5)
+      self.assertAllClose(y_2, y_4, rtol=1e-5, atol=1e-5)
 
   @parameterized.named_parameters(('v0', 0), ('v1', 1), ('v2', 2))
   def test_implementation_mode_LSTM(self, implementation_mode):
",0,train
08963dfe9c24c3fdb28b0a5eabaac93f615d3509,tensorflow/tensorflow,"[tf.data] Changing `tf.data.Dataset.reduce` user-defined function device placement logic to match TensorFlow. Prior to this change, `tf.data.Dataset.reduce` would -- like the rest of tf.data operations -- default the placement of ops in its user-defined function to CPU. After this change, ops without explicit device placement will be placed on GPU (if possible).

The rationale behind this change is that, unlike other tf.data transformations, `tf.data.Dataset.reduce` computation is not expected to be executed within a training step where the accelerator (if present) would be expected to be used for model computation.

PiperOrigin-RevId: 306760623
Change-Id: Ia2602b7cde7503e9d9519b44b7f6b7621bedb547",iterator_ops.cc,"@@ -606,6 +606,7 @@ class ReduceDatasetOp : public HybridAsyncOpKernel {
     FunctionMetadata::Params params;
     OP_REQUIRES_OK(ctx, ctx->GetAttr(""use_inter_op_parallelism"",
                                      &params.use_inter_op_parallelism));
+    params.use_default_device = false;
     OP_REQUIRES_OK(ctx,
                    FunctionMetadata::Create(ctx, ""f"", params, &func_metadata_));
     OP_REQUIRES_OK(ctx, ctx->GetAttr(kOutputTypes, &output_types_));
",0,test
2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge.

PiperOrigin-RevId: 316116765
Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",flags.cc,"@@ -33,6 +33,7 @@ MarkForCompilationPassFlags* mark_for_compilation_flags;
 XlaDeviceFlags* device_flags;
 XlaOpsCommonFlags* ops_flags;
 IntroduceFloatingPointJitterPassFlags* jitter_flags;
+MlirCommonFlags* mlir_flags;
 
 std::vector<Flag>* flag_list;
 absl::once_flag flags_init;
@@ -166,6 +167,9 @@ void AllocateAndParseFlags() {
   jitter_flags = new IntroduceFloatingPointJitterPassFlags;
   jitter_flags->jitter_amount = 1e-5;
 
+  mlir_flags = new MlirCommonFlags;
+  mlir_flags->tf_mlir_enable_mlir_bridge = false;
+
   auto setter_for_jitter_tensor_names = [](string sequence) {
     jitter_flags->tensor_names = absl::StrSplit(sequence, ',');
     return true;
@@ -211,7 +215,11 @@ void AllocateAndParseFlags() {
        Flag(""tf_introduce_floating_point_jitter_amount"",
             &jitter_flags->jitter_amount,
             ""The amount of jitter to introduce.  This amount is added to each ""
-            ""element in the tensors named in `tensor_names."")});
+            ""element in the tensors named in `tensor_names.""),
+
+       Flag(""tf_mlir_enable_mlir_bridge"",
+            &mlir_flags->tf_mlir_enable_mlir_bridge,
+            ""Enables experimental MLIR-Based TensorFlow Compiler Bridge."")});
 
   AppendMarkForCompilationPassFlagsInternal(flag_list);
   xla::ParseFlagsFromEnvAndDieIfUnknown(""TF_XLA_FLAGS"", *flag_list);
@@ -250,6 +258,11 @@ GetIntroduceFloatingPointJitterPassFlags() {
   return *jitter_flags;
 }
 
+MlirCommonFlags* GetMlirCommonFlags() {
+  absl::call_once(flags_init, &AllocateAndParseFlags);
+  return mlir_flags;
+}
+
 void AppendMarkForCompilationPassFlags(std::vector<Flag>* flag_list) {
   absl::call_once(flags_init, &AllocateAndParseFlags);
   AppendMarkForCompilationPassFlagsInternal(flag_list);
",0,test
2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge.

PiperOrigin-RevId: 316116765
Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",flags.h,"@@ -133,6 +133,11 @@ struct IntroduceFloatingPointJitterPassFlags {
   std::vector<string> tensor_names;
 };
 
+// Flags for common MLIR configurations.
+struct MlirCommonFlags {
+  bool tf_mlir_enable_mlir_bridge;
+};
+
 // Return a pointer to the DumpGraphFlags struct;
 // repeated calls return the same pointer.
 // This should be called only after Flags::Parse() has returned.
@@ -148,6 +153,8 @@ const XlaOpsCommonFlags& GetXlaOpsCommonFlags();
 const IntroduceFloatingPointJitterPassFlags&
 GetIntroduceFloatingPointJitterPassFlags();
 
+MlirCommonFlags* GetMlirCommonFlags();
+
 // Appends the flag definitions associated with
 // MarkForCompilationPassFlags/DumpGraphFlags to `flag_list`.
 //
",0,test
2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge.

PiperOrigin-RevId: 316116765
Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",context.py,"@@ -451,7 +451,6 @@ class Context(object):
     self._inter_op_parallelism_threads = None
     self._soft_device_placement = None
     self._log_device_placement = None
-    self._enable_mlir_bridge = None
     self._enable_mlir_graph_optimization = None
     self._optimizer_experimental_options = {}
 
@@ -927,8 +926,7 @@ class Context(object):
     if self._log_device_placement is not None:
       config.log_device_placement = self._log_device_placement
 
-    if self._enable_mlir_bridge is not None:
-      config.experimental.enable_mlir_bridge = self._enable_mlir_bridge
+    config.experimental.enable_mlir_bridge = pywrap_tfe.TF_IsMlirBridgeEnabled()
     if self._enable_mlir_graph_optimization is not None:
       config.experimental.enable_mlir_graph_optimization = (
           self._enable_mlir_graph_optimization)
@@ -1466,7 +1464,7 @@ class Context(object):
 
   @property
   def enable_mlir_bridge(self):
-    return self._enable_mlir_bridge
+    return pywrap_tfe.TF_IsMlirBridgeEnabled()
 
   @property
   def enable_mlir_graph_optimization(self):
@@ -1474,7 +1472,7 @@ class Context(object):
 
   @enable_mlir_bridge.setter
   def enable_mlir_bridge(self, enabled):
-    self._enable_mlir_bridge = enabled
+    pywrap_tfe.TF_EnableMlirBridge(enabled)
     self._thread_local_data.function_call_options = None
 
   @enable_mlir_graph_optimization.setter
",0,test
2b7fb42e3b7112fc712edf05f29bbfd865a5515a,tensorflow/tensorflow,"Provides an environment variable via TF_XLA_FLAGS to turn on the MLIR bridge.

PiperOrigin-RevId: 316116765
Change-Id: I005c5b6712a4e7cdd72f4302caae93f58c5f840e",tfe_wrapper.cc,"@@ -364,6 +364,14 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
   m.def(""TF_SetXlaMinClusterSize"", &TF_SetXlaMinClusterSize);
   m.def(""TF_IsXlaEnabled"", [] { return tensorflow::IsXlaEnabled(); });
 
+  // MLIR Logic
+  m.def(""TF_IsMlirBridgeEnabled"", [] {
+    return tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge;
+  });
+  m.def(""TF_EnableMlirBridge"", [](bool enabled) {
+    tensorflow::GetMlirCommonFlags()->tf_mlir_enable_mlir_bridge = enabled;
+  });
+
   // // TFE_Context Logic
   m.def(
       ""TFE_NewContext"",
",0,test
4c0c199222fdcffbfb548aefa0ea82c853aea609,tensorflow/tensorflow,"Add support for testing per channel quantization kernels.

PiperOrigin-RevId: 230947961",test_util.cc,"@@ -47,7 +47,12 @@ std::vector<Matcher<std::complex<float>>> ArrayComplex64Near(
 }
 
 int SingleOpModel::AddInput(const TensorData& t, bool is_variable) {
-  int id = AddTensor<float>(t, {}, is_variable);
+  int id = 0;
+  if (t.per_channel_quantization) {
+    id = AddTensorPerChannelQuant(t);
+  } else {
+    id = AddTensor<float>(t, {}, is_variable);
+  }
   inputs_.push_back(id);
   return id;
 }
",0,train
4c0c199222fdcffbfb548aefa0ea82c853aea609,tensorflow/tensorflow,"Add support for testing per channel quantization kernels.

PiperOrigin-RevId: 230947961",test_util.h,"@@ -21,13 +21,14 @@ limitations under the License.
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
 
+#include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/lite/interpreter.h""
 #include ""tensorflow/lite/kernels/internal/tensor_utils.h""
 #include ""tensorflow/lite/kernels/register.h""
 #include ""tensorflow/lite/model.h""
 #include ""tensorflow/lite/string_util.h""
 #include ""tensorflow/lite/testing/util.h""
-#include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/lite/tools/optimize/quantization_utils.h""
 
 namespace tflite {
 
@@ -82,7 +83,7 @@ inline std::vector<float> Dequantize(const std::vector<T>& data, float scale,
 // A helper struct to construct test tensors. This is particularly useful for
 // quantized tensor which must have their scale and zero_point defined before
 // the actual data is known. This mimics what happens in practice: quantization
-// parameters are calculated during training.
+// parameters are calculated during training or post training..
 struct TensorData {
   TensorType type;
   std::vector<int> shape;
@@ -90,6 +91,10 @@ struct TensorData {
   float max;
   float scale;
   int32_t zero_point;
+  bool per_channel_quantization;
+  std::vector<float> per_channel_quantization_scales;
+  std::vector<int64_t> per_channel_quantization_offsets;
+  int32_t channel_index;
 };
 
 class SingleOpResolver : public OpResolver {
@@ -172,6 +177,46 @@ class SingleOpModel {
     PopulateTensor(index, /*offset=*/0, q.data(), q.data() + q.size());
   }
 
+  // Quantize and populate data for filter with per channel quantization.
+  void PerChannelSymmetricQuantizeAndPopulate(
+      int index, const std::vector<float>& input_data) {
+    TfLiteTensor* t = interpreter_->tensor(index);
+    auto* params =
+        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
+    const int channel_index = params->quantized_dimension;
+
+    std::vector<int32_t> shape(t->dims->size);
+    for (int i = 0; i < shape.size(); ++i) {
+      shape[i] = t->dims->data[i];
+    }
+    const int32_t num_inputs = input_data.size();
+    const int32_t num_channel = shape[channel_index];
+    std::vector<int8_t> quantized_output(num_inputs);
+    std::vector<float> scales_inv(num_channel);
+    for (int i = 0; i < num_channel; ++i) {
+      scales_inv[i] = 1.0f / params->scale->data[i];
+    }
+    optimize::utils::SymmetricPerChannelQuantizeValues(
+        input_data.data(), scales_inv, shape, channel_index, &quantized_output);
+
+    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
+                   quantized_output.data() + quantized_output.size());
+  }
+
+  // Quantize and populate data for bias with per channel quantization.
+  void PerChannelQuantizeBias(int index, const std::vector<float>& input_data) {
+    const int32_t num_inputs = input_data.size();
+    std::vector<int32_t> quantized_output(num_inputs);
+    TfLiteTensor* t = interpreter_->tensor(index);
+    auto* params =
+        reinterpret_cast<TfLiteAffineQuantization*>(t->quantization.params);
+    for (int i = 0; i < num_inputs; ++i) {
+      quantized_output[i] = input_data[i] * params->scale->data[i];
+    }
+    PopulateTensor(index, /*offset=*/0, quantized_output.data(),
+                   quantized_output.data() + quantized_output.size());
+  }
+
   const std::vector<int>& GetShape(int id) { return tensor_data_.at(id).shape; }
 
   float GetScale(int id) { return tensor_data_.at(id).scale; }
@@ -292,6 +337,24 @@ class SingleOpModel {
     return {scale, zero_point};
   }
 
+  int AddTensorPerChannelQuant(TensorData t) {
+    const int id = tensors_.size();
+    flatbuffers::Offset<QuantizationParameters> q_params = 0;
+    q_params = CreateQuantizationParameters(
+        builder_, /*min=*/0, /*max=*/0,
+        /*scale=*/
+        builder_.CreateVector<float>(t.per_channel_quantization_scales),
+        /*zero point=*/
+        builder_.CreateVector<int64_t>(t.per_channel_quantization_offsets),
+        QuantizationDetails_NONE, 0, t.channel_index);
+    tensors_.push_back(
+        CreateTensor(builder_, builder_.CreateVector<int>(t.shape), t.type,
+                     /*buffer=*/0,
+                     /*name=*/0, q_params, /*is_variable=*/false));
+    tensor_data_[id] = t;
+    return id;
+  }
+
   template <typename T>
   int AddTensor(TensorData t, std::initializer_list<T> data,
                 bool is_variable = false) {
",0,train
17fe6574eb7929f92d081a754144747527af2a24,tensorflow/tensorflow,"Add warning note to Variable.initialized_value documentation.
Change: 140374197",variables.py,"@@ -518,6 +518,10 @@ class Variable(object):
     You should use this instead of the variable itself to initialize another
     variable with a value that depends on the value of this variable.
 
+    Beware of using initialized_value except during initialization:
+    initialized_value causes the Variable's initializer op to be run, so running
+    this op resets the variable to the initial value.
+
     ```python
     # Initialize 'v' with a random tensor.
     v = tf.Variable(tf.truncated_normal([10, 40]))
",0,test
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",check_macros.h,"@@ -35,7 +35,7 @@ struct ToString {
 template <>
 struct ToString<float, void> {
   static void Run(float value, char* buf) {
-    snprintf(buf, kValueBufSize, ""%.9g"", value);
+    snprintf(buf, kValueBufSize, ""%.9g"", static_cast<double>(value));
   }
 };
 
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantization_util.cc,"@@ -183,11 +183,11 @@ double DoubleFromFractionAndShift(int64_t fraction, int shift) {
   // Detect NaNs and infinities.
   if (shift == std::numeric_limits<int>::max()) {
     if (fraction == 0) {
-      return NAN;
+      return std::numeric_limits<double>::quiet_NaN();
     } else if (fraction > 0) {
-      return INFINITY;
+      return std::numeric_limits<double>::infinity();
     } else {
-      return -INFINITY;
+      return -std::numeric_limits<double>::infinity();
     }
   }
 
@@ -229,7 +229,7 @@ double IntegerDoubleMultiply(double a, double b) {
   // Detect NaNs and infinities.
   if (a_shift == std::numeric_limits<int>::max() ||
       (b_shift == std::numeric_limits<int>::max())) {
-    return NAN;
+    return std::numeric_limits<double>::quiet_NaN();
   }
   const int result_shift = a_shift + b_shift + 1;
   const int64_t result_fraction = (a_fraction * b_fraction) >> 32;
@@ -379,7 +379,7 @@ bool CheckedLog2(const float x, int* log2_result) {
   const float x_log2_fracpart = x_log2 - x_log2_rounded;
 
   *log2_result = static_cast<int>(x_log2_rounded);
-  return std::abs(x_log2_fracpart) < 1e-3;
+  return std::abs(x_log2_fracpart) < 1e-3f;
 }
 
 void QuantizeMultiplierArray(const double* effective_scales, size_t size,
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantize.h,"@@ -36,7 +36,9 @@ inline void AffineQuantize(const tflite::QuantizationParams& op_params,
 
   for (int i = 0; i < flat_size; i++) {
     const float val = input_data[i];
-    int32 unclamped = static_cast<int32>(TfLiteRound(val / scale)) + zero_point;
+    int32 unclamped =
+        static_cast<int32>(TfLiteRound(val / static_cast<float>(scale))) +
+        zero_point;
     int32 clamped = std::min(std::max(unclamped, min_val), max_val);
     output_data[i] = clamped;
   }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",softmax.h,"@@ -43,16 +43,20 @@ inline void Softmax(const SoftmaxParams& params,
       max = std::max(max, input_data[i * depth + c]);
     }
 
+    // TODO(b/148114827): Improve this code.
     // Compute sum.
     float sum = 0.f;
     for (int c = 0; c < depth; ++c) {
-      sum += std::exp((input_data[i * depth + c] - max) * params.beta);
+      sum += std::exp(static_cast<double>(input_data[i * depth + c] - max) *
+                      params.beta);
     }
 
     // Compute result.
     for (int c = 0; c < depth; ++c) {
       output_data[i * depth + c] =
-          std::exp((input_data[i * depth + c] - max) * params.beta) / sum;
+          std::exp(static_cast<double>(input_data[i * depth + c] - max) *
+                   params.beta) /
+          static_cast<double>(sum);
     }
   }
 }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",kernel_util.cc,"@@ -118,11 +118,12 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                               const TfLiteTensor* bias,
                                               TfLiteTensor* output,
                                               double* multiplier) {
-  const double input_product_scale = input->params.scale * filter->params.scale;
+  const double input_product_scale = static_cast<double>(input->params.scale) *
+                                     static_cast<double>(filter->params.scale);
   // TODO(ahentz): The following conditions must be guaranteed by the training
   // pipeline.
   if (bias) {
-    const double bias_scale = bias->params.scale;
+    const double bias_scale = static_cast<double>(bias->params.scale);
     TF_LITE_ENSURE(context,
                    std::abs(input_product_scale - bias_scale) <=
                        1e-6 * std::min(input_product_scale, bias_scale));
@@ -136,9 +137,10 @@ TfLiteStatus GetQuantizedConvolutionMultipler(TfLiteContext* context,
                                               const TfLiteTensor* filter,
                                               TfLiteTensor* output,
                                               double* multiplier) {
-  const double input_product_scale = input->params.scale * filter->params.scale;
+  const double input_product_scale = static_cast<double>(input->params.scale) *
+                                     static_cast<double>(filter->params.scale);
   TF_LITE_ENSURE(context, input_product_scale >= 0);
-  *multiplier = input_product_scale / output->params.scale;
+  *multiplier = input_product_scale / static_cast<double>(output->params.scale);
 
   return kTfLiteOk;
 }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",output_handler.cc,"@@ -18,5 +18,7 @@ limitations under the License.
 void HandleOutput(tflite::ErrorReporter* error_reporter, float x_value,
                   float y_value) {
   // Log the current X and Y values
-  error_reporter->Report(""x_value: %f, y_value: %f\n"", x_value, y_value);
+  error_reporter->Report(""x_value: %f, y_value: %f\n"",
+                         static_cast<double>(x_value),
+                         static_cast<double>(y_value));
 }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",add.cc,"@@ -77,14 +77,15 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteAddParams* params,
     data->output_offset = output->params.zero_point;
     data->left_shift = 20;
     const double twice_max_input_scale =
-        2 * std::max(input1->params.scale, input2->params.scale);
+        2 * static_cast<double>(
+                std::max(input1->params.scale, input2->params.scale));
     const double real_input1_multiplier =
-        input1->params.scale / twice_max_input_scale;
+        static_cast<double>(input1->params.scale) / twice_max_input_scale;
     const double real_input2_multiplier =
-        input2->params.scale / twice_max_input_scale;
+        static_cast<double>(input2->params.scale) / twice_max_input_scale;
     const double real_output_multiplier =
         twice_max_input_scale /
-        ((1 << data->left_shift) * output->params.scale);
+        ((1 << data->left_shift) * static_cast<double>(output->params.scale));
 
     QuantizeMultiplierSmallerThanOneExp(
         real_input1_multiplier, &data->input1_multiplier, &data->input1_shift);
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",comparisons.cc,"@@ -43,12 +43,14 @@ constexpr int kOutputTensor = 0;
                                                                                \
       int32 input1_multiplier;                                                 \
       int input1_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(input1->params.scale,                \
-                                          &input1_multiplier, &input1_shift);  \
+      QuantizeMultiplierSmallerThanOneExp(                                     \
+          static_cast<double>(input1->params.scale), &input1_multiplier,       \
+          &input1_shift);                                                      \
       int32 input2_multiplier;                                                 \
       int input2_shift;                                                        \
-      QuantizeMultiplierSmallerThanOneExp(input2->params.scale,                \
-                                          &input2_multiplier, &input2_shift);  \
+      QuantizeMultiplierSmallerThanOneExp(                                     \
+          static_cast<double>(input2->params.scale), &input2_multiplier,       \
+          &input2_shift);                                                      \
                                                                                \
       ComparisonParams op_params;                                              \
       op_params.left_shift = left_shift;                                       \
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",dequantize.cc,"@@ -46,7 +46,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   tflite::DequantizationParams op_params;
   op_params.zero_point = input->params.zero_point;
-  op_params.scale = input->params.scale;
+  op_params.scale = static_cast<double>(input->params.scale);
   switch (input->type) {
     case kTfLiteUInt8:
       reference_ops::Dequantize(
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",mul.cc,"@@ -55,8 +55,9 @@ TfLiteStatus CalculateOpData(TfLiteContext* context, TfLiteNode* node,
       &data->output_activation_max));
 
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt8) {
-    double real_multiplier =
-        input1->params.scale * input2->params.scale / output->params.scale;
+    double real_multiplier = static_cast<double>(input1->params.scale) *
+                             static_cast<double>(input2->params.scale) /
+                             static_cast<double>(output->params.scale);
     QuantizeMultiplier(real_multiplier, &data->output_multiplier,
                        &data->output_shift);
   }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",pad.cc,"@@ -152,8 +152,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         // same quantized range as the input and output tensors.
         TF_LITE_ENSURE_EQ(context, op_context.output->params.zero_point,
                           op_context.constant_values->params.zero_point);
-        TF_LITE_ENSURE_EQ(context, op_context.output->params.scale,
-                          op_context.constant_values->params.scale);
+        TF_LITE_ENSURE_EQ(
+            context, static_cast<double>(op_context.output->params.scale),
+            static_cast<double>(op_context.constant_values->params.scale));
         pad_value = *GetTensorData<uint8_t>(op_context.constant_values);
       }
       if (op_context.resizing_category == ResizingCategory::kImageStyle) {
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",prelu.cc,"@@ -53,7 +53,7 @@ inline void BroadcastPrelu4DSlowFloat(
           auto in2_idx = SubscriptToIndex(desc2, b, y, x, c);
           auto in1_val = input1_data[in1_idx];
           auto in2_val = input2_data[in2_idx];
-          output_data[out_idx] = in1_val >= 0.0 ? in1_val : in1_val * in2_val;
+          output_data[out_idx] = in1_val >= 0.0f ? in1_val : in1_val * in2_val;
         }
       }
     }
@@ -67,8 +67,9 @@ TfLiteStatus PreluEval(TfLiteContext* context, TfLiteNode* node) {
   int32_t output_multiplier = 0;
   int output_shift = 0;
   if (output->type == kTfLiteUInt8 || output->type == kTfLiteInt16) {
-    double real_multiplier =
-        input->params.scale * alpha->params.scale / output->params.scale;
+    double real_multiplier = static_cast<double>(input->params.scale) *
+                             static_cast<double>(alpha->params.scale) /
+                             static_cast<double>(output->params.scale);
     QuantizeMultiplierSmallerThanOneExp(real_multiplier, &output_multiplier,
                                         &output_shift);
   }
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",quantize.cc,"@@ -60,7 +60,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   tflite::QuantizationParams op_params;
   op_params.zero_point = output->params.zero_point;
-  op_params.scale = output->params.scale;
+  op_params.scale = static_cast<double>(output->params.scale);
   switch (output->type) {
     case kTfLiteInt8:
       reference_ops::AffineQuantize(
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",softmax.cc,"@@ -53,7 +53,8 @@ TfLiteStatus CalculateSoftmaxOpData(TfLiteContext* context,
     static const int kScaledDiffIntegerBits = 5;
 
     tflite::PreprocessSoftmaxScaling(
-        params->beta, input->params.scale, kScaledDiffIntegerBits,
+        static_cast<double>(params->beta),
+        static_cast<double>(input->params.scale), kScaledDiffIntegerBits,
         &data->input_multiplier, &data->input_left_shift);
     data->diff_min = -1.0 * tflite::CalculateInputRadius(
                                 kScaledDiffIntegerBits, data->input_left_shift);
@@ -143,7 +144,7 @@ void Softmax2DQuantized(const TfLiteTensor* input, TfLiteTensor* output,
 void Softmax4DFloat(const TfLiteTensor* input, TfLiteTensor* output,
                     TfLiteSoftmaxParams* params) {
   SoftmaxParams op_params;
-  op_params.beta = params->beta;
+  op_params.beta = static_cast<double>(params->beta);
   tflite::reference_ops::Softmax(
       op_params, GetTensorShape(input), GetTensorData<float>(input),
       GetTensorShape(output), GetTensorData<float>(output));
",0,train
884ec0ff0679fd34c3ce855a3a7cabf07c1dfc14,tensorflow/tensorflow,"TFLM: Fix double-promotion error.

Some of these double promotion is not obvious as va_args implicitly promotes float to double.

PiperOrigin-RevId: 290881894
Change-Id: I58a67bb4770e5a5a1a2ccfda59de515625e91df1",svdf.cc,"@@ -526,12 +526,12 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         auto* output_params = reinterpret_cast<TfLiteAffineQuantization*>(
             output->quantization.params);
         const double effective_scale_1 =
-            input_params->scale->data[0] *
-            weights_feature_params->scale->data[0] /
-            state_params->scale->data[0];
-        const double effective_scale_2 = state_params->scale->data[0] *
-                                         weight_time_params->scale->data[0] /
-                                         output_params->scale->data[0];
+            static_cast<double>(input_params->scale->data[0] *
+                                weights_feature_params->scale->data[0] /
+                                state_params->scale->data[0]);
+        const double effective_scale_2 = static_cast<double>(
+            state_params->scale->data[0] * weight_time_params->scale->data[0] /
+            output_params->scale->data[0]);
         QuantizeMultiplier(effective_scale_1, &op_data.effective_scale_1_a,
                            &op_data.effective_scale_1_b);
         QuantizeMultiplier(effective_scale_2, &op_data.effective_scale_2_a,
",0,train
c51da68e1dbe80029b0ef93b86cf6fde4447aaa4,tensorflow/tensorflow,"Pluggable device/op_handler support in c_api_tfrt. And it starts to reuse device name (e.g. /device:CPU:0) borrowed from TensorFlow. It also allows creating different op handler for different GPU devices.

PiperOrigin-RevId: 320713554
Change-Id: Id554249713fe7571e29e8f2f36fc0986ee44e9ec",c_api.cc,"@@ -725,13 +725,7 @@ void TFE_DeleteContextOptions(TFE_ContextOptions* options) { delete options; }
 TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
   if (opts->use_tfrt) {
 #ifdef PLATFORM_GOOGLE
-    tfrt::SmallVector<std::string, 4> op_handler_chains;
-    tfrt::SmallVector<tensorflow::DeviceAttributes, 4> device_attributes;
-    status->status = tfrt::ListOpHandlerChains(
-        opts->session_options.options, &op_handler_chains, &device_attributes);
-    if (!status->status.ok()) return nullptr;
-    return tensorflow::wrap(new tfrt::ContextInterface(
-        op_handler_chains, device_attributes, opts->async));
+    return tensorflow::wrap(new tfrt::ContextInterface(opts->async));
 #else
     status->status = tensorflow::errors::Unimplemented(""TFRT is not supported"");
     return nullptr;
",0,train
d4b3956c3759afac03f2a21c77399a01150f2928,tensorflow/tensorflow,"Make name_scopes work correctly in V2.

PiperOrigin-RevId: 232397249",base_layer.py,"@@ -551,64 +551,59 @@ class Layer(checkpointable.Checkpointable):
         # pass to __call__, hence we set previous_mask as the default value.
         kwargs['mask'] = previous_mask
 
-    with ops.name_scope(self._name_scope()):
-      if not self.built:
+    # Check input assumptions set after layer building, e.g. input shape.
+    if build_graph:
+      # Symbolic execution on symbolic tensors. We will attempt to build
+      # the corresponding TF subgraph inside `backend.get_graph()`
+      input_spec.assert_input_compatibility(self.input_spec, inputs, self.name)
+      graph = backend.get_graph()
+      with graph.as_default(), ops.name_scope(self._name_scope()):
         # Build layer if applicable (if the `build` method has been overridden).
         self._maybe_build(inputs)
-        # We must set self.built since user defined build functions are not
-        # constrained to set self.built.
-        self.built = True
-
-      # Check input assumptions set after layer building, e.g. input shape.
-      if build_graph:
-        # Symbolic execution on symbolic tensors. We will attempt to build
-        # the corresponding TF subgraph inside `backend.get_graph()`
-        input_spec.assert_input_compatibility(
-            self.input_spec, inputs, self.name)
-        graph = backend.get_graph()
-        with graph.as_default():
-          if not self.dynamic:
-            try:
-              outputs = self.call(inputs, *args, **kwargs)
-            except TypeError as e:
-              messages = ('`tf.Tensor` as a Python `bool` is not allowed',
-                          'Tensor objects are only iterable when eager')
-              exception_str = str(e)
-              for msg in messages:
-                if msg in exception_str:
-                  raise TypeError('You are attempting to use Python control '
-                                  'flow in a layer that was not declared to be '
-                                  'dynamic. Pass `dynamic=True` to the class '
-                                  'constructor.\nEncountered error:\n""""""\n' +
-                                  exception_str + '\n""""""')
-              raise
-          else:
-            # We will use static shape inference to return symbolic tensors
-            # matching the specifications of the layer outputs.
-            # Since `self.dynamic` is True, we will never attempt to
-            # run the underlying TF graph (which is disconnected).
-            # TODO(fchollet): consider py_func as an alternative, which
-            # would enable us to run the underlying graph if needed.
-            outputs = self._symbolic_call(inputs)
-
-          if outputs is None:
-            raise ValueError('A layer\'s `call` method should return a '
-                             'Tensor or a list of Tensors, not None '
-                             '(layer: ' + self.name + ').')
-          if base_layer_utils.have_all_keras_metadata(inputs):
-            inputs, outputs = self._set_connectivity_metadata_(
-                inputs, outputs, args, kwargs)
-          self._handle_activity_regularization(inputs, outputs)
-          self._set_mask_metadata(inputs, outputs, previous_mask)
-          if hasattr(self, '_set_inputs') and not self.inputs:
-            # Subclassed network: explicitly set metadata normally set by
-            # a call to self._set_inputs().
-            # TODO(b/120997007): This should be done in Eager as well, but
-            # causes garbage collection issues because of the placeholders
-            # created on the default Keras graph.
-            self._set_inputs(inputs, outputs)
-      else:
-        # Eager execution on data tensors.
+        if not self.dynamic:
+          try:
+            outputs = self.call(inputs, *args, **kwargs)
+          except TypeError as e:
+            messages = ('`tf.Tensor` as a Python `bool` is not allowed',
+                        'Tensor objects are only iterable when eager')
+            exception_str = str(e)
+            for msg in messages:
+              if msg in exception_str:
+                raise TypeError('You are attempting to use Python control '
+                                'flow in a layer that was not declared to be '
+                                'dynamic. Pass `dynamic=True` to the class '
+                                'constructor.\nEncountered error:\n""""""\n' +
+                                exception_str + '\n""""""')
+            raise
+        else:
+          # We will use static shape inference to return symbolic tensors
+          # matching the specifications of the layer outputs.
+          # Since `self.dynamic` is True, we will never attempt to
+          # run the underlying TF graph (which is disconnected).
+          # TODO(fchollet): consider py_func as an alternative, which
+          # would enable us to run the underlying graph if needed.
+          outputs = self._symbolic_call(inputs)
+
+        if outputs is None:
+          raise ValueError('A layer\'s `call` method should return a '
+                           'Tensor or a list of Tensors, not None '
+                           '(layer: ' + self.name + ').')
+        if base_layer_utils.have_all_keras_metadata(inputs):
+          inputs, outputs = self._set_connectivity_metadata_(
+              inputs, outputs, args, kwargs)
+        self._handle_activity_regularization(inputs, outputs)
+        self._set_mask_metadata(inputs, outputs, previous_mask)
+        if hasattr(self, '_set_inputs') and not self.inputs:
+          # Subclassed network: explicitly set metadata normally set by
+          # a call to self._set_inputs().
+          # TODO(b/120997007): This should be done in Eager as well, but
+          # causes garbage collection issues because of the placeholders
+          # created on the default Keras graph.
+          self._set_inputs(inputs, outputs)
+    else:
+      # Eager execution on data tensors.
+      with ops.name_scope(self._name_scope()):
+        self._maybe_build(inputs)
         outputs = self.call(inputs, *args, **kwargs)
         self._handle_activity_regularization(inputs, outputs)
         self._set_mask_metadata(inputs, outputs, previous_mask)
@@ -1578,6 +1573,9 @@ class Layer(checkpointable.Checkpointable):
 
   def _maybe_build(self, inputs):
     # Check input assumptions set before layer building, e.g. input rank.
+    if self.built:
+      return
+
     input_spec.assert_input_compatibility(
         self.input_spec, inputs, self.name)
     input_list = nest.flatten(inputs)
@@ -1592,6 +1590,9 @@ class Layer(checkpointable.Checkpointable):
     # Only call `build` if the user has manually overridden the build method.
     if not hasattr(self.build, '_is_default'):
       self.build(input_shapes)
+    # We must set self.built since user defined build functions are not
+    # constrained to set self.built.
+    self.built = True
 
   def _symbolic_call(self, inputs):
     input_shapes = nest.map_structure(lambda x: x.shape, inputs)
",0,train
d4b3956c3759afac03f2a21c77399a01150f2928,tensorflow/tensorflow,"Make name_scopes work correctly in V2.

PiperOrigin-RevId: 232397249",base_layer_test.py,"@@ -456,6 +456,34 @@ class NestedTrackingTest(test.TestCase):
       self.assertEqual(len(layer.updates), 3)
 
 
+@test_util.run_all_in_graph_and_eager_modes
+class NameScopingTest(keras_parameterized.TestCase):
+
+  def test_name_scope_layer(self):
+    x = keras.backend.placeholder(shape=(10, 10))
+    layer = keras.layers.Dense(10, name='MyName')
+    layer(x)
+    self.assertEqual(layer.bias.name, 'MyName/bias:0')
+    self.assertEqual(layer.kernel.name, 'MyName/kernel:0')
+
+  def test_name_scope_sublayer(self):
+    x = keras.backend.placeholder(shape=(10, 10))
+    layer = keras.layers.Dense(
+        10, activation=keras.layers.ReLU(name='MyAct'), name='MyName2')
+    y = layer(x)
+    self.assertEqual(layer.bias.name, 'MyName2/bias:0')
+    self.assertEqual(layer.kernel.name, 'MyName2/kernel:0')
+    self.assertEqual(y.name, 'MyName2/MyAct/Relu:0')
+
+  def test_name_scope_tf_tensor(self):
+    x = ops.convert_to_tensor(np.ones((10, 10)))
+    layer = keras.layers.Dense(
+        10, activation=keras.layers.ReLU(name='MyAct'), name='MyName3')
+    layer(x)
+    self.assertEqual(layer.bias.name, 'MyName3/bias:0')
+    self.assertEqual(layer.kernel.name, 'MyName3/kernel:0')
+
+
 if __name__ == '__main__':
   ops.enable_eager_execution()
   test.main()
",0,train
036c2c3e720ba65a975eb2db8e2b2dbc71417b74,tensorflow/tensorflow,"Fix incorrect gradient w.r.t. A for matrix_solve_ls in the underdetermined case.
Add missing name to gradient test that caused most tests for matrix_solve_ls_grad to be skipped.
Set proper initial values in linalg_grad_test and tighten test tolerances for float64.
Change: 138725515",linalg_grad_test.py,"@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-
 """"""Tests for tensorflow.ops.linalg_grad.""""""
 from __future__ import absolute_import
 from __future__ import division
@@ -22,6 +21,13 @@ import numpy as np
 import tensorflow as tf
 
 
+def _AddTest(test, op_name, testcase_name, fn):
+  test_name = '_'.join(['test', op_name, testcase_name])
+  if hasattr(test, test_name):
+    raise RuntimeError('Test %s defined more than once' % test_name)
+  setattr(test, test_name, fn)
+
+
 class ShapeTest(tf.test.TestCase):
 
   def testBatchGradientUnknownSize(self):
@@ -29,8 +35,8 @@ class ShapeTest(tf.test.TestCase):
       batch_size = tf.constant(3)
       matrix_size = tf.constant(4)
       batch_identity = tf.tile(
-          tf.expand_dims(
-              tf.diag(tf.ones([matrix_size])), 0), [batch_size, 1, 1])
+          tf.expand_dims(tf.diag(tf.ones([matrix_size])), 0),
+          [batch_size, 1, 1])
       determinants = tf.matrix_determinant(batch_identity)
       reduced = tf.reduce_sum(determinants)
       sum_grad = tf.gradients(reduced, batch_identity)[0]
@@ -46,24 +52,26 @@ def _GetMatrixUnaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
   def Test(self):
     with self.test_session():
       np.random.seed(1)
-      m = np.random.uniform(low=-1.0,
-                            high=1.0,
-                            size=np.prod(shape_)).reshape(shape_).astype(dtype_)
-      a = tf.constant(m)
+      a_np = np.random.uniform(
+          low=-1.0, high=1.0,
+          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
+      a = tf.constant(a_np)
       b = functor_(a, **kwargs_)
 
       # Optimal stepsize for central difference is O(epsilon^{1/3}).
       epsilon = np.finfo(dtype_).eps
-      delta = 0.1 * epsilon**(1.0 / 3.0)
+      delta = epsilon**(1.0 / 3.0)
       # tolerance obtained by looking at actual differences using
       # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
-      tol = 1e-3 if dtype_ == np.float64 else 0.05
-
-      theoretical, numerical = tf.test.compute_gradient(a,
-                                                        a.get_shape().as_list(),
-                                                        b,
-                                                        b.get_shape().as_list(),
-                                                        delta=delta)
+      tol = 1e-6 if dtype_ == np.float64 else 0.05
+
+      theoretical, numerical = tf.test.compute_gradient(
+          a,
+          a.get_shape().as_list(),
+          b,
+          b.get_shape().as_list(),
+          x_init_value=a_np,
+          delta=delta)
       self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
 
   return Test
@@ -73,42 +81,47 @@ class MatrixBinaryFunctorGradientTest(tf.test.TestCase):
   pass  # Filled in below
 
 
-def _GetMatrixBinaryFunctorGradientTest(functor_, dtype_, shape_, **kwargs_):
+def _GetMatrixBinaryFunctorGradientTest(functor_,
+                                        dtype_,
+                                        shape_,
+                                        float32_tol_fudge=1.0,
+                                        **kwargs_):
 
   def Test(self):
     with self.test_session():
       np.random.seed(1)
-      m = np.random.uniform(low=-1.0,
-                            high=1.0,
-                            size=np.prod(shape_)).reshape(shape_).astype(dtype_)
-      a = tf.constant(m)
-
-      n = np.random.uniform(low=-1.0,
-                            high=1.0,
-                            size=np.prod(shape_)).reshape(shape_).astype(dtype_)
-      b = tf.constant(n)
+      a_np = np.random.uniform(
+          low=-1.0, high=1.0,
+          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
+      a = tf.constant(a_np)
+
+      b_np = np.random.uniform(
+          low=-1.0, high=1.0,
+          size=np.prod(shape_)).reshape(shape_).astype(dtype_)
+      b = tf.constant(b_np)
       c = functor_(a, b, **kwargs_)
 
       # Optimal stepsize for central difference is O(epsilon^{1/3}).
       epsilon = np.finfo(dtype_).eps
-      delta = 0.1 * epsilon**(1.0 / 3.0)
+      delta = epsilon**(1.0 / 3.0)
       # tolerance obtained by looking at actual differences using
       # np.linalg.norm(theoretical-numerical, np.inf) on -mavx build
-      tol = 1e-3 if dtype_ == np.float64 else 0.05
-
+      tol = 1e-6 if dtype_ == np.float64 else float32_tol_fudge * 0.04
       # The gradients for a and b may be of very different magnitudes,
       # so to not get spurious failures we test them separately.
-      for factor in a, b:
+      for factor, factor_init in [a, a_np], [b, b_np]:
         theoretical, numerical = tf.test.compute_gradient(
             factor,
             factor.get_shape().as_list(),
             c,
             c.get_shape().as_list(),
+            x_init_value=factor_init,
             delta=delta)
         self.assertAllClose(theoretical, numerical, atol=tol, rtol=tol)
 
   return Test
 
+
 if __name__ == '__main__':
   # Tests for gradients of binary matrix operations.
   for dtype in np.float32, np.float64:
@@ -120,29 +133,32 @@ if __name__ == '__main__':
           shape = extra + (size, size)
           name = '%s_%s_adj_%s' % (dtype.__name__, '_'.join(map(str, shape)),
                                    str(adjoint))
-          setattr(MatrixBinaryFunctorGradientTest,
-                  'testMatrixSolveGradient_' + name,
-                  _GetMatrixBinaryFunctorGradientTest(tf.matrix_solve,
-                                                      dtype, shape,
-                                                      adjoint=adjoint))
+          _AddTest(
+              MatrixBinaryFunctorGradientTest,
+              'MatrixSolveGradient',
+              name,
+              _GetMatrixBinaryFunctorGradientTest(
+                  tf.matrix_solve, dtype, shape, adjoint=adjoint))
           if dtype == np.float64:
             # TODO(rmlarsen): The gradients of triangular solves seems
             # particularly sensitive to round-off when computed in float32.
             # In some tests, a few gradient elements differ by 25% between the
             # numerical and theoretical values. Disable tests for float32 until
             # we understand this better.
-            setattr(
+            _AddTest(
                 MatrixBinaryFunctorGradientTest,
-                'testMatrixTriangularSolveGradient_' + name + '_low_True',
+                'MatrixTriangularSolveGradient',
+                name + '_low_True',
                 _GetMatrixBinaryFunctorGradientTest(
                     tf.matrix_triangular_solve,
                     dtype,
                     shape,
                     adjoint=adjoint,
                     lower=True))
-            setattr(
+            _AddTest(
                 MatrixBinaryFunctorGradientTest,
-                'testMatrixTriangularSolveGradient_' + name + '_low_False',
+                'MatrixTriangularSolveGradient',
+                name + '_low_False',
                 _GetMatrixBinaryFunctorGradientTest(
                     tf.matrix_triangular_solve,
                     dtype,
@@ -158,14 +174,13 @@ if __name__ == '__main__':
       for extra in [(), (2,), (3,)] + [(3, 2)] * (size < 10):
         shape = extra + (size, size)
         name = '%s_%s' % (dtype.__name__, '_'.join(map(str, shape)))
-        setattr(MatrixUnaryFunctorGradientTest,
-                'testMatrixInverseGradient_' + name,
-                _GetMatrixUnaryFunctorGradientTest(tf.matrix_inverse,
-                                                   dtype, shape))
-        setattr(MatrixUnaryFunctorGradientTest,
-                'testMatrixDeterminantGradient_' + name,
-                _GetMatrixUnaryFunctorGradientTest(tf.matrix_determinant, dtype,
-                                                   shape))
+        _AddTest(MatrixUnaryFunctorGradientTest, 'MatrixInverseGradient', name,
+                 _GetMatrixUnaryFunctorGradientTest(tf.matrix_inverse, dtype,
+                                                    shape))
+        _AddTest(MatrixUnaryFunctorGradientTest, 'MatrixDeterminantGradient',
+                 name,
+                 _GetMatrixUnaryFunctorGradientTest(tf.matrix_determinant,
+                                                    dtype, shape))
 
   # Tests for gradients of matrix_solve_ls
   for dtype in np.float32, np.float64:
@@ -173,9 +188,16 @@ if __name__ == '__main__':
       for cols in 2, 5, 10:
         for l2_regularization in 0.0, 0.001, 1.0:
           shape = (rows, cols)
-          setattr(MatrixBinaryFunctorGradientTest,
-                  'testMatrixSolveLsGradient_' + name,
-                  _GetMatrixBinaryFunctorGradientTest(tf.matrix_solve_ls, dtype,
-                                                      shape))
+          name = '%s_%s_%s' % (dtype.__name__, '_'.join(map(str, shape)),
+                               l2_regularization)
+          _AddTest(
+              MatrixBinaryFunctorGradientTest,
+              'MatrixSolveLsGradient',
+              name,
+              _GetMatrixBinaryFunctorGradientTest(
+                  lambda a, b, l=l2_regularization: tf.matrix_solve_ls(a, b, l),
+                  dtype,
+                  shape,
+                  float32_tol_fudge=4.0))
 
   tf.test.main()
",0,test
036c2c3e720ba65a975eb2db8e2b2dbc71417b74,tensorflow/tensorflow,"Fix incorrect gradient w.r.t. A for matrix_solve_ls in the underdetermined case.
Add missing name to gradient test that caused most tests for matrix_solve_ls_grad to be skipped.
Set proper initial values in linalg_grad_test and tighten test tolerances for float64.
Change: 138725515",linalg_grad.py,"@@ -95,7 +95,7 @@ def _MatrixSolveLsGrad(op, grad):
     """"""
     a = op.inputs[0]
     b = op.inputs[1]
-    l2_regularizer = op.inputs[2]
+    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
     x = op.outputs[0]
     a_shape = array_ops.shape(a)
     batch_shape = a_shape[:-2]
@@ -125,7 +125,7 @@ def _MatrixSolveLsGrad(op, grad):
     """"""
     a = op.inputs[0]
     b = op.inputs[1]
-    l2_regularizer = op.inputs[2]
+    l2_regularizer = math_ops.cast(op.inputs[2], a.dtype.base_dtype)
     a_shape = array_ops.shape(a)
     batch_shape = a_shape[:-2]
     m = a_shape[-2]
@@ -135,11 +135,13 @@ def _MatrixSolveLsGrad(op, grad):
         a, a, adj_y=True) + l2_regularizer * identity
     chol = linalg_ops.cholesky(gramian)
     grad_b = linalg_ops.cholesky_solve(chol, math_ops.batch_matmul(a, grad))
-    # Temporary z = (A * A^T + lambda * I)^{-1} * B.
-    z = linalg_ops.cholesky_solve(chol, b)
-    bz = -math_ops.batch_matmul(grad_b, z, adj_y=True)
-    bz_sym = bz + array_ops.matrix_transpose(bz)
-    grad_a = math_ops.batch_matmul(bz_sym, a) + math_ops.batch_matmul(z, grad)
+    # Temporary tmp = (A * A^T + lambda * I)^{-1} * B.
+    tmp = linalg_ops.cholesky_solve(chol, b)
+    a1 = math_ops.batch_matmul(tmp, a, adj_x=True)
+    a1 = -math_ops.batch_matmul(grad_b, a1)
+    a2 = grad - math_ops.batch_matmul(a, grad_b, adj_x=True)
+    a2 = math_ops.batch_matmul(tmp, a2, adj_y=True)
+    grad_a = a1 + a2
     return (grad_a, grad_b, None)
 
   fast = op.get_attr(""fast"")
",0,test
c0fdbc8eec34a3bd744b58ea9786c4fbf381bf0c,tensorflow/tensorflow,"Cosmetic change: fix header ordering.

There are two different ""jni_utils.h"" headers; the one that should be
included first is the one that matches the path of this .cc file,
which declares the entities defined in this file.

PiperOrigin-RevId: 399651260
Change-Id: I004817fbec02d55b0cce3b710e1ee4df12438895",jni_utils.cc,"@@ -13,13 +13,13 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include ""tensorflow/lite/core/shims/jni/jni_utils.h""
+#include ""tensorflow/lite/java/src/main/native/jni_utils.h""
 
 #include <stdarg.h>
 #include <stdio.h>
 #include <stdlib.h>
 
-#include ""tensorflow/lite/java/src/main/native/jni_utils.h""
+#include ""tensorflow/lite/core/shims/jni/jni_utils.h""
 
 namespace tflite {
 namespace jni {
",0,train
653bdbd4ffefb008a4074617cae518ab143420ed,tensorflow/tensorflow,"Fix potential use-after-free of `worker_cache` in NewRemoteDevices().
Change: 142623343",remote_device.cc,"@@ -77,8 +77,8 @@ void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache,
         remote_devices.push_back(d);
       }
     }
-    done(s, &remote_devices);
     worker_cache->ReleaseWorker(worker_name, wi);
+    done(s, &remote_devices);
     delete call;
   };
   wi->GetStatusAsync(&call->req, &call->resp, cb);
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",literal_util_test.cc,"@@ -771,7 +771,7 @@ TEST_F(LiteralUtilTest, F16) {
   // TODO - modify if we make the data format machine endianess dependent
   auto m1 = LiteralUtil::CreateFromShape(ShapeUtil::MakeShape(F16, {2, 2}));
   Literal* l1 = m1.get();
-  const char* d1 = (const char*)LiteralUtil::InternalData(*l1);
+  const char* d1 = static_cast<const char*>(LiteralUtil::InternalData(*l1));
   EXPECT_EQ(d1[0], 0);
   EXPECT_EQ(d1[1], 0);
   EXPECT_EQ(d1[2], 0);
@@ -787,7 +787,7 @@ TEST_F(LiteralUtilTest, F16) {
   half h2(2.0f);
   auto m2 = LiteralUtil::CreateR2<half>({{h1, h2}, {h2, h1}});
   Literal* l2 = m2.get();
-  const char* d2 = (const char*)LiteralUtil::InternalData(*l2);
+  const char* d2 = static_cast<const char*>(LiteralUtil::InternalData(*l2));
   EXPECT_EQ(d2[0], 0);
   EXPECT_EQ(d2[1], 0x3C);
   EXPECT_EQ(d2[2], 0);
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",single_image_random_dot_stereograms_ops.cc,"@@ -57,8 +57,8 @@ class SingleImageRandomDotStereogramsOp : public OpKernel {
   ::tensorflow::TensorShapeProto output_image_shape;
   ::tensorflow::TensorShapeProto output_data_window;
 
-  uint8 Cblack = (uint8)0;
-  uint8 Cwhite = (uint8)255;
+  uint8 Cblack = 0;
+  uint8 Cwhite = 255;
 
   int indexMode = 0;  // 0 - truncate XY, 1 - round XY, 2 - Interpolate XY (not
                       // implemented yet, keep default of 0)
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",gpu_tracer.cc,"@@ -398,8 +398,8 @@ Status GPUTracerImpl::Start() {
   // There can only be one CUPTI subscriber.  If we can't create one then
   // there is another trace in progress (possibly by external code).
   CUptiResult ret;
-  ret = cupti_wrapper_->Subscribe(&subscriber_, (CUpti_CallbackFunc)ApiCallback,
-                                  this);
+  ret = cupti_wrapper_->Subscribe(
+      &subscriber_, static_cast<CUpti_CallbackFunc>(ApiCallback), this);
   if (ret == CUPTI_ERROR_MAX_LIMIT_REACHED) {
     return errors::Unavailable(""CUPTI subcriber limit reached."");
   } else if (ret != CUPTI_SUCCESS) {
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",fft_ops.cc,"@@ -112,14 +112,14 @@ class FFTCPU : public FFTBase {
     auto device = ctx->eigen_device<CPUDevice>();
 
     if (!IsReal()) {
-      auto input = ((Tensor)in).flat_inner_dims<complex64, FFTRank + 1>();
+      auto input = (Tensor(in)).flat_inner_dims<complex64, FFTRank + 1>();
       // Compute the FFT using eigen.
       auto output = out->flat_inner_dims<complex64, FFTRank + 1>();
       output.device(device) = input.template fft < Eigen::BothParts,
       Forward ? Eigen::FFT_FORWARD : Eigen::FFT_REVERSE > (axes);
     } else {
       if (IsForward()) {
-        auto input = ((Tensor)in).flat_inner_dims<float, FFTRank + 1>();
+        auto input = (Tensor(in)).flat_inner_dims<float, FFTRank + 1>();
         auto output = out->flat_inner_dims<complex64, FFTRank + 1>();
         Eigen::DSizes<Eigen::DenseIndex, FFTRank + 1> startIndices;
 
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",snappy_inputbuffer.cc,"@@ -106,7 +106,7 @@ Status SnappyInputBuffer::Inflate() {
 
   // Output buffer must be large enough to fit the uncompressed block.
   DCHECK_GE(output_buffer_capacity_, uncompressed_length);
-  next_out_ = (char*)output_buffer_.get();
+  next_out_ = output_buffer_.get();
 
   bool status = port::Snappy_Uncompress(next_in_, compressed_block_length,
                                         output_buffer_.get());
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",table_test.cc,"@@ -526,8 +526,9 @@ static bool Between(uint64 val, uint64 low, uint64 high) {
   bool result = (val >= low) && (val <= high);
   if (!result) {
     fprintf(stderr, ""Value %llu is not in range [%llu, %llu]\n"",
-            (unsigned long long)(val), (unsigned long long)(low),
-            (unsigned long long)(high));
+            static_cast<unsigned long long>(val),
+            static_cast<unsigned long long>(low),
+            static_cast<unsigned long long>(high));
   }
   return result;
 }
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",ordered_code.cc,"@@ -134,7 +134,9 @@ inline static void AppendBytes(string* dest, const char* src, size_t len) {
   dest->append(src, len);
 }
 
-inline bool IsSpecialByte(char c) { return ((unsigned char)(c + 1)) < 2; }
+inline bool IsSpecialByte(char c) {
+  return (static_cast<unsigned char>(c + 1)) < 2;
+}
 
 // Return a pointer to the first byte in the range ""[start..limit)""
 // whose value is 0 or 255 (kEscape1 or kEscape2).  If no such byte
@@ -201,7 +203,7 @@ void OrderedCode::WriteNumIncreasing(string* dest, uint64 val) {
     buf[9 - len] = (val & 0xff);
     val >>= 8;
   }
-  buf[9 - len - 1] = (unsigned char)len;
+  buf[9 - len - 1] = len;
   len++;
   AppendBytes(dest, reinterpret_cast<const char*>(buf + 9 - len), len);
 }
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",logging.cc,"@@ -156,7 +156,7 @@ void MakeCheckOpValueString(std::ostream* os, const char& v) {
   if (v >= 32 && v <= 126) {
     (*os) << ""'"" << v << ""'"";
   } else {
-    (*os) << ""char value "" << (short)v;
+    (*os) << ""char value "" << static_cast<short>(v);
   }
 }
 
@@ -165,7 +165,7 @@ void MakeCheckOpValueString(std::ostream* os, const signed char& v) {
   if (v >= 32 && v <= 126) {
     (*os) << ""'"" << v << ""'"";
   } else {
-    (*os) << ""signed char value "" << (short)v;
+    (*os) << ""signed char value "" << static_cast<short>(v);
   }
 }
 
@@ -174,7 +174,7 @@ void MakeCheckOpValueString(std::ostream* os, const unsigned char& v) {
   if (v >= 32 && v <= 126) {
     (*os) << ""'"" << v << ""'"";
   } else {
-    (*os) << ""unsigned char value "" << (unsigned short)v;
+    (*os) << ""unsigned char value "" << static_cast<unsigned short>(v);
   }
 }
 
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",net.cc,"@@ -57,15 +57,16 @@ bool IsPortAvailable(int* port, bool is_tcp) {
   // Try binding to port.
   addr.sin_family = AF_INET;
   addr.sin_addr.s_addr = INADDR_ANY;
-  addr.sin_port = htons((uint16_t)*port);
-  if (bind(fd, (struct sockaddr*)&addr, sizeof(addr)) < 0) {
+  addr.sin_port = htons(static_cast<uint16_t>(*port));
+  if (bind(fd, reinterpret_cast<struct sockaddr*>(&addr), sizeof(addr)) < 0) {
     LOG(WARNING) << ""bind(port="" << *port << "") failed: "" << strerror(errno);
     close(fd);
     return false;
   }
 
   // Get the bound port number.
-  if (getsockname(fd, (struct sockaddr*)&addr, &addr_len) < 0) {
+  if (getsockname(fd, reinterpret_cast<struct sockaddr*>(&addr), &addr_len) <
+      0) {
     LOG(WARNING) << ""getsockname() failed: "" << strerror(errno);
     close(fd);
     return false;
",0,train
346021ab4a56fa12f85f2be009991adc2c4bfe1a,tensorflow/tensorflow,"Cleanup: Use C++ casts, remove redundant casts, use CHECK_OK

PiperOrigin-RevId: 157522142",yuv2rgb.cc,"@@ -39,9 +39,9 @@ static inline uint32_t YUV2RGB(int nY, int nU, int nV) {
   // nG = (int)(1.164 * nY - 0.813 * nV - 0.391 * nU);
   // nB = (int)(1.164 * nY + 1.596 * nV);
 
-  int nR = (int)(1192 * nY + 1634 * nV);
-  int nG = (int)(1192 * nY - 833 * nV - 400 * nU);
-  int nB = (int)(1192 * nY + 2066 * nU);
+  int nR = 1192 * nY + 1634 * nV;
+  int nG = 1192 * nY - 833 * nV - 400 * nU;
+  int nB = 1192 * nY + 2066 * nU;
 
   nR = MIN(kMaxChannelValue, MAX(0, nR));
   nG = MIN(kMaxChannelValue, MAX(0, nG));
@@ -171,9 +171,9 @@ void ConvertYUV420SPToRGB565(const uint8_t* const input, uint16_t* const output,
       // nG = (int)(1.164 * nY - 0.813 * nV - 0.391 * nU);
       // nB = (int)(1.164 * nY + 1.596 * nV);
 
-      int nR = (int)(1192 * nY + 1634 * nV);
-      int nG = (int)(1192 * nY - 833 * nV - 400 * nU);
-      int nB = (int)(1192 * nY + 2066 * nU);
+      int nR = 1192 * nY + 1634 * nV;
+      int nG = 1192 * nY - 833 * nV - 400 * nU;
+      int nB = 1192 * nY + 2066 * nU;
 
       nR = MIN(kMaxChannelValue, MAX(0, nR));
       nG = MIN(kMaxChannelValue, MAX(0, nG));
",0,train
1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that
it is running on in the error message, since the message could be routed
back during a failure on a remote binary, and it is hard to tell which
machine it came from.

Ideally, we'd somehow log the name of the binary running instead, but
we don't have a function to get that right now.

PiperOrigin-RevId: 156337679",node_def_builder_test.cc,"@@ -208,9 +208,8 @@ TEST_F(NodeDefBuilderTest, OpDoesNotExist) {
       .ControlInput(""y"")
       .Attr(""foo"", 12)
       .Device(""device"");
-  ExpectFailure(
-      builder,
-      ""Op type not registered 'Op Does Not Exist' while building NodeDef 'n'"");
+  ExpectFailures(builder, {""Op type not registered 'Op Does Not Exist'"",
+                           ""while building NodeDef 'n'""});
 }
 
 TEST_F(NodeDefBuilderTest, Polymorphic) {
",0,test
1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that
it is running on in the error message, since the message could be routed
back during a failure on a remote binary, and it is hard to tell which
machine it came from.

Ideally, we'd somehow log the name of the binary running instead, but
we don't have a function to get that right now.

PiperOrigin-RevId: 156337679",op.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/gtl/map_util.h""
+#include ""tensorflow/core/platform/host_info.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/protobuf.h""
@@ -83,7 +84,10 @@ Status OpRegistry::LookUp(const string& op_type_name,
       first_unregistered = false;
     }
     Status status =
-        errors::NotFound(""Op type not registered '"", op_type_name, ""'"");
+        errors::NotFound(""Op type not registered '"", op_type_name,
+                         ""' in binary running on "", port::Hostname(), "". "",
+                         ""Make sure the Op and Kernel are registered in the ""
+                         ""binary running in this process."");
     VLOG(1) << status.ToString();
     return status;
   }
@@ -225,7 +229,10 @@ Status OpListOpRegistry::LookUp(const string& op_type_name,
   auto iter = index_.find(op_type_name);
   if (iter == index_.end()) {
     *op_reg_data = nullptr;
-    return errors::NotFound(""Op type not registered '"", op_type_name, ""'"");
+    return errors::NotFound(""Op type not registered '"", op_type_name,
+                            ""' in binary running on "", port::Hostname(), "". "",
+                            ""Make sure the Op and Kernel are registered in the ""
+                            ""binary running in this process."");
   }
   *op_reg_data = iter->second;
   return Status::OK();
",0,test
1390dd68fe5f2f83138e19a86b6699254ad38734,tensorflow/tensorflow,"When Op Type is not registered, log the hostname of the machine that
it is running on in the error message, since the message could be routed
back during a failure on a remote binary, and it is hard to tell which
machine it came from.

Ideally, we'd somehow log the name of the binary running instead, but
we don't have a function to get that right now.

PiperOrigin-RevId: 156337679",shape_inference_testutil_test.cc,"@@ -93,10 +93,11 @@ TEST(ShapeInferenceTestutilTest, Failures) {
             RunInferShapes(op, ""[1];[2];[1]"", ""e"", fn_copy_input_0));
   EXPECT_CONTAINS(RunInferShapes(op, ""[1];[2];[1]"", ""[1];[2]"", fn_copy_input_0),
                   ""wrong number of outputs"");
-  EXPECT_EQ(""Op type not registered 'NoSuchOp'"",
-            ShapeInferenceTestutil::InferShapes(
-                ShapeInferenceTestOp(""NoSuchOp""), """", """")
-                .error_message());
+  auto error_message = ShapeInferenceTestutil::InferShapes(
+                           ShapeInferenceTestOp(""NoSuchOp""), """", """")
+                           .error_message();
+  EXPECT_TRUE(StringPiece(error_message)
+                  .starts_with(""Op type not registered 'NoSuchOp'""));
 
   // Wrong shape error messages.
   EXPECT_CONTAINS(RunInferShapes(op, ""[1];[2];[1]"", ""?"", fn_copy_input_0),
",0,test
2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler.

PiperOrigin-RevId: 272525033",xla_compiler.cc,"@@ -463,9 +463,10 @@ string XlaCompiler::Argument::HumanString() const {
       return absl::StrCat(""kind=constant"", common,
                           "" value="", constant_value.DebugString());
     case kResource: {
-      string output = absl::StrCat(""kind=resource"", common, "" resource_kind="",
-                                   XlaResource::KindToString(resource_kind),
-                                   "" initialized="", initialized);
+      string output = absl::StrCat(
+          ""kind=resource"", common,
+          "" resource_kind="", XlaResource::KindToString(resource_kind),
+          "" initialized="", initialized, "" is_fast_mem="", fast_mem);
       if (max_array_size >= 0) {
         absl::StrAppend(&output, "" max_array_size="", max_array_size);
       }
@@ -800,8 +801,7 @@ Status XlaCompiler::XLAShapeForArgument(const XlaCompiler::Argument& arg,
           TF_ASSIGN_OR_RETURN(*xla_shape,
                               options_.shape_representation_fn(
                                   absl::get<TensorShape>(arg.shape), arg.type,
-                                  /*use_fast_memory=*/false));
-
+                                  /*use_fast_memory=*/arg.fast_mem));
           return Status::OK();
         }
         case XlaResource::kTensorArray: {
",0,train
2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler.

PiperOrigin-RevId: 272525033",xla_compiler.h,"@@ -153,6 +153,9 @@ class XlaCompiler {
     // For a kResource, has this resource been initialized?
     bool initialized = false;
 
+    // For a kResource, is this resource on Fast Memory.
+    bool fast_mem = false;
+
     // For a TensorArray or Stack resource, what is the array's declared size?
     // (Used for lazy initialization.)
     int64 max_array_size = -1;
",0,train
2932851e5d58ea729f4f5c8346f79e61df5f1126,tensorflow/tensorflow,"Annotate arg in FastMem for XLA compiler.

PiperOrigin-RevId: 272525033",xla_compiler_test.cc,"@@ -328,6 +328,49 @@ TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForUnwrittenResource) {
             xla::ShapeUtil::MakeTupleShape({transposed}));
 }
 
+// Tests that the compiler can correctly propagate fast mem attribute for input
+// resource variable.
+TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForFastMemVar) {
+  Scope scope = Scope::NewRootScope().ExitOnError();
+  auto var = ops::_Arg(scope.WithOpName(""V""), DT_RESOURCE, 0);
+  auto d = ops::_Retval(scope.WithOpName(""D""), var, 0);
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+  TF_ASSERT_OK(scope.ToGraph(graph.get()));
+
+  // Builds a description of the arguments.
+  std::vector<XlaCompiler::Argument> args(1);
+  args[0].kind = XlaCompiler::Argument::kResource;
+  args[0].resource_kind = XlaResource::kVariable;
+  args[0].initialized = true;
+  args[0].type = DT_INT32;
+  args[0].shape = TensorShape({2, 3});
+  args[0].fast_mem = true;
+
+  auto options = DefaultOptions();
+  int fast_mem_arg_count = 0;
+  options.shape_representation_fn =
+      [&fast_mem_arg_count](const TensorShape& shape, DataType dt,
+                            bool use_fast_memory) -> xla::StatusOr<xla::Shape> {
+    xla::Shape xla_shape;
+    TF_RETURN_IF_ERROR(TensorShapeToXLAShape(dt, shape, &xla_shape));
+    *xla_shape.mutable_layout() = xla::LayoutUtil::MakeLayout({0, 1});
+    if (use_fast_memory) {
+      fast_mem_arg_count++;
+    }
+    return xla_shape;
+  };
+  // Compiles the graph.
+  XlaCompiler compiler(options);
+
+  XlaCompiler::CompilationResult result;
+  XlaCompiler::CompileOptions compile_options;
+  compile_options.return_updated_values_for_all_resources = true;
+  TF_ASSERT_OK(compiler.CompileGraph(compile_options, ""add"", std::move(graph),
+                                     args,
+                                     /*user_aliases=*/{}, &result));
+  EXPECT_EQ(fast_mem_arg_count, 1);
+}
+
 // Tests that the compiler can correctly propagate the layout assigned by
 // shape_representation_fn_ to return types.
 TEST_F(XlaCompilerTest, HonorShapeRepresentationFnForRetVal) {
",0,train
34bff30979896879815dd6fc4d77c1a37d9b98a0,tensorflow/tensorflow,"[XLA] Add tests for Clamp with scalars S32 and U32.

PiperOrigin-RevId: 184376425",scalar_computations_test.cc,"@@ -737,7 +737,61 @@ XLA_TEST_F(ScalarComputationsTest, PowScalar) {
   ComputeAndCompareR0<float>(&builder, 8.0, {}, error_spec_);
 }
 
-XLA_TEST_F(ScalarComputationsTest, ClampScalarHigh) {
+XLA_TEST_F(ScalarComputationsTest, ClampScalarHighS32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<int32>(-1),  // The lower bound.
+                builder.ConstantR0<int32>(5),   // The operand to be clamped.
+                builder.ConstantR0<int32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<int32>(&builder, 3, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleS32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<int32>(-1),  // The lower bound.
+                builder.ConstantR0<int32>(2),   // The operand to be clamped.
+                builder.ConstantR0<int32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<int32>(&builder, 2, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarLowS32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<int32>(-1),  // The lower bound.
+                builder.ConstantR0<int32>(-5),  // The operand to be clamped.
+                builder.ConstantR0<int32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<int32>(&builder, -1, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarHighU32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<uint32>(1),   // The lower bound.
+                builder.ConstantR0<uint32>(5),   // The operand to be clamped.
+                builder.ConstantR0<uint32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<uint32>(&builder, 3, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleU32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<uint32>(1),   // The lower bound.
+                builder.ConstantR0<uint32>(2),   // The operand to be clamped.
+                builder.ConstantR0<uint32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<uint32>(&builder, 2, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarLowU32) {
+  ComputationBuilder builder(client_, TestName());
+  builder.Clamp(builder.ConstantR0<uint32>(1),   // The lower bound.
+                builder.ConstantR0<uint32>(0),   // The operand to be clamped.
+                builder.ConstantR0<uint32>(3));  // The upper bound.
+
+  ComputeAndCompareR0<uint32>(&builder, 1, {});
+}
+
+XLA_TEST_F(ScalarComputationsTest, ClampScalarHighF32) {
   ComputationBuilder builder(client_, TestName());
   builder.Clamp(builder.ConstantR0<float>(2.0f),   // The lower bound.
                 builder.ConstantR0<float>(5.0f),   // The operand to be clamped.
@@ -746,7 +800,7 @@ XLA_TEST_F(ScalarComputationsTest, ClampScalarHigh) {
   ComputeAndCompareR0<float>(&builder, 3.0, {}, error_spec_);
 }
 
-XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddle) {
+XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddleF32) {
   ComputationBuilder builder(client_, TestName());
   builder.Clamp(builder.ConstantR0<float>(2.0f),   // The lower bound.
                 builder.ConstantR0<float>(2.5f),   // The operand to be clamped.
@@ -755,7 +809,7 @@ XLA_TEST_F(ScalarComputationsTest, ClampScalarMiddle) {
   ComputeAndCompareR0<float>(&builder, 2.5, {}, error_spec_);
 }
 
-XLA_TEST_F(ScalarComputationsTest, ClampScalarLow) {
+XLA_TEST_F(ScalarComputationsTest, ClampScalarLowF32) {
   ComputationBuilder builder(client_, TestName());
   builder.Clamp(builder.ConstantR0<float>(2.0f),   // The lower bound.
                 builder.ConstantR0<float>(-5.0f),  // The operand to be clamped.
",0,train
06e80ff230f1551d528f082a1821a82d3229305f,tensorflow/tensorflow,"Add in optimizations for softmax for Fusion F1.

Confirmed that the test passes with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8
```

However, the latency improvement is only ~1000 ticks, as tested with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_keyword_benchmark -j8
```

Since Softmax is currently a small fraction of the overall keyword_benchmark latency we will focus on the latency of only this particular OP.

With the optimized implementation:
```
SOFTMAX took 749 ticks (0 ms).
```

Reference implementation:
```
SOFTMAX took 2052 ticks (2 ms).
```

And with the LUT hifimini implementation (for completeness):
```
SOFTMAX took 1142 ticks (1 ms).
```

The gain of ~1500 ticks is still worth merging because after all the optimizations (e.g. https://github.com/tensorflow/tensorflow/pull/47098), this will still mean a ~5% improvement for the keyword benchmark.

And the benefits might be more significant for other models too.",softmax.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""tensorflow/lite/kernels/kernel_util.h""
 #include ""tensorflow/lite/kernels/op_macros.h""
 #include ""tensorflow/lite/micro/kernels/kernel_util.h""
+#include ""tensorflow/lite/micro/kernels/xtensa/xtensa.h""
 
 namespace tflite {
 namespace {
@@ -32,7 +33,14 @@ namespace {
 struct OpData {
   uint16_t* exp_lut;
 };
+#elif defined(FUSION_F1)
+struct OpData {
+  SoftmaxParams params;
+  int scratch_tensor_index;
+};
+#endif
 
+#if defined(HIFIMINI)
 // Number of unique int8_t and int16_t values.  Used in exponent lookup table
 // computation.
 constexpr int kInt8Range =
@@ -173,8 +181,63 @@ TfLiteStatus PrepareHifimini(TfLiteContext* context, TfLiteNode* node) {
 }
 #endif  // defined(HIFIMINI)
 
+#if defined(FUSION_F1)
+TfLiteStatus PrepareHifi4(TfLiteContext* context, TfLiteNode* node) {
+  TF_LITE_ENSURE_OK(context, SoftmaxPrepare(context, node));
+
+  // Calculate scratch memory requirements and request scratch buffer
+  const TfLiteTensor* input = GetInput(context, node, 0);
+  const TfLiteTensor* output = GetOutput(context, node, 0);
+
+  const RuntimeShape& input_shape = GetTensorShape(input);
+  const RuntimeShape& output_shape = GetTensorShape(output);
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  if (input->type == kTfLiteInt8) {
+    int required_scratch =
+        get_softmax_scratch_size(PREC_ASYM8S, PREC_ASYM8S, depth);
+    TF_LITE_ENSURE(context, required_scratch > 0);
+
+    auto* data = static_cast<OpData*>(node->user_data);
+    TF_LITE_ENSURE_OK(
+        context, context->RequestScratchBufferInArena(
+                     context, required_scratch, &(data->scratch_tensor_index)));
+  }
+
+  return kTfLiteOk;
+}
+
+TfLiteStatus EvalHifi4(const OpData* op_data, const TfLiteEvalTensor* input,
+                       TfLiteEvalTensor* output, TfLiteContext* context) {
+  const RuntimeShape& input_shape = tflite::micro::GetTensorShape(input);
+  const int8_t* input_data = tflite::micro::GetTensorData<int8_t>(input);
+  const RuntimeShape& output_shape = tflite::micro::GetTensorShape(output);
+  int16_t* output_data = tflite::micro::GetTensorData<int16_t>(output);
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+
+  void* p_scratch = static_cast<void*>(
+      context->GetScratchBuffer(context, op_data->scratch_tensor_index));
+
+  for (int i = 0; i < outer_size; ++i) {
+    int err = xa_nn_vec_softmax_asym8s_16(
+        &output_data[i * depth], &input_data[i * depth],
+        op_data->params.diff_min, op_data->params.input_left_shift,
+        op_data->params.input_multiplier, depth, p_scratch);
+    TF_LITE_ENSURE(context, err == 0);
+  }
+  return kTfLiteOk;
+}
+
+#endif  // defined(FUSION_F1)
+
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
-#if defined(HIFIMINI)
+#if defined(HIFIMINI) || defined(FUSION_F1)
   TFLITE_DCHECK(context->AllocatePersistentBuffer != nullptr);
   return context->AllocatePersistentBuffer(context, sizeof(OpData));
 #else
@@ -185,6 +248,8 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
 TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 #if defined(HIFIMINI)
   return PrepareHifimini(context, node);
+#elif defined(FUSION_F1)
+  return PrepareHifi4(context, node);
 #else
   return SoftmaxPrepare(context, node);
 #endif
@@ -208,7 +273,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                        TfLiteTypeGetName(input->type), input->type);
     return kTfLiteError;
   }
-#else   // !defined(HIFIMINI)
+#else  // !defined(HIFIMINI)
   switch (input->type) {
     case kTfLiteFloat32: {
       SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
@@ -221,12 +286,17 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     }
     case kTfLiteInt8: {
       if (output->type == kTfLiteInt16) {
+#if defined(FUSION_F1)
+        return EvalHifi4(static_cast<OpData*>(node->user_data), input, output,
+                         context);
+#else
         SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
         tflite::reference_ops::Softmax(
             op_data, tflite::micro::GetTensorShape(input),
             tflite::micro::GetTensorData<int8_t>(input),
             tflite::micro::GetTensorShape(output),
             tflite::micro::GetTensorData<int16_t>(output));
+#endif
       } else {
         SoftmaxParams op_data = *static_cast<SoftmaxParams*>(node->user_data);
         tflite::reference_ops::Softmax(
",0,test
06e80ff230f1551d528f082a1821a82d3229305f,tensorflow/tensorflow,"Add in optimizations for softmax for Fusion F1.

Confirmed that the test passes with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_kernel_softmax_test -j8
```

However, the latency improvement is only ~1000 ticks, as tested with:
```
make -f tensorflow/lite/micro/tools/make/Makefile TARGET=xtensa OPTIMIZED_KERNEL_DIR=xtensa TARGET_ARCH=fusion_f1 XTENSA_CORE=F1_190305_swupgrade test_keyword_benchmark -j8
```

Since Softmax is currently a small fraction of the overall keyword_benchmark latency we will focus on the latency of only this particular OP.

With the optimized implementation:
```
SOFTMAX took 749 ticks (0 ms).
```

Reference implementation:
```
SOFTMAX took 2052 ticks (2 ms).
```

And with the LUT hifimini implementation (for completeness):
```
SOFTMAX took 1142 ticks (1 ms).
```

The gain of ~1500 ticks is still worth merging because after all the optimizations (e.g. https://github.com/tensorflow/tensorflow/pull/47098), this will still mean a ~5% improvement for the keyword benchmark.

And the benefits might be more significant for other models too.",xtensa.h,"@@ -20,6 +20,7 @@ limitations under the License.
 #include <xtensa/tie/xt_hifi2.h>
 #elif defined(FUSION_F1)
 #include ""include/nnlib/xa_nnlib_api.h""
+#include ""include/nnlib/xa_nnlib_standards.h""
 #endif
 
 #endif  // TENSORFLOW_LITE_MICRO_KERNELS_XTENSA_XTENSA_H_
",0,test
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",client.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <string>
 #include <utility>
 
+#include ""tensorflow/compiler/xla/legacy_flags/debug_options_flags.h""
 #include ""tensorflow/compiler/xla/literal_util.h""
 #include ""tensorflow/compiler/xla/ptr_util.h""
 #include ""tensorflow/compiler/xla/status_macros.h""
@@ -376,9 +377,10 @@ StatusOr<std::vector<std::unique_ptr<GlobalData>>> Client::DeconstructTuple(
 }
 
 StatusOr<ComputationStats> Client::GetComputationStats(
-    const Computation& computation) const {
+    const Computation& computation, const DebugOptions& debug_options) const {
   ComputationStatsRequest request;
   *request.mutable_computation() = computation.handle();
+  *request.mutable_debug_options() = debug_options;
   ComputationStatsResponse response;
 
   VLOG(1) << ""making computation stats request"";
@@ -427,7 +429,10 @@ StatusOr<Shape> Client::GetShape(const GlobalData& data) {
 
 StatusOr<string> Client::ExecutionStatsAsString(
     const Computation& computation, const ExecutionProfile& profile) {
-  TF_ASSIGN_OR_RETURN(auto computation_stats, GetComputationStats(computation));
+  TF_ASSIGN_OR_RETURN(
+      auto computation_stats,
+      GetComputationStats(computation,
+                          legacy_flags::GetDebugOptionsFromFlags()));
   int64 total_flops =
       computation_stats.flop_count() + computation_stats.transcendental_count();
   if (profile.compute_time_ns() > 0) {
",0,train
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",client.h,"@@ -150,7 +150,7 @@ class Client {
 
   // Retrieves the statistics of the given computation.
   StatusOr<ComputationStats> GetComputationStats(
-      const Computation& computation) const;
+      const Computation& computation, const DebugOptions& debug_options) const;
 
   // Returns the Shape of the given array specified by 'data'. The shape
   // includes the Layout of the array as it is stored on the service.
",0,train
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",debug_options_flags.cc,"@@ -189,6 +189,7 @@ xla::DebugOptions GetDebugOptionsFromFlags() {
   options.set_xla_hlo_graph_addresses(flag_values->xla_hlo_graph_addresses);
   options.set_xla_hlo_graph_layout(flag_values->xla_hlo_graph_layout);
   options.set_xla_hlo_graph_path(flag_values->xla_hlo_graph_path);
+  options.set_xla_hlo_dump_as_graphdef(flag_values->xla_hlo_dump_as_graphdef);
   options.set_xla_log_hlo_text(flag_values->xla_log_hlo_text);
   options.set_xla_generate_hlo_text_to(flag_values->xla_generate_hlo_text_to);
 
",0,train
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",service.cc,"@@ -1173,9 +1173,11 @@ tensorflow::Status Service::GetComputationStats(
   VersionedComputationHandle versioned_handle =
       user_computation->GetVersionedHandle();
 
+  HloModuleConfig config;
+  config.set_debug_options(arg->debug_options());
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<HloModule> module,
-      computation_tracker_.BuildHloModule(versioned_handle, HloModuleConfig()));
+      computation_tracker_.BuildHloModule(versioned_handle, config));
 
   hlo_graph_dumper::MaybeDumpHloModule(*module,
                                        ""computation statistics subject"");
",0,train
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",dumped_computation_to_graphviz.cc,"@@ -53,8 +53,12 @@ void RealMain(tensorflow::gtl::ArraySlice<char*> args) {
     TF_CHECK_OK(
         tensorflow::ReadBinaryProto(tensorflow::Env::Default(), arg, &module));
     Computation computation = client->LoadSnapshot(module).ConsumeValueOrDie();
+    DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags();
+    debug_options.set_xla_generate_hlo_graph("".*"");
+    debug_options.set_xla_hlo_graph_layout(true);
     ComputationStats stats =
-        client->GetComputationStats(computation).ConsumeValueOrDie();
+        client->GetComputationStats(computation, debug_options)
+            .ConsumeValueOrDie();
     fprintf(stdout, "">>> %s :: %s\n"", arg, stats.DebugString().c_str());
   }
 }
",0,train
32de8d33177d6f46c7ecd06363bd492194341731,tensorflow/tensorflow,"[XLA] Propagate debug_options in ComputationStatsRequest

And fix graph dumping tools to add graph dumping options automatically.

PiperOrigin-RevId: 161082718",dumped_computation_to_tf_graphdef.cc,"@@ -52,8 +52,12 @@ void RealMain(tensorflow::gtl::ArraySlice<char*> args) {
     TF_CHECK_OK(
         tensorflow::ReadBinaryProto(tensorflow::Env::Default(), arg, &module));
     Computation computation = client->LoadSnapshot(module).ConsumeValueOrDie();
+    DebugOptions debug_options = legacy_flags::GetDebugOptionsFromFlags();
+    debug_options.set_xla_generate_hlo_graph("".*"");
+    debug_options.set_xla_hlo_dump_as_graphdef(true);
     ComputationStats stats =
-        client->GetComputationStats(computation).ConsumeValueOrDie();
+        client->GetComputationStats(computation, debug_options)
+            .ConsumeValueOrDie();
     fprintf(stdout, "">>> %s :: %s\n"", arg, stats.DebugString().c_str());
   }
 }
",0,train
db9265af2548648dd3aa15af7073076eb393b8d9,tensorflow/tensorflow,Fix build errors.,nvptx_compiler.cc,"@@ -391,7 +391,7 @@ NVPTXCompiler::CompileTargetBinary(const HloModule* module,
   VLOG(2) << ""Libdevice dir = "" << libdevice_dir << ""\n"";
 
   string ptx;
-  if (!MaybeLoadPtxFromFile(module.get(), &ptx)) {
+  if (!MaybeLoadPtxFromFile(module, &ptx)) {
     XLA_SCOPED_LOGGING_TIMER(
         ""NVPTXCompiler::CompileTargetBinary - CompileToPtx"");
     TF_ASSIGN_OR_RETURN(
",0,train
43c95696c0ca68314c613ed0a55e4f58afc784df,tensorflow/tensorflow,[tensorflow/compiler/xla/service/space_to_batch_converter.cc] Use `const auto&` instead of `const auto`,space_to_batch_converter.cc,"@@ -1329,7 +1329,7 @@ void ConvolutionVisitor::PropagateOnBroadcast(HloInstruction* consumer,
   }
 
   std::vector<int64_t> broadcast_dims;
-  const auto dimensions = consumer->dimensions();
+  const auto& dimensions = consumer->dimensions();
   broadcast_dims.reserve(dimensions.size());
   for (auto j : dimensions) {
     broadcast_dims.push_back(DimLookUp(permute_dims, j));
",0,train
09713e439363d763ca7c12d0c279b8d55d5b6053,tensorflow/tensorflow,"We should be using the on-host shape, as the device one can have tuples in place of complex or S64 types.

PiperOrigin-RevId: 228262394",raw_api_test.cc,"@@ -956,6 +956,7 @@ TEST(RawApiTest, CompileAndExecuteWithS64Argument) {
   xrt::XRTExecutionConfig e;
   e.set_release_input_handles(true);
   e.set_release_compilation_handle(true);
+  e.set_return_exploded_tuple(true);
 
   Scope root = Scope::NewRootScope().WithDevice(DeviceFromFlag());
   auto e_config =
",0,test
1d48d2f58417e78853b7ed9b77eb83c030056619,tensorflow/tensorflow,"Integrate LLVM at llvm/llvm-project@df47368d406a

Updates LLVM usage to match
[df47368d406a](https://github.com/llvm/llvm-project/commit/df47368d406a)

PiperOrigin-RevId: 373136440
Change-Id: I479781bf2147874ecaec0d3d4d5ed726acd899a8",tensorflow_abi_knowledge_propagation.cc,"@@ -141,7 +141,7 @@ struct PropagateTfAbiKnowledgeToKernelsPass
           // Add the no_alias attribute to the corresponding pointer.
           kernel.setArgAttr(kernel_p + 1,
                             LLVM::LLVMDialect::getNoAliasAttrName(),
-                            b.getBoolAttr(true));
+                            b.getUnitAttr());
         }
         // Advance base, aligned, offset, strides and sizes many arguments.
         kernel_p += memref.getRank() * 2 + 3;
",0,train
90da05cd1c07b0c84e102944a9a634127ecdc52b,tensorflow/tensorflow,"[TF-numpy] Exports `np.newaxis`.

PiperOrigin-RevId: 321687455
Change-Id: I47a9f566b9a961368cfb4f076674fc0b94a6e140",np_array_ops.py,"@@ -37,10 +37,14 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import sort_ops
 from tensorflow.python.ops.numpy_ops import np_arrays
 from tensorflow.python.ops.numpy_ops import np_dtypes
+from tensorflow.python.ops.numpy_ops import np_export
 from tensorflow.python.ops.numpy_ops import np_utils
 from tensorflow.python.util import nest
 
 
+newaxis = np_export.np_export_constant(__name__, 'newaxis', np.newaxis)
+
+
 @np_utils.np_doc('empty')
 def empty(shape, dtype=float):  # pylint: disable=redefined-outer-name
   return zeros(shape, dtype)
",0,train
b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrarily many values in KeyValueSort on GPU backend.

PiperOrigin-RevId: 216688700",ir_emitter_unnested.cc,"@@ -34,6 +34,7 @@ limitations under the License.
 #include ""llvm/IR/Instructions.h""
 #include ""llvm/IR/LLVMContext.h""
 #include ""llvm/IR/Module.h""
+#include ""tensorflow/compiler/xla/layout_util.h""
 #include ""tensorflow/compiler/xla/literal.h""
 #include ""tensorflow/compiler/xla/service/buffer_assignment.h""
 #include ""tensorflow/compiler/xla/service/dfs_hlo_visitor.h""
@@ -2192,34 +2193,34 @@ Status IrEmitterUnnested::HandleSelect(HloInstruction* select) {
 
 Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
   std::vector<std::unique_ptr<Thunk>> thunks;
-  auto keys = sort->operand(0);
-  auto values = sort->operand_count() > 1 ? sort->operand(1) : nullptr;
-  ShapeIndex keys_shape_index({});
-  ShapeIndex values_shape_index({});
-  if (values != nullptr) {
-    keys_shape_index = ShapeIndex({0});
-    values_shape_index = ShapeIndex({1});
-  }
-  auto keys_destination = GetAllocationSlice(*sort, keys_shape_index);
-  auto values_destination = GetAllocationSlice(*sort, values_shape_index);
-
-  if (keys_destination != GetAllocationSlice(*keys)) {
-    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
-        /*source_address=*/GetAllocationSlice(*keys),
-        /*destination_buffer=*/keys_destination,
-        /*mem_size=*/ShapeUtil::ByteSizeOf(keys->shape()), nullptr));
-  }
-  if (values != nullptr && values_destination != GetAllocationSlice(*values)) {
-    // TODO(b/26783907): Figure out why we never seem to share buffers for
-    // key/value sort.
-    thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
-        /*source_address=*/GetAllocationSlice(*values),
-        /*destination_buffer=*/values_destination,
-        /*mem_size=*/ShapeUtil::ByteSizeOf(values->shape()), nullptr));
+  Shape keys_shape = sort->operand(0)->shape();
+  for (int64 i = 0; i < sort->operand_count(); ++i) {
+    ShapeIndex shape_index =
+        sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
+    // We assume that the layout of all involved operands and outputs is the
+    // same.
+    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(keys_shape,
+                                                  sort->operand(i)->shape()));
+    TF_RET_CHECK(LayoutUtil::LayoutsInShapesEqual(
+        keys_shape, ShapeUtil::GetSubshape(sort->shape(), shape_index)));
+
+    // If possible, we share buffers. If that is not possible, we need to copy
+    // the values, because the emitter does the sorting in-place.
+    auto destination_buffer = GetAllocationSlice(*sort, shape_index);
+    auto source_address = GetAllocationSlice(*sort->operand(i));
+    if (destination_buffer != source_address) {
+      // TODO(b/26783907): Figure out why we never seem to share buffers for
+      // key/value sort.
+      thunks.push_back(absl::make_unique<DeviceToDeviceCopyThunk>(
+          /*source_address=*/source_address,
+          /*destination_buffer=*/destination_buffer,
+          /*mem_size=*/ShapeUtil::ByteSizeOf(sort->operand(i)->shape()),
+          nullptr));
+    }
   }
 
   int64 dimension_to_sort = sort->dimensions(0);
-  int64 dimension_to_sort_bound = keys->shape().dimensions(dimension_to_sort);
+  int64 dimension_to_sort_bound = keys_shape.dimensions(dimension_to_sort);
   int64 num_stages = tensorflow::Log2Ceiling(dimension_to_sort_bound);
   auto index_type = b_.getInt64Ty();
 
@@ -2243,7 +2244,7 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
       thunks.push_back(
           BuildKernelThunk(sort, /*implements_whole_instruction=*/false));
       LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
-          keys->shape(), ir_emitter_context_->device_description());
+          keys_shape, ir_emitter_context_->device_description());
       UpdateLaunchDimensions(launch_dimensions, thunks.back().get(),
                              ir_emitter_context_->llvm_module());
 
@@ -2254,12 +2255,21 @@ Status IrEmitterUnnested::HandleSort(HloInstruction* sort) {
         xor_mask = llvm::ConstantInt::get(index_type, 1LL << mask);
       }
 
+      IrArray keys_array;
+      std::vector<IrArray> values_arrays;
+      values_arrays.reserve(sort->operand_count() - 1);
+      for (int64 i = 0; i < sort->operand_count(); ++i) {
+        ShapeIndex shape_index =
+            sort->operand_count() > 1 ? ShapeIndex({i}) : ShapeIndex({});
+        if (i == 0) {
+          keys_array = GetIrArray(*sort, *sort, shape_index);
+        } else {
+          values_arrays.push_back(GetIrArray(*sort, *sort, shape_index));
+        }
+      }
       TF_RETURN_IF_ERROR(llvm_ir::EmitSortInPlace(
-          dimension_to_sort, GetIrArray(*sort, *sort, keys_shape_index),
-          values != nullptr ? absl::make_optional<IrArray>(
-                                  GetIrArray(*sort, *sort, values_shape_index))
-                            : absl::nullopt,
-          IrName(sort), xor_mask, &b_, &launch_dimensions));
+          dimension_to_sort, keys_array, values_arrays, IrName(sort), xor_mask,
+          &b_, &launch_dimensions));
     }
   }
 
",0,train
b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrarily many values in KeyValueSort on GPU backend.

PiperOrigin-RevId: 216688700",sort_util.cc,"@@ -15,9 +15,10 @@ limitations under the License.
 
 #include ""tensorflow/compiler/xla/service/llvm_ir/sort_util.h""
 
+#include <vector>
+
 // IWYU pragma: no_include ""llvm/IR/Intrinsics.gen.inc""
 #include ""absl/strings/string_view.h""
-#include ""absl/types/optional.h""
 #include ""llvm/ADT/APInt.h""
 #include ""llvm/IR/BasicBlock.h""
 #include ""llvm/IR/Constants.h""
@@ -43,7 +44,7 @@ namespace {
 void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index,
                      const IrArray::Index& compare_keys_index,
                      const IrArray& keys_array,
-                     const absl::optional<IrArray>& values_array,
+                     const std::vector<IrArray>& values_arrays,
                      llvm::IRBuilder<>* b) {
   // if (is_smaller_index &&
   //     compare_keys[dimension_to_sort] < dimension_to_sort_bound)
@@ -100,19 +101,18 @@ void EmitCompareLoop(int64 dimension_to_sort, const IrArray::Index& keys_index,
   // Swap key1 with key2.
   keys_array.EmitWriteArrayElement(keys_index, key2, b);
   keys_array.EmitWriteArrayElement(compare_keys_index, key1, b);
-  if (values_array.has_value()) {
+  for (const auto& values_array : values_arrays) {
     // Also swap the values.
-    auto value1 = values_array.value().EmitReadArrayElement(keys_index, b);
-    auto value2 =
-        values_array.value().EmitReadArrayElement(compare_keys_index, b);
-    values_array.value().EmitWriteArrayElement(keys_index, value2, b);
-    values_array.value().EmitWriteArrayElement(compare_keys_index, value1, b);
+    auto value1 = values_array.EmitReadArrayElement(keys_index, b);
+    auto value2 = values_array.EmitReadArrayElement(compare_keys_index, b);
+    values_array.EmitWriteArrayElement(keys_index, value2, b);
+    values_array.EmitWriteArrayElement(compare_keys_index, value1, b);
   }
 }
 }  // namespace
 
 Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const absl::optional<IrArray>& values_array,
+                       const std::vector<IrArray>& values_arrays,
                        absl::string_view name, llvm::Value* xor_mask,
                        llvm::IRBuilder<>* b,
                        const gpu::LaunchDimensions* launch_dimensions) {
@@ -162,7 +162,7 @@ Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
     compare_keys_index[dimension_to_sort] =
         b->CreateXor(compare_index[0], xor_mask);
     EmitCompareLoop(dimension_to_sort, keys_index, compare_keys_index,
-                    keys_array, values_array, b);
+                    keys_array, values_arrays, b);
     return Status::OK();
   };
   if (launch_dimensions != nullptr) {
",0,train
b2ab2da16f22007e0f4d61d8806ebac6d5d0edd5,tensorflow/tensorflow,"Support arbitrarily many values in KeyValueSort on GPU backend.

PiperOrigin-RevId: 216688700",sort_util.h,"@@ -16,8 +16,9 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
 #define TENSORFLOW_COMPILER_XLA_SERVICE_LLVM_IR_SORT_UTIL_H_
 
+#include <vector>
+
 #include ""absl/strings/string_view.h""
-#include ""absl/types/optional.h""
 #include ""llvm/IR/Value.h""
 #include ""tensorflow/compiler/xla/service/gpu/partition_assignment.h""
 #include ""tensorflow/compiler/xla/service/llvm_ir/ir_array.h""
@@ -31,7 +32,7 @@ namespace llvm_ir {
 // implements the inner loop of BitonicSort. If 'launch_dimensions' is nullptr,
 // the inner compare loop will not be parallelized.
 Status EmitSortInPlace(int64 dimension_to_sort, const IrArray& keys_array,
-                       const absl::optional<IrArray>& values_array,
+                       const std::vector<IrArray>& values_arrays,
                        absl::string_view name, llvm::Value* xor_mask,
                        llvm::IRBuilder<>* b,
                        const gpu::LaunchDimensions* launch_dimensions);
",0,train
2e5bfbbb902b66720cd8d41a0fa1bce292efd31b,tensorflow/tensorflow,"[XLA] Fix cost analysis interval picking when there is no prefetch start time that satisfies earliest < x < latest.

We specify how long a prefetch can be in relation to how long the overlapped
instructions take using flags. However, sometimes there is a long-executing HLO
before the HLO and the earliest and latest durations both fall within this HLO.
In this case, we were previously not attempting to prefetch at all because there
is no valid prefetch start time that satisfies the earliest/latest constraints.
However, this can be detrimental to some models. We now allow prefetches to
start a little earlier than the specified earliest time in such cases.

PiperOrigin-RevId: 370087066
Change-Id: Ief3b6cd58cdf6d26b38569a5a541c8e06b946840",memory_space_assignment.cc,"@@ -578,7 +578,7 @@ void CostAnalysisPrefetchIntervalPicker::Begin(const HloUse& use,
   // Find the earliest time we're allowed to start prefetching.
   float max_interval = GetMaxElapsedInAlternateMemory(async_copy_elapsed_);
   for (earliest_prefetch_time_ = start_time;
-       earliest_prefetch_time_ <= end_logical_time_ &&
+       earliest_prefetch_time_ < latest_prefetch_time_ &&
        (computation_nest_level_[earliest_prefetch_time_] != end_nest_level ||
         max_interval < GetLogicalIntervalElapsed(earliest_prefetch_time_,
                                                  end_logical_time_));
",0,train
2e5bfbbb902b66720cd8d41a0fa1bce292efd31b,tensorflow/tensorflow,"[XLA] Fix cost analysis interval picking when there is no prefetch start time that satisfies earliest < x < latest.

We specify how long a prefetch can be in relation to how long the overlapped
instructions take using flags. However, sometimes there is a long-executing HLO
before the HLO and the earliest and latest durations both fall within this HLO.
In this case, we were previously not attempting to prefetch at all because there
is no valid prefetch start time that satisfies the earliest/latest constraints.
However, this can be detrimental to some models. We now allow prefetches to
start a little earlier than the specified earliest time in such cases.

PiperOrigin-RevId: 370087066
Change-Id: Ief3b6cd58cdf6d26b38569a5a541c8e06b946840",memory_space_assignment_test.cc,"@@ -5786,5 +5786,51 @@ TEST_F(CostAnalysisPrefetchIntervalPickerTest, ConsecutiveConditionals) {
             5);
 }
 
+TEST_F(CostAnalysisPrefetchIntervalPickerTest, EarliestLatestWindowTooSmall) {
+  // This tests the scenario where there is an op that takes a long time (tanh
+  // in this example) and as a result the earliest and latest times both fall
+  // inside this long-running op. In this case, we should still return a valid
+  // prefetch interval just before the long-running op.
+  absl::string_view hlo_string = R""(
+  HloModule bug, is_scheduled=true
+
+  ENTRY Entry {
+    param0 = f32[2,4] parameter(0)
+    negate = f32[2,4] negate(param0)
+    tanh = f32[2,4] tanh(param0)
+    ROOT add = f32[2,4] add(tanh, negate)
+  }
+  )"";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  HloCostAnalysis hlo_cost_analysis(ShapeSize);
+  TF_ASSERT_OK_AND_ASSIGN(auto cost_analysis,
+                          FakeMemorySpaceAssignmentCostAnalysis::Create(
+                              hlo_cost_analysis, *module));
+  cost_analysis->SetOverrideForGetInstructionElapsed(
+      [](const HloInstruction& hlo) {
+        if (hlo.opcode() == HloOpcode::kTanh) {
+          return 20.0;
+        }
+        return 1.0;
+      });
+  CostAnalysisPrefetchIntervalPicker interval_picker(
+      *cost_analysis,
+      /*min_async_copy_to_overlap_ratio=*/1.0,
+      /*max_async_copy_to_overlap_ratio=*/4.0,
+      /*preferred_async_copy_to_overlap_ratio=*/2.0,
+      /*buffer_size_for_max_async_copy=*/0);
+
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  const HloUse use{root, /*operand_number=*/1, /*operand_index=*/{}};
+  interval_picker.Begin(use, /*start_time=*/1, /*end_time=*/3);
+
+  LOG(INFO) << interval_picker.ToDebugString();
+  EXPECT_FALSE(interval_picker.Done());
+  EXPECT_EQ(interval_picker.Next(), 1);
+  EXPECT_TRUE(interval_picker.Done());
+}
+
 }  // namespace
 }  // namespace xla
",0,train
530dc71d0487cacccbe270490d460bc401040dc9,tensorflow/tensorflow,"Fix tsan detected error in core/util/exec_on_stall_test.cc

Enforce mutex around access to test variable.

PiperOrigin-RevId: 200078751",exec_on_stall_test.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/util/exec_on_stall.h""
 
 #include ""tensorflow/core/platform/macros.h""
+#include ""tensorflow/core/platform/mutex.h""
 #include ""tensorflow/core/platform/test.h""
 
 namespace tensorflow {
@@ -32,14 +33,24 @@ Chunk* NewChunk(int stall_seconds, std::function<void()> f) {
 }
 
 TEST(ExecuteOnStallTest, BothWays) {
-  bool a_triggered = false;
-  bool b_triggered = false;
-  Chunk* a = NewChunk(1, [&a_triggered]() { a_triggered = true; });
-  Chunk* b = NewChunk(1, [&b_triggered]() { b_triggered = true; });
+  mutex mu;
+  bool a_triggered(false);
+  bool b_triggered(false);
+  Chunk* a = NewChunk(1, [&mu, &a_triggered]() {
+    mutex_lock l(mu);
+    a_triggered = true;
+  });
+  Chunk* b = NewChunk(1, [&mu, &b_triggered]() {
+    mutex_lock l(mu);
+    b_triggered = true;
+  });
   delete a;
   Env::Default()->SleepForMicroseconds(2000000);
-  EXPECT_FALSE(a_triggered);
-  EXPECT_TRUE(b_triggered);
+  {
+    mutex_lock l(mu);
+    EXPECT_FALSE(a_triggered);
+    EXPECT_TRUE(b_triggered);
+  }
   delete b;
 }
 
",0,train
37408c89124e8bf4a005ba89d17b18a0dc29f94a,tensorflow/tensorflow,"Fixing a python3 issue in bias_op_test.py

Issue 1: range in python3 does not return a list as in python2",bias_op_test.py,"@@ -63,14 +63,14 @@ class BiasAddTest(tf.test.TestCase):
                             (1,) * (3 - np_value.ndim) + np_value.shape)
     # move the last dimension to third-to-last
     np_dim = range(np_value.ndim)
-    np_dim_new = np_dim[0:-3] + np_dim[-1:] + np_dim[-3:-1]
+    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-1:]) + list(np_dim[-3:-1])
     return np.transpose(np_value, np_dim_new)
 
   def _NCHWToNHWC(self, np_value):
-    assert np_value.shape >= 3
+    assert len(np_value.shape) >= 3
     np_dim = range(np_value.ndim)
     # move the third-to-last dimension to the last
-    np_dim_new = np_dim[0:-3] + np_dim[-2:] + np_dim[-3:-2]
+    np_dim_new = list(np_dim[0:-3]) + list(np_dim[-2:]) + list(np_dim[-3:-2])
     return np.transpose(np_value, np_dim_new)
 
   def _testBiasNCHW(self, np_inputs, np_bias, use_gpu):
",0,train
9b721a246bef8210d5ee3d9bb4a6e43004aa0f8a,tensorflow/tensorflow,"Support full [b]float16 in embedding_lookup_sparse

- This removes a forced cast to float32 and instead outputs the same
  type as the input. The inner computations are still done in float32
  to avoid numerical issues.
- This improves performance and makes the op consistent with all other
  ops that output the same type as the input.",embedding_ops_test.py,"@@ -718,10 +718,7 @@ class EmbeddingLookupSparseTest(test.TestCase):
 
         self.assertEqual(embedding_sum.get_shape().as_list(),
                          expected_lookup_result_shape)
-        if dtype in (dtypes.float16, dtypes.bfloat16):
-          self.assertEqual(embedding_sum.dtype, dtypes.float32)
-        else:
-          self.assertEqual(embedding_sum.dtype, dtype)
+        self.assertEqual(embedding_sum.dtype, dtype)
 
         tf_embedding_sum = embedding_sum.eval(feed_dict=feed_dict)
 
",0,train
9b721a246bef8210d5ee3d9bb4a6e43004aa0f8a,tensorflow/tensorflow,"Support full [b]float16 in embedding_lookup_sparse

- This removes a forced cast to float32 and instead outputs the same
  type as the input. The inner computations are still done in float32
  to avoid numerical issues.
- This improves performance and makes the op consistent with all other
  ops that output the same type as the input.",embedding_ops.py,"@@ -511,18 +511,21 @@ def embedding_lookup_sparse(params,
 
     embeddings = embedding_lookup(
         params, ids, partition_strategy=partition_strategy, max_norm=max_norm)
-    if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
-      embeddings = math_ops.cast(embeddings, dtypes.float32)
     if not ignore_weights:
       if segment_ids.dtype != dtypes.int32:
         segment_ids = math_ops.cast(segment_ids, dtypes.int32)
 
       weights = sp_weights.values
+      embeddings = array_ops.gather(embeddings, idx)
+
+      original_dtype = embeddings.dtype
+      if embeddings.dtype in (dtypes.float16, dtypes.bfloat16):
+        # Cast low-precision embeddings to float32 during the computation to
+        # avoid numerical issues.
+        embeddings = math_ops.cast(embeddings, dtypes.float32)
       if weights.dtype != embeddings.dtype:
         weights = math_ops.cast(weights, embeddings.dtype)
 
-      embeddings = array_ops.gather(embeddings, idx)
-
       # Reshape weights to allow broadcast
       ones_shape = array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0)
       ones = array_ops.ones(ones_shape, dtype=dtypes.int32)
@@ -555,6 +558,8 @@ def embedding_lookup_sparse(params,
         embeddings = math_ops.divide(embeddings, weight_sum_sqrt, name=name)
       else:
         assert False, ""Unrecognized combiner""
+      if embeddings.dtype != original_dtype:
+        embeddings = math_ops.cast(embeddings, original_dtype)
     else:
       if segment_ids.dtype not in (dtypes.int32, dtypes.int64):
         segment_ids = math_ops.cast(segment_ids, dtypes.int32)
",0,train
71c9f15ea2f953fcdb4ff33316547c71930ed4d7,tensorflow/tensorflow,"Delete AutoGraphParseError now that it is no longer used because we are moving to AutoGraphError, InternalError, etc.

PiperOrigin-RevId: 231601621",__init__.py,"@@ -49,9 +49,8 @@ from tensorflow.python.autograph.impl.api import to_graph
 from tensorflow.python.autograph.lang.directives import set_element_type
 from tensorflow.python.autograph.lang.directives import set_loop_options
 from tensorflow.python.autograph.lang.special_functions import stack
-from tensorflow.python.autograph.lang.special_functions import tensor_list
 from tensorflow.python.autograph.pyct.errors import AutoGraphError
-from tensorflow.python.autograph.pyct.transformer import AutoGraphParseError
+from tensorflow.python.autograph.lang.special_functions import tensor_list
 from tensorflow.python.autograph.utils import ag_logging
 from tensorflow.python.util.all_util import remove_undocumented
 
@@ -79,7 +78,6 @@ _allowed_symbols = [
     'stack',
     'tensor_list',
     # Exceptions
-    'AutoGraphParseError',
     'AutoGraphError',
     # Utilities: to be removed
     'utils',
",0,test
71c9f15ea2f953fcdb4ff33316547c71930ed4d7,tensorflow/tensorflow,"Delete AutoGraphParseError now that it is no longer used because we are moving to AutoGraphError, InternalError, etc.

PiperOrigin-RevId: 231601621",transformer.py,"@@ -27,18 +27,6 @@ from tensorflow.python.autograph.pyct import pretty_printer
 from tensorflow.python.autograph.pyct import templates
 
 
-class AutoGraphParseError(SyntaxError):
-  """"""Error for graph construction errors from AutoGraph generated code.""""""
-
-  def __init__(self, error, origin_info):
-    file_path = origin_info.loc.filename
-    line_number = origin_info.loc.lineno
-    col_offset = origin_info.loc.col_offset
-    source_line = origin_info.source_code_line
-    super(AutoGraphParseError, self).__init__(
-        error, (file_path, line_number, col_offset, source_line))
-
-
 # TODO(znado): Use namedtuple.
 class Context(object):
   """"""Contains information about a source code transformation.
",0,test
647f7ae610f0b1f009b3af70735263598d13e292,tensorflow/tensorflow,"Disable lower using switch and merge in grappler optimization

PiperOrigin-RevId: 297023390
Change-Id: I562efaf9226391624789d2a45912731724318f2e",lite.py,"@@ -1031,10 +1031,16 @@ class TFLiteConverter(TFLiteConverterBase):
         (self.inference_type == constants.INT8 and
          (post_training_optimize or weight_only_quantize))):
       try:
+        # TODO(b/150163103): Merge `disabling lower using switch merge' calls.
+        # Grappler will also try to lower while loop into switch merge
+        # representation which is undesired for Ophints, so we simply remove
+        # those attributes to prevent Grappler from doing so.
+        graph_def = _convert_to_constants.disable_lower_using_switch_merge(
+            optimized_graph)
         # Run function inlining optimization to ensure any models generated
         # through the from_frozen_graph path have been inlined.
         optimized_graph = _run_graph_optimizations(
-            self._graph_def,
+            graph_def,
             self._input_tensors,
             self._output_tensors,
             config=self._grappler_config([""function""]))
",0,train
d5b5e1148ab3ef1817fadb864694ec3139746400,tensorflow/tensorflow,"Add a Keras LSTM+batch_jacobian integration test

PiperOrigin-RevId: 335425103
Change-Id: Ife380fe0ff62baaac80dde0a990eb561b7664164",gradients_test.py,"@@ -79,6 +79,34 @@ class GradientsTest(tf.test.TestCase):
     for g, g_re in zip(grads, grads_re):
       self.assertAllClose(g, g_re)
 
+  def testLSTMBatchJacobian(self):
+    class HasLSTM(tf.keras.Model):
+
+      def __init__(self):
+        super(HasLSTM, self).__init__()
+        self.lstm = tf.keras.layers.LSTM(units=5)
+        self.dense = tf.keras.layers.Dense(1, activation=tf.nn.sigmoid)
+
+      def call(self, x):
+        return self.dense(self.lstm(x))
+
+    m = HasLSTM()
+
+    def jacobian(x):
+      with tf.GradientTape() as tape:
+        tape.watch(x)
+        y = m(x)  # pylint: disable=not-callable
+      return tape.batch_jacobian(y, x)
+
+    inp = tf.nn.l2_normalize(tf.ones([1, 2, 3]), axis=[1, 2])
+    eager_result = jacobian(inp)
+    function_result = tf.function(jacobian)(inp)
+    self.assertAllClose(eager_result, function_result)
+    backprop_result, numeric_result = tf.test.compute_gradient(
+        m, [inp], delta=1e-3)
+    self.assertAllClose(numeric_result, backprop_result, rtol=1e-2)
+    self.assertAllClose(tf.reshape(numeric_result, [-1]),
+                        tf.reshape(eager_result, [-1]), rtol=1e-2)
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",cppmath.h,"@@ -20,7 +20,7 @@ limitations under the License.
 namespace tflite {
 
 #if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO)
+    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || defined(__ZEPHYR__)
 #define TF_LITE_GLOBAL_STD_PREFIX
 #else
 #define TF_LITE_GLOBAL_STD_PREFIX std
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",max.h,"@@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
+
+#include <cmath>
+
+namespace tflite {
+
+#if defined(TF_LITE_USE_GLOBAL_MAX) || defined(__ZEPHYR__)
+inline float TfLiteMax(const float& x, const float& y) {
+  return std::max(x, y);
+}
+#else
+template <class T>
+inline T TfLiteMax(const T& x, const T& y) {
+  return std::fmax(x, y);
+}
+#endif
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MAX_H_
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",min.h,"@@ -0,0 +1,35 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
+
+#include <cmath>
+
+namespace tflite {
+
+#if defined(TF_LITE_USE_GLOBAL_MIN) || defined(__ZEPHYR__)
+inline float TfLiteMin(const float& x, const float& y) {
+  return std::min(x, y);
+}
+#else
+template <class T>
+inline T TfLiteMin(const T& x, const T& y) {
+  return std::fmin(x, y);
+}
+#endif
+
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_MIN_H_
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",reduce.h,"@@ -20,6 +20,8 @@ limitations under the License.
 #include ""tensorflow/lite/kernels/internal/cppmath.h""
 #include ""tensorflow/lite/kernels/internal/quantization_util.h""
 #include ""tensorflow/lite/kernels/internal/types.h""
+#include ""tensorflow/lite/kernels/internal/min.h""
+#include ""tensorflow/lite/kernels/internal/max.h""
 
 namespace tflite {
 
@@ -382,10 +384,10 @@ inline bool QuantizedMeanOrSum(const T* input_data, int32 input_zero_point,
         float float_mean = static_cast<float>(temp_sum[idx]) /
                            static_cast<float>(num_elements_in_axis);
         float result =
-            std::min(TfLiteRound(float_mean * scale + bias) + output_zero_point,
+            TfLiteMin(TfLiteRound(float_mean * scale + bias) + output_zero_point,
                      static_cast<float>(std::numeric_limits<T>::max()));
         result =
-            std::max(result, static_cast<float>(std::numeric_limits<T>::min()));
+            TfLiteMax(result, static_cast<float>(std::numeric_limits<T>::min()));
         output_data[idx] = static_cast<T>(result);
       }
     }
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",resize_nearest_neighbor.h,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <cmath>
 
 #include ""tensorflow/lite/kernels/internal/types.h""
+#include ""tensorflow/lite/kernels/internal/cppmath.h""
 
 namespace tflite {
 
@@ -34,7 +35,7 @@ inline int32 GetNearestNeighbor(const int input_value, const int32 input_size,
   const float offset = half_pixel_centers ? 0.5f : 0.0f;
   int32 output_value = std::min(
       align_corners
-          ? static_cast<int32>(std::round((input_value + offset) * scale))
+          ? static_cast<int32>(TfLiteRound((input_value + offset) * scale))
           : static_cast<int32>(std::floor((input_value + offset) * scale)),
       input_size - 1);
   if (half_pixel_centers) {
",0,train
17c5a9bbd67f34b25c3b5ffaba2f72a54ec0a105,tensorflow/tensorflow,"Change std::max, std::min, std::round to tflite::TfLiteMax, tflite::TfLiteMin, tflite::TfLiteRound

Signed-off-by: Kamil Rakoczy <krakoczy@antmicro.com>
Signed-off-by: Karol Gugala <kgugala@antmicro.com>",activation_utils.h,"@@ -21,6 +21,8 @@ limitations under the License.
 
 #include ""tensorflow/lite/c/builtin_op_data.h""
 #include ""tensorflow/lite/kernels/internal/cppmath.h""
+#include ""tensorflow/lite/kernels/internal/max.h""
+#include ""tensorflow/lite/kernels/internal/min.h""
 
 namespace tflite {
 namespace ops {
@@ -32,11 +34,11 @@ inline float ActivationValFloat(TfLiteFusedActivation act, float a) {
     case kTfLiteActNone:
       return a;
     case kTfLiteActRelu:
-      return std::max(0.0f, a);
+      return TfLiteMax(0.0f, a);
     case kTfLiteActRelu1:
-      return std::max(-1.0f, std::min(a, 1.0f));
+      return TfLiteMax(-1.0f, TfLiteMin(a, 1.0f));
     case kTfLiteActRelu6:
-      return std::max(0.0f, std::min(a, 6.0f));
+      return TfLiteMax(0.0f, TfLiteMin(a, 6.0f));
     case kTfLiteActTanh:
       return std::tanh(a);
     case kTfLiteActSignBit:
",0,train
d7503555753420aba3a4f9010bb5f7ed13d6c9ca,tensorflow/tensorflow,"Update GraphDef version to 401.

PiperOrigin-RevId: 311492238
Change-Id: I93cb2eda8127d2ca0504ba2e06911a994c190347",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 400  // Updated: 2020/5/13
+#define TF_GRAPH_DEF_VERSION 401  // Updated: 2020/5/14
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,test
b4a5927472f72b4c01683d14d76e3957336dc8c4,tensorflow/tensorflow,Update pfor.py,pfor.py,"@@ -1593,7 +1593,7 @@ class PFor:
           else:
             converter = _pfor_converter_registry.get(y_op.type, None)
           if converter is None:
-            root_cause = (f""there is no register converter."")
+            root_cause = (f""there is no register converter for this op."")
             has_variant_outputs = any(x.dtype == dtypes.variant for x in
                                       y_op.outputs)
             has_vectorized_variant_inputs = any(
",0,train
010599cb5005ec14c1021adec3079d1504c986a0,tensorflow/tensorflow,"Update GraphDef version to 47.

PiperOrigin-RevId: 250029076",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 46  // Updated: 2019/5/25
+#define TF_GRAPH_DEF_VERSION 47  // Updated: 2019/5/26
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
63ed3e6b5ae5ff2f8f1f4e93201a07995ebf7d7f,tensorflow/tensorflow,"Fix Model.fit for TPU async eager when catching OutOfRange errors.

PiperOrigin-RevId: 297377388
Change-Id: Id7a32962bb5f9451bb5d039dbe9e60d8287c64f5",training.py,"@@ -769,7 +769,11 @@ class Model(network.Network, version_utils.ModelVersionSelector):
                 step_num=step,
                 batch_size=batch_size):
               callbacks.on_train_batch_begin(step)
-              logs = train_function(iterator)
+              tmp_logs = train_function(iterator)
+              # Catch possible OutOfRangeError here.
+              # TODO(b/150292341): Allow multiple async steps.
+              context.async_wait()
+              logs = tmp_logs
               callbacks.on_train_batch_end(step, logs)
         epoch_logs = {m.name: m.result() for m in self.metrics}
 
@@ -996,7 +1000,9 @@ class Model(network.Network, version_utils.ModelVersionSelector):
                 graph_type='test',
                 step_num=step):
               callbacks.on_test_batch_begin(step)
-              logs = test_function(iterator)
+              tmp_logs = test_function(iterator)
+              context.async_wait()  # Possible OutOfRangeError here.
+              logs = tmp_logs
               callbacks.on_test_batch_end(step, logs)
       callbacks.on_test_end()
 
@@ -1176,7 +1182,9 @@ class Model(network.Network, version_utils.ModelVersionSelector):
         with data_handler.catch_stop_iteration():
           for step in data_handler.steps():
             callbacks.on_predict_batch_begin(step)
-            batch_outputs = predict_function(iterator)
+            tmp_batch_outputs = predict_function(iterator)
+            context.async_wait()  # Possible OutOfRangeError here.
+            batch_outputs = tmp_batch_outputs
             if outputs is None:
               outputs = nest.map_structure(lambda batch_output: [batch_output],
                                            batch_outputs)
",0,train
8bf282a345ca80f9e6d154df3bc7ac7f12a6457d,tensorflow/tensorflow,"Fixes tf.bool.as_numpy_dtype to return np.bool_ instead of np.bool (which is the same as Python `bool`).

PiperOrigin-RevId: 353340422
Change-Id: Ie2a243a5ab2d1372308d63b0ec2c34b9c3f0084c",dtypes.py,"@@ -518,7 +518,7 @@ _TF_TO_NP = {
     types_pb2.DT_INT64:
         np.int64,
     types_pb2.DT_BOOL:
-        np.bool,
+        np.bool_,
     types_pb2.DT_QINT8:
         _np_qint8,
     types_pb2.DT_QUINT8:
",0,train
34911101beb2e302b4afcaff79310845998e3530,tensorflow/tensorflow,"Fix error message formatting.

PiperOrigin-RevId: 208911623",conv_grad_ops.cc,"@@ -63,7 +63,7 @@ Status ConvBackpropExtractAndVerifyDimensionV2(
     return errors::InvalidArgument(
         label, "": Size of out_backprop doesn't match computed: "", ""actual = "",
         dim->output_size, "", computed = "", out_size,
-        ""spatial_dim: "", spatial_dim, "" input: "", dim->input_size,
+        "" spatial_dim: "", spatial_dim, "" input: "", dim->input_size,
         "" filter: "", dim->filter_size, "" output: "", dim->output_size,
         "" stride: "", dim->stride, "" dilation: "", dim->dilation);
   }
",0,train
7985e520910962b96d5f71f77d3f4ead1cb24f75,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-07-04

PiperOrigin-RevId: 256515094",compat.py,"@@ -27,7 +27,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 3)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 7, 4)
 
 
 @tf_export(""compat.forward_compatible"")
",0,test
fc9bde9c0675116490d204c21f81c764691503f9,tensorflow/tensorflow,Only register the _Arg and _Retval kernel for POD types on sycl,function_ops.cc,"@@ -87,8 +87,28 @@ class RetvalOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name(""_Arg"").Device(DEVICE_CPU), ArgOp);
 REGISTER_KERNEL_BUILDER(Name(""_Retval"").Device(DEVICE_CPU), RetvalOp);
 
-REGISTER_KERNEL_BUILDER(Name(""_Arg"").Device(DEVICE_SYCL), ArgOp);
-REGISTER_KERNEL_BUILDER(Name(""_Retval"").Device(DEVICE_SYCL), RetvalOp);
+#if TENSORFLOW_USE_SYCL
+#define REGISTER(type)     \
+  REGISTER_KERNEL_BUILDER( \
+      Name(""_Arg"").Device(DEVICE_SYCL).TypeConstraint<type>(""T""), ArgOp);
+  TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
+  TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(""_Arg"")
+						 .Device(DEVICE_GPU)
+						 .HostMemory(""output"")
+						 .TypeConstraint<int32>(""T""),
+						 ArgOp);
+#undef REGISTER
+#define REGISTER(type)     \
+  REGISTER_KERNEL_BUILDER( \
+      Name(""_Retval"").Device(DEVICE_SYCL).TypeConstraint<type>(""T""), RetvalOp);
+  TF_CALL_NUMBER_TYPES_NO_INT32(REGISTER)
+  TF_CALL_bool(REGISTER) REGISTER_KERNEL_BUILDER(Name(""_Retval"")
+						 .Device(DEVICE_GPU)
+						 .HostMemory(""input"")
+						 .TypeConstraint<int32>(""T""),
+						 RetvalOp);
+#undef REGISTER
+#endif
 
 #define REGISTER(type)     \
   REGISTER_KERNEL_BUILDER( \
",0,train
e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 402966641
Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",mhlo_to_lhlo_with_xla.cc,"@@ -1662,11 +1662,6 @@ Status HloToLhloModule(const BufferAssignment& assignment,
   module->setLoc(mlir::NameLoc::get(
       mlir::Identifier::get(hlo_module.name(), module.getContext())));
 
-  // Store the HloModule's unique_id in the MLIR module.
-  Builder builder(module.getContext());
-  module->setAttr(""mhlo.unique_id"",
-                  builder.getI64IntegerAttr(hlo_module.unique_id()));
-
   const HloComputation* computation = hlo_module.entry_computation();
 
   LhloDialectEmitter emitter(assignment, *computation, module);
",0,test
e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 402966641
Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",cpu_executable.cc,"@@ -52,15 +52,6 @@ limitations under the License.
 namespace xla {
 namespace cpu {
 
-static std::string ModuleUniqueName(absl::string_view module_name,
-                                    const HloModule* module) {
-  std::string unique_id;
-  if (module != nullptr) {
-    unique_id = absl::StrCat(""module."", module->unique_id(), ""."");
-  }
-  return absl::StrCat(unique_id, module_name);
-}
-
 CpuExecutable::CpuExecutable(
     std::unique_ptr<SimpleOrcJIT> jit,
     std::unique_ptr<const BufferAssignment> assignment,
@@ -75,9 +66,8 @@ CpuExecutable::CpuExecutable(
   if (assignment_) {
     buffer_assignment_.reset(new BufferAssignmentProto(assignment_->ToProto()));
   }
-  XlaDebugInfoManager::Get()->RegisterModule(
-      ModuleUniqueName(module_name_, shared_module().get()), shared_module(),
-      buffer_assignment_);
+  XlaDebugInfoManager::Get()->RegisterModule(module_name_, shared_module(),
+                                             buffer_assignment_);
 
   // Resolve symbols in the constructor rather than at execution time to avoid
   // races because FindSymbol is not thread safe.
@@ -95,9 +85,8 @@ CpuExecutable::CpuExecutable(
 }
 
 CpuExecutable::~CpuExecutable() {
-  XlaDebugInfoManager::Get()->UnregisterModule(
-      ModuleUniqueName(module_name_, shared_module().get()), shared_module(),
-      buffer_assignment_);
+  XlaDebugInfoManager::Get()->UnregisterModule(module_name_, shared_module(),
+                                               buffer_assignment_);
 }
 
 static StatusOr<MaybeOwningDeviceMemory> MemoryForAllocation(
",0,test
e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 402966641
Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",gpu_executable.cc,"@@ -78,15 +78,6 @@ bool NeedsAsyncCommsStream(Thunk& thunk) {
   }
 }
 
-static std::string ModuleUniqueName(absl::string_view module_name,
-                                    const HloModule* module) {
-  std::string unique_id;
-  if (module != nullptr) {
-    unique_id = absl::StrCat(""module."", module->unique_id(), ""."");
-  }
-  return absl::StrCat(unique_id, module_name);
-}
-
 }  // namespace
 
 void GpuExecutable::BefBufferDeleter::operator()(uint8_t* ptr) const {
@@ -116,15 +107,13 @@ GpuExecutable::GpuExecutable(GpuExecutable::Params params)
       entry_computation_profile_index_(params.entry_computation_profile_index),
       constants_(std::move(params.constants)),
       output_info_(std::move(params.output_info)) {
-  XlaDebugInfoManager::Get()->RegisterModule(
-      ModuleUniqueName(module_name_, shared_module().get()), shared_module(),
-      debug_buffer_assignment_);
+  XlaDebugInfoManager::Get()->RegisterModule(module_name_, shared_module(),
+                                             debug_buffer_assignment_);
 }
 
 GpuExecutable::~GpuExecutable() {
-  XlaDebugInfoManager::Get()->UnregisterModule(
-      ModuleUniqueName(module_name_, shared_module().get()), shared_module(),
-      debug_buffer_assignment_);
+  XlaDebugInfoManager::Get()->UnregisterModule(module_name_, shared_module(),
+                                               debug_buffer_assignment_);
 
   {
     // We could have issued host->device mem copies in ResolveConstantGlobals.
",0,test
e22785521834aac1b0b0fb7ccc4458c3ee5f8e62,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 402966641
Change-Id: I4b8bebb3c16a81b1716b6feaaff442c2083a7948",ir_emitter_unnested.cc,"@@ -5671,19 +5671,10 @@ Status IrEmitterUnnested::EmitLmhloRegion(mlir::Region* region) {
 
 Thunk::ThunkInfo IrEmitterUnnested::GetThunkInfo(mlir::Operation* op) {
   auto module = op->getParentOfType<mlir::ModuleOp>();
-  // Include the HloModule's unique_id in the thunk's module name so that xprof
-  // shows different modules differently, addressing b/202415436#comment24.
-  // xprof calls this the ""program_id"".
-  std::string unique_id_str;
-  if (auto unique_id_attr =
-          module->getAttrOfType<mlir::IntegerAttr>(""mhlo.unique_id"")) {
-    unique_id_str = absl::StrFormat("",program_id=%d"",
-                                    unique_id_attr.getValue().getZExtValue());
-  }
   Thunk::ThunkInfo thunk_info;
   thunk_info.profile_annotation = absl::StrFormat(
-      ""Thunk:#hlo_op=%s,hlo_module=%s%s#"", mlir::GetNameFromLoc(op->getLoc()),
-      mlir::GetNameFromLoc(module->getLoc()), unique_id_str);
+      ""Thunk:#hlo_op=%s,hlo_module=%s#"", mlir::GetNameFromLoc(op->getLoc()),
+      mlir::GetNameFromLoc(module->getLoc()));
   return thunk_info;
 }
 
",0,test
8a78a2973eee143fe8a255761b214ebe0687b585,tensorflow/tensorflow,merge and add more erase tests,map_ops_test.py,"@@ -30,7 +30,7 @@ from tensorflow.python.platform import test
 
 @test_util.run_all_in_graph_and_eager_modes
 class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
-
+  '''
   def testEmptyTensorMapSize(self):
     m = map_ops.empty_tensor_map()
     s = map_ops.tensor_map_size(m)
@@ -105,82 +105,78 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     self.assertAllEqual(b2, False)
 
   def testHasKeyLookup(self):
-    with self.test_session():
-      m = map_ops.empty_tensor_map()
-      k = constant_op.constant(1.0)
-      k2 = constant_op.constant(2.0)
-      v = constant_op.constant(2.0)
-      m = map_ops.tensor_map_insert(m, k, v)
+    m = map_ops.empty_tensor_map()
+    k = constant_op.constant(1.0)
+    k2 = constant_op.constant(2.0)
+    v = constant_op.constant(2.0)
+    m = map_ops.tensor_map_insert(m, k, v)
 
-      default_value = array_ops.zeros_like(v)
-      l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k),
+    default_value = array_ops.zeros_like(v)
+    l = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k),
+                              lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32),
+                              lambda: default_value)
+    l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2),
                                 lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32),
                                 lambda: default_value)
-      l2 = control_flow_ops.cond(map_ops.tensor_map_has_key(m, k2),
-                                 lambda: map_ops.tensor_map_lookup(m, k, dtypes.float32),
-                                 lambda: default_value)
-      self.assertAllClose(l, v)
-      self.assertAllClose(l2, default_value)
-
+    self.assertAllClose(l, v)
+    self.assertAllClose(l2, default_value)
+'''
   def testInsertLookupGrad(self):
     with backprop.GradientTape() as tape:
       m = map_ops.empty_tensor_map()
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
       tape.watch(v)
       m = map_ops.tensor_map_insert(m, k, v)
       l = map_ops.tensor_map_lookup(m, k, dtypes.float32)
       l *= 5
       g = tape.gradient(l, v)
-      self.assertAllClose(g, 5)
+      self.assertAllEqual(g, 5)
 
   def testMultipleInsertLookupGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
       m = map_ops.empty_tensor_map()
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
-      k2 = constant_op.constant(12.0)
-      v2 = constant_op.constant(22.0)
-      k3 = constant_op.constant(13.0)
-      v3 = constant_op.constant(23.0)
+      k2 = constant_op.constant(2.0)
+      k3 = constant_op.constant(3.0)
+      v = constant_op.constant(11.0)
+      v2 = constant_op.constant(12.0)
+      v3 = constant_op.constant(13.0)
       tape.watch(v)
       tape.watch(v2)
       tape.watch(v3)
       m = map_ops.tensor_map_insert(m, k, v)
       m = map_ops.tensor_map_insert(m, k2, v2)
       m = map_ops.tensor_map_insert(m, k3, v3)
-
       l = map_ops.tensor_map_lookup(m, k, v.dtype)
       l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype)
       l3 = map_ops.tensor_map_lookup(m, k3, v3.dtype)
       g = tape.gradient(l * 5, v)
       g2 = tape.gradient(l2 * 6, v2)
       g3 = tape.gradient(l3 * 7, v3)
-      self.assertAllClose(g, 5)
-      self.assertAllClose(g2, 6)
-      self.assertAllClose(g3, 7)
-
-  def testSameKeyInsertLookupGrad(self):
+      self.assertAllEqual(g, 5)
+      self.assertAllEqual(g2, 6)
+      self.assertAllEqual(g3, 7)
+  
+  def testInsertLookupComposeGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
       m = map_ops.empty_tensor_map()
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
-      v2 = constant_op.constant(22.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
       tape.watch(v)
-      tape.watch(v2)
       m = map_ops.tensor_map_insert(m, k, v)
-      m = map_ops.tensor_map_insert(m, k, v2)
-      l = map_ops.tensor_map_lookup(m, k, v2.dtype)
-      g = tape.gradient(l * 5, v)
-      g2 = tape.gradient(l * 5, v2)
-      self.assertAllClose(g, array_ops.zeros_like(v))
-      self.assertAllClose(g2, 5)
+      l = map_ops.tensor_map_lookup(m, k, v.dtype)
+      m = map_ops.tensor_map_insert(m, k2, l)
+      l2 = map_ops.tensor_map_lookup(m, k2, l.dtype)
+      g = tape.gradient(l2 * 5, v)
+      self.assertAllEqual(g, 5)
 
-  def testSameKeyAlternatingInsertLookupGrad(self):
+  def testReplaceLookupGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
       m = map_ops.empty_tensor_map()
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
       v2 = constant_op.constant(22.0)
       tape.watch(v)
       tape.watch(v2)
@@ -200,8 +196,8 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
   def testLookupAddGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
-      k2 = constant_op.constant(12.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
       v2 = constant_op.constant(22.0)
       tape.watch(v)
       tape.watch(v2)
@@ -217,14 +213,32 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       g3 = tape.gradient(l1 + l2 * 4, v2)
       self.assertAllEqual(g3, 4)
 
-  def testEraseGrad(self):
+  def testLookupMultiplyGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
-      m = map_ops.empty_tensor_map()
       k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
+      v2 = constant_op.constant(22.0)
       tape.watch(v)
-      k2 = constant_op.constant(12.0)
+      tape.watch(v2)
+      m = map_ops.empty_tensor_map()
+      m = map_ops.tensor_map_insert(m, k, v)
+      m = map_ops.tensor_map_insert(m, k2, v2)
+      l1 = map_ops.tensor_map_lookup(m, k, v.dtype)
+      l2 = map_ops.tensor_map_lookup(m, k2, v2.dtype)
+      g = tape.gradient(l1 * l2, [v, v2])
+      self.assertAllClose(g, [v2, v])
+      g2 = tape.gradient(l1 * l1, v)
+      self.assertAllClose(g2, 2*v)
+
+  def testEraseSecondGrad(self):
+    with backprop.GradientTape(persistent=True) as tape:
+      m = map_ops.empty_tensor_map()
+      k = constant_op.constant(1.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
       v2 = constant_op.constant(22.0)
+      tape.watch(v)
       tape.watch(v2)
       m = map_ops.tensor_map_insert(m, k, v)
       m = map_ops.tensor_map_insert(m, k2, v2)
@@ -236,7 +250,49 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.assertAllEqual(g, 5)
       g2 = tape.gradient(e * 6, v2)
       self.assertAllEqual(g2, 6)
+  
+  def testEraseFirstGrad(self):
+    with backprop.GradientTape(persistent=True) as tape:
+      m = map_ops.empty_tensor_map()
+      k = constant_op.constant(1.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
+      v2 = constant_op.constant(22.0)
+      tape.watch(v)
+      tape.watch(v2)
+      m = map_ops.tensor_map_insert(m, k, v)
+      m = map_ops.tensor_map_insert(m, k2, v2)
+      m, e = map_ops.tensor_map_erase(m, k, v.dtype)
+      l = map_ops.tensor_map_lookup(m, k2, v2.dtype)
+      self.assertAllClose(l, v2)
+      self.assertAllClose(e, v)
+      g = tape.gradient(l * 5, v2)
+      self.assertAllEqual(g, 5)
+      g2 = tape.gradient(e * 6, v)
+      self.assertAllEqual(g2, 6)
+      m, e2 = map_ops.tensor_map_erase(m, k2, v2.dtype)
+      g3 = tape.gradient(e2 * 7, v2)
 
+  def testEraseComposedGrad(self):
+    with backprop.GradientTape(persistent=True) as tape:
+      m = map_ops.empty_tensor_map()
+      k = constant_op.constant(1.0)
+      k2 = constant_op.constant(2.0)
+      v = constant_op.constant(11.0)
+      v2 = constant_op.constant(22.0)
+      tape.watch(v)
+      tape.watch(v2)
+      m = map_ops.tensor_map_insert(m, k, v)
+      m, e = map_ops.tensor_map_erase(m, k, v.dtype)
+      m = map_ops.tensor_map_insert(m, k2, e)
+      l = map_ops.tensor_map_lookup(m, k2, e.dtype)
+      self.assertAllClose(e, v)
+      self.assertAllClose(l, e)
+      g = tape.gradient(l * 5, v)
+      self.assertAllEqual(g, 5)
+      g2 = tape.gradient(e * 6, v)
+      self.assertAllEqual(g2, 6)
+    
   def testStringKeyGrad(self):
     with backprop.GradientTape(persistent=True) as tape:
       m = map_ops.empty_tensor_map()
@@ -312,20 +368,6 @@ class MapOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       g2 = tape.gradient(l + l2, v2)
       self.assertAllEqual(g2, 1)
 
-  def testReplaceGrad(self):
-    with backprop.GradientTape(persistent=True) as tape:
-      m = map_ops.empty_tensor_map()
-      k = constant_op.constant(1.0)
-      v = constant_op.constant(2.0)
-      v2 = constant_op.constant(3.0)
-      tape.watch(v)
-      tape.watch(v2)
-      m = map_ops.tensor_map_insert(m, k, v)
-      l = map_ops.tensor_map_lookup(m, k, v.dtype)
-      m = map_ops.tensor_map_insert(m, k, v2)
-      l2 = map_ops.tensor_map_lookup(m, k, v2.dtype)
-      g = tape.gradient(l + l2, v)
-      self.assertAllEqual(g, 1)
 
 if __name__ == '__main__':
   test.main()
",0,train
477cfa2aaa7a65c603b4e04df928ec45a1e0d4ca,tensorflow/tensorflow,"Let CategoryEncoding error out for negative values and gives better error message.

PiperOrigin-RevId: 336751008
Change-Id: If7fb43127c2587b7658e8aed63331413ac932779",category_encoding.py,"@@ -298,12 +298,18 @@ class CategoryEncoding(base_preprocessing_layer.CombinerPreprocessingLayer):
     binary_output = (self._output_mode == BINARY)
     if isinstance(inputs, sparse_tensor.SparseTensor):
       max_value = math_ops.reduce_max(inputs.values)
+      min_value = math_ops.reduce_min(inputs.values)
     else:
       max_value = math_ops.reduce_max(inputs)
-    condition = math_ops.greater_equal(
-        math_ops.cast(out_depth, max_value.dtype), max_value)
+      min_value = math_ops.reduce_min(inputs)
+    condition = math_ops.logical_and(
+        math_ops.greater_equal(
+            math_ops.cast(out_depth, max_value.dtype), max_value),
+        math_ops.greater_equal(
+            min_value, math_ops.cast(0, min_value.dtype)))
     control_flow_ops.Assert(
-        condition, [""Input must be less than max_token {}"".format(out_depth)])
+        condition, [""Input values must be in the range 0 <= values < max_tokens""
+                    "" with max_tokens={}"".format(out_depth)])
     if self._sparse:
       result = bincount_ops.sparse_bincount(
           inputs,
",0,test
477cfa2aaa7a65c603b4e04df928ec45a1e0d4ca,tensorflow/tensorflow,"Let CategoryEncoding error out for negative values and gives better error message.

PiperOrigin-RevId: 336751008
Change-Id: If7fb43127c2587b7658e8aed63331413ac932779",category_encoding_test.py,"@@ -277,8 +277,23 @@ class CategoryEncodingInputTest(keras_parameterized.TestCase,
     int_data = encoder_layer(input_data)
     self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
     model = keras.Model(inputs=input_data, outputs=int_data)
-    with self.assertRaisesRegex(errors.InvalidArgumentError,
-                                "".*must be less than max_token 3""):
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        "".*must be in the range 0 <= values < max_tokens.*""):
+      _ = model.predict(input_array, steps=1)
+
+  def test_dense_negative(self):
+    input_array = constant_op.constant([[1, 2, 0], [2, 2, -1]])
+    max_tokens = 3
+    expected_output_shape = [None, max_tokens]
+    encoder_layer = get_layer_class()(max_tokens)
+    input_data = keras.Input(shape=(3,), dtype=dtypes.int32)
+    int_data = encoder_layer(input_data)
+    self.assertAllEqual(expected_output_shape, int_data.shape.as_list())
+    model = keras.Model(inputs=input_data, outputs=int_data)
+    with self.assertRaisesRegex(
+        errors.InvalidArgumentError,
+        "".*must be in the range 0 <= values < max_tokens.*""):
       _ = model.predict(input_array, steps=1)
 
 
",0,test
e7c7116eabbcc6889da34d2ba0bca4ffe5639d84,tensorflow/tensorflow,"Core RNNCell implementations now use state_is_tuple=True by default

This is part of the deprecation process for non-tuple LSTM and MultiRNNCell
states.
Change: 129507912",models.py,"@@ -19,6 +19,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import functools
+
 from tensorflow.contrib import rnn as contrib_rnn
 from tensorflow.contrib.learn.python.learn.ops import autoencoder_ops
 from tensorflow.contrib.learn.python.learn.ops import dnn_ops
@@ -378,7 +380,8 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional,
     elif cell_type == 'gru':
       cell_fn = nn.rnn_cell.GRUCell
     elif cell_type == 'lstm':
-      cell_fn = nn.rnn_cell.BasicLSTMCell
+      cell_fn = functools.partial(
+          nn.rnn_cell.BasicLSTMCell, state_is_tuple=False)
     else:
       raise ValueError('cell_type {} is not supported. '.format(cell_type))
     # TODO: state_is_tuple=False is deprecated
@@ -394,9 +397,11 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional,
         bw_cell = contrib_rnn.AttentionCellWrapper(
           fw_cell, attn_length=attn_length, attn_size=attn_size,
           attn_vec_size=attn_vec_size, state_is_tuple=False)
-      rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers)
+      rnn_fw_cell = nn.rnn_cell.MultiRNNCell([fw_cell] * num_layers,
+                                             state_is_tuple=False)
       # backward direction cell
-      rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers)
+      rnn_bw_cell = nn.rnn_cell.MultiRNNCell([bw_cell] * num_layers,
+                                             state_is_tuple=False)
       # pylint: disable=unexpected-keyword-arg, no-value-for-parameter
       _, encoding = bidirectional_rnn(rnn_fw_cell,
                                       rnn_bw_cell,
@@ -411,7 +416,8 @@ def get_rnn_model(rnn_size, cell_type, num_layers, input_op_fn, bidirectional,
         rnn_cell = contrib_rnn.AttentionCellWrapper(
             rnn_cell, attn_length=attn_length, attn_size=attn_size,
             attn_vec_size=attn_vec_size, state_is_tuple=False)
-      cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers)
+      cell = nn.rnn_cell.MultiRNNCell([rnn_cell] * num_layers,
+                                      state_is_tuple=False)
       _, encoding = nn.rnn(cell,
                            x,
                            dtype=dtypes.float32,
",0,train
55ad623ecaf12de0260008395afb061cdf75f55d,tensorflow/tensorflow,"Automated rollback of change 152465346
Change: 152465844",math_ops.cc,"@@ -1256,8 +1256,6 @@ REGISTER_OP(""ArgMax"")
     .Doc(R""doc(
 Returns the index with the largest value across dimensions of a tensor.
 
-Note: in case of ties the identity of the return value is not guaranteed.
-
 dimension: int32, 0 <= dimension < rank(input).  Describes which dimension
   of the input Tensor to reduce across. For vectors, use dimension = 0.
 )doc"");
@@ -1272,8 +1270,6 @@ REGISTER_OP(""ArgMin"")
     .Doc(R""doc(
 Returns the index with the smallest value across dimensions of a tensor.
 
-Note: in case of ties the identity of the return value is not guaranteed.
-
 dimension: int32, 0 <= dimension < rank(input).  Describes which dimension
   of the input Tensor to reduce across. For vectors, use dimension = 0.
 )doc"");
",0,train
b07f8211409f2b2e46ab539291e824f2b7865885,tensorflow/tensorflow,remove unused sparse_ops import,nn_grad.py,"@@ -27,7 +27,6 @@ from tensorflow.python.ops import gen_nn_ops
 from tensorflow.python.ops import gradients_impl
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn_ops
-from tensorflow.python.ops import sparse_ops
 
 
 @ops.RegisterGradient(""Conv2DBackpropInput"")
",0,train
52aeafdf04af9f95500067dc353fd80728032b63,tensorflow/tensorflow,documenting that init_op will not be run when loading from checkpoint (#18051),session_manager.py,"@@ -229,10 +229,14 @@ class SessionManager(object):
     up to `max_wait_secs`, for recovery to succeed.
 
     If the model cannot be recovered successfully then it is initialized by
-    either running the provided `init_op`, or calling the provided `init_fn`.
-    The local_init_op is also run after init_op and init_fn, regardless of
+    running the `init_op` and calling `init_fn` if they are provided.
+    The `local_init_op` is also run after init_op and init_fn, regardless of
     whether the model was recovered successfully, but only if
-    ready_for_local_init_op passes.
+    `ready_for_local_init_op` passes.
+
+    If the model is recovered from a checkpoint it is assumed that all
+    global variables have been initialized, in particular neither `init_op`
+    nor `init_fn` will be executed.
 
     It is an error if the model cannot be recovered and no `init_op`
     or `init_fn` or `local_init_op` are passed.
",0,train
cdc13381936ce47a06318df8ec7ace48330940f3,tensorflow/tensorflow,"Add gemmlowp label for SquaredDifference and Sum ops

PiperOrigin-RevId: 258267779",reduce.cc,"@@ -523,6 +523,7 @@ TfLiteStatus EvalGeneric(TfLiteContext* context, TfLiteNode* node) {
 
 TfLiteStatus EvalSum(TfLiteContext* context, TfLiteNode* node) {
   OpContext op_context(context, node);
+  gemmlowp::ScopedProfilingLabel label(""Sum"");
   const auto& input = op_context.input;
   const auto& output = op_context.output;
   const bool same_scale =
",0,train
cdc13381936ce47a06318df8ec7ace48330940f3,tensorflow/tensorflow,"Add gemmlowp label for SquaredDifference and Sum ops

PiperOrigin-RevId: 258267779",squared_difference.cc,"@@ -95,6 +95,7 @@ void EvalSquaredDifference(TfLiteContext* context, TfLiteNode* node,
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   OpData* data = reinterpret_cast<OpData*>(node->user_data);
+  gemmlowp::ScopedProfilingLabel label(""SquaredDifference"");
 
   const TfLiteTensor* input1 = GetInput(context, node, kInputTensor1);
   const TfLiteTensor* input2 = GetInput(context, node, kInputTensor2);
",0,train
48b24214dd5da842bd00414b46f3e46319c777ee,tensorflow/tensorflow,"Update model in keras dist strat learning phase test to return consistent values.

PiperOrigin-RevId: 216461637",keras_test.py,"@@ -592,33 +592,37 @@ class TestDistributionStrategyWithDatasets(test.TestCase,
     # meaningful values. Currently we don't pass the learning phase if the
     # Lambda layer uses the learning phase.
     with self.cached_session():
-      x = keras.layers.Input(shape=(16,), name='input')
-      y = keras.layers.Dense(16)(x)
+      x = keras.layers.Input(shape=(1,), name='input')
+      y = keras.layers.Dense(1, kernel_initializer='ones')(x)
       z = keras.layers.Dropout(0.9999)(y)
       model = keras.Model(x, z)
+      initial_weights = model.get_weights()
 
       optimizer = gradient_descent.GradientDescentOptimizer(0.005)
       loss = 'mse'
       metrics = ['acc']
-      strategy = mirrored_strategy.MirroredStrategy(['/device:GPU:0',
-                                                     '/device:CPU:0'])
+      strategy = mirrored_strategy.MirroredStrategy(
+          ['/device:GPU:0', '/device:GPU:1'])
 
       model.compile(optimizer, loss, metrics=metrics, distribute=strategy)
 
-      inputs = np.random.rand(10, 16)
-      targets = np.ones((10, 16), dtype=np.float32)
+      inputs = np.ones((10, 1), dtype=np.float32)
+      targets = np.ones((10, 1), dtype=np.float32)
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
-      dataset = dataset.repeat(100)
-      dataset = dataset.batch(8)
-
-      hist = model.fit(dataset, epochs=5, steps_per_epoch=20, verbose=1)
-      self.assertEqual(hist.history['acc'][0], 1)
+      dataset = dataset.repeat().batch(8)
+      hist = model.fit(dataset, epochs=1, steps_per_epoch=20, verbose=1)
+      self.assertAlmostEqual(hist.history['acc'][0], 0, 0)
 
+      model.set_weights(initial_weights)
       evaluate_output = model.evaluate(dataset, steps=20)
-      self.assertEqual(evaluate_output[1], 0)
-
-      predict_output = model.predict(dataset, steps=1)
-      self.assertNotEqual(np.mean(predict_output), 0)
+      self.assertAlmostEqual(evaluate_output[1], 1, 0)
+
+      inputs = np.ones((10, 1), dtype=np.float32)
+      predict_dataset = dataset_ops.Dataset.from_tensor_slices(inputs)
+      predict_dataset = predict_dataset.repeat().batch(5)
+      output = model.predict(predict_dataset, steps=10)
+      ref_output = np.ones((50, 1), dtype=np.float32)
+      self.assertArrayNear(output[0], ref_output, 1e-1)
 
 
 class TestDistributionStrategyErrorCases(test.TestCase, parameterized.TestCase):
",0,train
b5bfebf6669982ccf818c3e9a69197ceca9dc456,tensorflow/tensorflow,"Comment style nitpick for StrategyExtendV2.

PiperOrigin-RevId: 282941111
Change-Id: Ide992cfe95dd02f5ecb1627c758326d577c49f9e",distribute_lib.py,"@@ -1164,8 +1164,8 @@ class StrategyExtendedV2(object):
 
   *Replica context vs. Cross-replica context*
 
-  _replica context_ is when we are in some function that is being called once
-  for each replica.  Otherwise we are in cross-replica context, which is
+  A _replica context_ applies when we are in some function that is being called
+  once for each replica.  Otherwise we are in cross-replica context, which is
   useful for calling `tf.distribute.Strategy` methods which operate across the
   replicas (like `reduce_to()`). By default you start in a replica context
   (the ""default single replica context"") and then some methods can switch you
",0,train
119161fed5d4c2ed38895aa19bcfc5893bd58995,tensorflow/tensorflow,"Better handling when `operators` is `None` instead of `[]`.

PiperOrigin-RevId: 368179066
Change-Id: I337a3cead57ba2b67b24499a750c85a1899e478f",visualize.py,"@@ -293,7 +293,7 @@ def GenerateGraph(subgraph_idx, g, opcode_mapper):
   second = {}
   pixel_mult = 200  # TODO(aselle): multiplier for initial placement
   width_mult = 170  # TODO(aselle): multiplier for initial placement
-  for op_index, op in enumerate(g[""operators""]):
+  for op_index, op in enumerate(g[""operators""] or []):
 
     for tensor_input_position, tensor_index in enumerate(op[""inputs""]):
       if tensor_index not in first:
@@ -487,8 +487,9 @@ def CreateHtmlFile(tflite_input, html_output):
     html += GenerateTableHtml(g[""tensors""], tensor_keys_to_display)
 
     # Print the ops.
-    html += ""<h3>Ops</h3>\n""
-    html += GenerateTableHtml(g[""operators""], op_keys_to_display)
+    if g[""operators""]:
+      html += ""<h3>Ops</h3>\n""
+      html += GenerateTableHtml(g[""operators""], op_keys_to_display)
 
     # Visual graph.
     html += ""<svg id='subgraph%d' width='1600' height='900'></svg>\n"" % (
",0,train
085a8afe4e67c036d2e21c1c178d32e5a6b5b401,tensorflow/tensorflow,"Allow MLIR bridge to run in the fallback mode if user sets the mlir_bridge_safe_mode.

PiperOrigin-RevId: 362441574
Change-Id: Iab408822ab29c216df061f1fe08a6acf7b78a459",mlir_bridge_rollout_policy.h,"@@ -35,6 +35,9 @@ enum class MlirBridgeRolloutPolicy {
   // features in the model, the MLIR bridge should be run. If the MLIR Bridge
   // errors, the fallback path should be used whenever possible.
   kEnabledAfterGraphAnalysis,
+  // The bridge was fallback enabled in a safe mode and passed all graph
+  // analysis checks.
+  kEnabledAfterGraphAnalysisSafeModeFallback
 };
 
 // Analyzes the user requested policy as well as the contents of the graph and
",0,train
085a8afe4e67c036d2e21c1c178d32e5a6b5b401,tensorflow/tensorflow,"Allow MLIR bridge to run in the fallback mode if user sets the mlir_bridge_safe_mode.

PiperOrigin-RevId: 362441574
Change-Id: Iab408822ab29c216df061f1fe08a6acf7b78a459",mlir_bridge_pass.cc,"@@ -103,6 +103,8 @@ MlirOptimizationPassState MlirBridgePass::GetPassState(
       return MlirOptimizationPassState::Enabled;
     case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis:
       return MlirOptimizationPassState::ShadowEnabled;
+    case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysisSafeModeFallback:
+      return MlirOptimizationPassState::FallbackEnabled;
     case MlirBridgeRolloutPolicy::kDisabledByUser:
     case MlirBridgeRolloutPolicy::kDisabledAfterGraphAnalysis:
       return MlirOptimizationPassState::Disabled;
@@ -173,9 +175,16 @@ MlirOptimizationPassState MlirBridgeV1CompatPass::GetPassState(
   MlirBridgeRolloutPolicy policy = GetMlirBridgeRolloutPolicy(
       graph, /*function_library=*/&function_library, config_proto,
       /*uses_uninitialized_resource_args=*/false);
-  return (policy == MlirBridgeRolloutPolicy::kEnabledByUser)
-             ? MlirOptimizationPassState::Enabled
-             : MlirOptimizationPassState::Disabled;
+  switch (policy) {
+    case MlirBridgeRolloutPolicy::kEnabledByUser:
+      return MlirOptimizationPassState::Enabled;
+    case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysisSafeModeFallback:
+      return MlirOptimizationPassState::FallbackEnabled;
+    case MlirBridgeRolloutPolicy::kEnabledAfterGraphAnalysis:
+    case MlirBridgeRolloutPolicy::kDisabledByUser:
+    case MlirBridgeRolloutPolicy::kDisabledAfterGraphAnalysis:
+      return MlirOptimizationPassState::Disabled;
+  }
 }
 
 Status MlirBridgeV1CompatPass::Run(const GraphOptimizationPassOptions& options,
",0,train
e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9,tensorflow/tensorflow,"Track symbolic shapes through shapeN operations

PiperOrigin-RevId: 177029912",shape_refiner.cc,"@@ -707,6 +707,8 @@ Status ShapeRefiner::ConstantPartialShape(InferenceContext* target_context,
     *result = target_context->Scalar();
   } else if (src_op == ""Shape"") {
     *result = src_context->input(0);
+  } else if (src_op == ""ShapeN"") {
+    *result = src_context->input(input_edge->src_output());
   } else if (src_op == ""Pack"") {
     std::vector<DimensionHandle> dims;
     // Pack is concatenating its input scalars to form the shape tensor vector.
",0,train
e424ba4a6d6e2c10f78f7f899de3c5d8dfb2e8c9,tensorflow/tensorflow,"Track symbolic shapes through shapeN operations

PiperOrigin-RevId: 177029912",graph_properties_test.cc,"@@ -825,6 +825,32 @@ TEST_F(GraphPropertiesTest, DoNotValidateColocationConstraints) {
   TF_EXPECT_OK(properties.InferStatically());
 }
 
+TEST_F(GraphPropertiesTest, ShapeTracking) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output a =
+      ops::Placeholder(s.WithOpName(""a""), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1, -1})));
+  Output b =
+      ops::Placeholder(s.WithOpName(""b""), DT_FLOAT,
+                       ops::Placeholder::Shape(PartialTensorShape({-1})));
+  Output zero = ops::Const(s.WithOpName(""zero""), 0.0f, {});
+  auto shp = ops::ShapeN(s.WithOpName(""shapes""), {a, b});
+  Output o1 = ops::Fill(s.WithOpName(""o1""), shp[0], zero);
+  Output o2 = ops::Fill(s.WithOpName(""o2""), shp[1], zero);
+
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  GraphProperties properties(item);
+  TF_CHECK_OK(properties.InferStatically());
+  const auto shape_a = properties.GetOutputProperties(""a"").at(0).shape();
+  const auto shape_b = properties.GetOutputProperties(""b"").at(0).shape();
+  const auto shape_o1 = properties.GetOutputProperties(""o1"").at(0).shape();
+  const auto shape_o2 = properties.GetOutputProperties(""o2"").at(0).shape();
+  EXPECT_EQ(shape_a.DebugString(), shape_o1.DebugString());
+  EXPECT_EQ(shape_b.DebugString(), shape_o2.DebugString());
+}
+
 }  // namespace
 }  // namespace grappler
 }  // namespace tensorflow
",0,train
02f55400f87b22f7ea0849c39022792d1e381afb,tensorflow/tensorflow,"custom_gradient functions should be able to return their inputs

PiperOrigin-RevId: 173723462",backprop_test.py,"@@ -569,5 +569,17 @@ class BackpropTest(test.TestCase):
         var.assign_sub(lr*grad)
     self.assertAllEqual(losses, [4.0, 3., 2., 1., 0.])
 
+  def testCustomGradientIdentity(self):
+
+    @custom_gradient.custom_gradient
+    def my_identity(x):
+
+      def grad(dresult):
+        return [2 * dresult]
+
+      return x, grad
+
+    self.assertAllEqual(backprop.gradients_function(my_identity)(1.0)[0], 2.0)
+
 if __name__ == '__main__':
   test.main()
",0,test
02f55400f87b22f7ea0849c39022792d1e381afb,tensorflow/tensorflow,"custom_gradient functions should be able to return their inputs

PiperOrigin-RevId: 173723462",custom_gradient.py,"@@ -22,6 +22,7 @@ from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
 from tensorflow.python.framework import ops as tf_ops
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import gen_array_ops
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_decorator
 
@@ -72,17 +73,19 @@ def custom_gradient(f):
 
     with tape.stop_recording():
       result, grad_fn = f(*args, **kwargs)
+      flat_result = nest.flatten(result)
+      # TODO(apassos) consider removing the identity below.
+      flat_result = [gen_array_ops.identity(x) for x in flat_result]
 
     def actual_grad_fn(*outputs):
       return nest.flatten(grad_fn(*outputs))
 
-    flat_result = nest.flatten(result)
     tape.record_operation(
         f.__name__,
         flat_result,
         input_tensors,
         actual_grad_fn)
     flat_result = list(flat_result)
-    return result
+    return nest.pack_sequence_as(result, flat_result)
 
   return tf_decorator.make_decorator(f, decorated)
",0,test
8dee064f1b06ca3cf225daa16c9a2a75ddd323f1,tensorflow/tensorflow,"Call Py_CLEAR on dead fields during TF_RESOURCE-to-ndarray conversion
Change: 152338333",tf_session_helper.cc,"@@ -375,6 +375,8 @@ Status GetPyArrayDescrForTensor(const TF_Tensor* tensor,
     PyObject* fields = PyList_New(1);
     PyList_SetItem(fields, 0, field);
     int convert_result = PyArray_DescrConverter(fields, descr);
+    Py_CLEAR(field);
+    Py_CLEAR(fields);
     if (convert_result != 1) {
       return errors::Internal(""Failed to create numpy array description for "",
                               ""TF_RESOURCE-type tensor"");
",0,train
5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors.

e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like:
AttributeError: index 1 is not in [0, 2)
Change: 119998790",tensor.cc,"@@ -714,34 +714,43 @@ void Tensor::FillDescription(TensorDescription* description) const {
   }
 }
 
-gtl::InlinedVector<int64, 5> Tensor::ComputeFlatInnerDims(
+gtl::InlinedVector<int64, 4> Tensor::ComputeFlatInnerDims(
     int64 num_out_dims) const {
-  gtl::InlinedVector<int64, 5> out_dims(num_out_dims, 0);
+  if (num_out_dims == dims()) {
+    return shape_.dim_sizes();
+  }
+  gtl::InlinedVector<int64, 4> out_dims(num_out_dims, 0);
   const int64 num_elements = NumElements();
-  if (num_elements != 0) {
-    int64 prod_out_dims = 1;
-    for (int64 out_dim = num_out_dims - 1; out_dim > 0; --out_dim) {
-      const int64 in_dim = out_dim + (dims() - num_out_dims);
-      out_dims[out_dim] =
-          (in_dim >= dims() || in_dim < 0) ? 1 : dim_size(in_dim);
-      prod_out_dims *= out_dims[out_dim];
-    }
+  int64 prod_out_dims = 1;
+  for (int64 out_dim = num_out_dims - 1; out_dim > 0; --out_dim) {
+    const int64 in_dim = out_dim + (dims() - num_out_dims);
+    out_dims[out_dim] = (in_dim >= dims() || in_dim < 0) ? 1 : dim_size(in_dim);
+    prod_out_dims *= out_dims[out_dim];
+  }
+  if (prod_out_dims != 0) {
     out_dims[0] = num_elements / prod_out_dims;
+  } else {
+    out_dims[0] = 0;
   }
   return out_dims;
 }
 
-gtl::InlinedVector<int64, 5> Tensor::ComputeFlatOuterDims(
+gtl::InlinedVector<int64, 4> Tensor::ComputeFlatOuterDims(
     int64 num_out_dims) const {
-  gtl::InlinedVector<int64, 5> out_dims(num_out_dims, 0);
+  if (num_out_dims == dims()) {
+    return shape_.dim_sizes();
+  }
+  gtl::InlinedVector<int64, 4> out_dims(num_out_dims, 0);
   const int64 num_elements = NumElements();
-  if (num_elements != 0) {
-    int64 prod_out_dims = 1;
-    for (int64 out_dim = 0; out_dim < num_out_dims - 1; ++out_dim) {
-      out_dims[out_dim] = out_dim >= dims() ? 1 : dim_size(out_dim);
-      prod_out_dims *= out_dims[out_dim];
-    }
+  int64 prod_out_dims = 1;
+  for (int64 out_dim = 0; out_dim < num_out_dims - 1; ++out_dim) {
+    out_dims[out_dim] = out_dim >= dims() ? 1 : dim_size(out_dim);
+    prod_out_dims *= out_dims[out_dim];
+  }
+  if (prod_out_dims != 0) {
     out_dims[num_out_dims - 1] = num_elements / prod_out_dims;
+  } else {
+    out_dims[num_out_dims - 1] = 0;
   }
   return out_dims;
 }
",0,test
5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors.

e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like:
AttributeError: index 1 is not in [0, 2)
Change: 119998790",tensor.h,"@@ -361,8 +361,11 @@ class Tensor {
   void FillDimsAndValidateCompatibleShape(
       gtl::ArraySlice<int64> new_sizes,
       Eigen::array<Eigen::DenseIndex, NDIMS>* dims) const;
-  gtl::InlinedVector<int64, 5> ComputeFlatInnerDims(int64 num_out_dims) const;
-  gtl::InlinedVector<int64, 5> ComputeFlatOuterDims(int64 num_out_dims) const;
+
+  // TODO(rmlarsen): These shouldn't hardcode '4' so that it lines up with
+  // TensorShape's InlineVector.
+  gtl::InlinedVector<int64, 4> ComputeFlatInnerDims(int64 num_out_dims) const;
+  gtl::InlinedVector<int64, 4> ComputeFlatOuterDims(int64 num_out_dims) const;
 
   TensorShape shape_;
   TensorBuffer* buf_;
",0,test
5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors.

e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like:
AttributeError: index 1 is not in [0, 2)
Change: 119998790",tensor_test.cc,"@@ -267,6 +267,46 @@ TEST(Tensor_Float, Reshape) {
     EXPECT_EQ(flat_outer_dims(0, 0, 0, 0, 0), 0.01f);
     EXPECT_EQ(flat_outer_dims(1, 2, 3, 4, 0), 0.02f);
   }
+
+  Tensor zero_t(DT_FLOAT, TensorShape({3, 0, 2, 0, 5}));
+  {
+    auto flat_outer_dims = zero_t.flat_outer_dims<float>();
+    EXPECT_EQ(3, flat_outer_dims.dimension(0));
+    EXPECT_EQ(0, flat_outer_dims.dimension(1));
+  }
+  {
+    auto flat_outer_dims = zero_t.flat_outer_dims<float, 3>();
+    EXPECT_EQ(3, flat_outer_dims.dimension(0));
+    EXPECT_EQ(0, flat_outer_dims.dimension(1));
+    EXPECT_EQ(0, flat_outer_dims.dimension(2));
+  }
+  {
+    auto flat_outer_dims = zero_t.flat_outer_dims<float, 5>();
+    EXPECT_EQ(3, flat_outer_dims.dimension(0));
+    EXPECT_EQ(0, flat_outer_dims.dimension(1));
+    EXPECT_EQ(2, flat_outer_dims.dimension(2));
+    EXPECT_EQ(0, flat_outer_dims.dimension(3));
+    EXPECT_EQ(5, flat_outer_dims.dimension(4));
+  }
+  {
+    auto flat_inner_dims = zero_t.flat_inner_dims<float>();
+    EXPECT_EQ(0, flat_inner_dims.dimension(0));
+    EXPECT_EQ(5, flat_inner_dims.dimension(1));
+  }
+  {
+    auto flat_inner_dims = zero_t.flat_inner_dims<float, 3>();
+    EXPECT_EQ(0, flat_inner_dims.dimension(0));
+    EXPECT_EQ(0, flat_inner_dims.dimension(1));
+    EXPECT_EQ(5, flat_inner_dims.dimension(2));
+  }
+  {
+    auto flat_inner_dims = zero_t.flat_inner_dims<float, 5>();
+    EXPECT_EQ(3, flat_inner_dims.dimension(0));
+    EXPECT_EQ(0, flat_inner_dims.dimension(1));
+    EXPECT_EQ(2, flat_inner_dims.dimension(2));
+    EXPECT_EQ(0, flat_inner_dims.dimension(3));
+    EXPECT_EQ(5, flat_inner_dims.dimension(4));
+  }
 }
 
 TEST(Tensor_Scalar, Basics) {
",0,test
5a6117908e8ddf563333459d5eea825db51cc6d0,tensorflow/tensorflow,"The behavior of outer_dimensions was different than before when the selected indices had a 0. Making it emulate the old behavior because gather stopped working for length 0 Tensors.

e.g.: Gathering row 1 from a Tensor of shape [2, 0] should result in a [1, 0] Tensor, but instead resulted in an error like:
AttributeError: index 1 is not in [0, 2)
Change: 119998790",gather_op_test.cc,"@@ -78,6 +78,19 @@ TEST_F(GatherOpTest, Simple_TwoD32) {
   test::ExpectTensorEqual<float>(expected, *GetOutput(0));
 }
 
+TEST_F(GatherOpTest, ZeroSize_TwoD32) {
+  MakeOp(DT_INT32);
+
+  // Feed and run
+  AddInputFromArray<float>(TensorShape({5, 0}), {});
+  AddInputFromArray<int32>(TensorShape({4}), {0, 4, 0, 2});
+  TF_ASSERT_OK(RunOpKernel());
+
+  // Check the output.
+  Tensor expected(allocator(), DT_FLOAT, TensorShape({4, 0}));
+  test::ExpectTensorEqual<float>(expected, *GetOutput(0));
+}
+
 TEST_F(GatherOpTest, Simple_TwoD64) {
   MakeOp(DT_INT64);
 
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",hlo_legalize_to_lhlo.cc,"@@ -165,6 +165,32 @@ class HloToLhloOpConverter<mhlo::DotOp> : public BaseOpConversion<mhlo::DotOp> {
   }
 };
 
+struct HloToLhloCustomCallOpConverter
+    : public BaseOpConversion<mhlo::CustomCallOp> {
+ public:
+  using BaseOpConversion<mhlo::CustomCallOp>::BaseOpConversion;
+
+  LogicalResult matchAndRewrite(
+      mhlo::CustomCallOp hloOp, ArrayRef<Value> operands,
+      ConversionPatternRewriter& rewriter) const final {
+    Operation* op = hloOp.getOperation();
+    SmallVector<Value, 2> buffer_args(operands.begin(), operands.end());
+    if (failed(ConvertResults(op, buffer_args, rewriter))) return failure();
+
+    auto lhloOp = rewriter.create<lmhlo::CustomCallOp>(
+        op->getLoc(), llvm::None, buffer_args, op->getAttrs());
+    // Setup AttrSizedOperandSegments attribute to indicate number of operands
+    // for args and outputs.
+    const int32_t segments[2] = {static_cast<int32_t>(operands.size()),
+                                 static_cast<int32_t>(op->getNumResults())};
+    lhloOp.setAttr(lhloOp.getOperandSegmentSizeAttr(),
+                   rewriter.getI32VectorAttr(segments));
+
+    rewriter.replaceOp(op, ArrayRef<Value>(buffer_args).slice(operands.size()));
+    return success();
+  }
+};
+
 struct HloToLhloDynamicBroadcastInDimOpConverter
     : public BaseOpConversion<mhlo::DynamicBroadcastInDimOp> {
  public:
@@ -572,6 +598,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context,
                                         OwningRewritePatternList* patterns) {
   // clang-format off
   patterns->insert<
+      HloToLhloCustomCallOpConverter,
       HloToLhloDotGeneralOpConverter,
       HloToLhloDynamicBroadcastInDimOpConverter,
       HloToLhloDynamicReshapeConverter,
@@ -588,7 +615,6 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context,
       HloToLhloOpConverter<mhlo::ConvertOp>,
       HloToLhloOpConverter<mhlo::CopyOp>,
       HloToLhloOpConverter<mhlo::CosOp>,
-      HloToLhloOpConverter<mhlo::CustomCallOp>,
       HloToLhloOpConverter<mhlo::DivOp>,
       HloToLhloOpConverter<mhlo::DotOp>,
       HloToLhloOpConverter<mhlo::ExpOp>,
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mlir_hlo_builder.cc,"@@ -149,7 +149,7 @@ StatusOr<XlaOp> MlirHloBuilder::CustomCallInternal(
       loc_, ty, GetValues(operands), builder_.getStringAttr(call_target_name),
       /*has_side_effect=*/builder_.getBoolAttr(has_side_effect),
       builder_.getStringAttr(opaque));
-  return MakeXlaOp(op);
+  return MakeXlaOp(op.getResult(0));
 }
 
 StatusOr<XlaOp> MlirHloBuilder::ReduceInternal(
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mlir_hlo_to_hlo.cc,"@@ -770,11 +770,12 @@ LogicalResult ExportXlaOp(ConvertOp op, OpLoweringContext ctx) {
 LogicalResult ExportXlaOp(CustomCallOp op, OpLoweringContext ctx) {
   // XLA client builder API does not support generating custom call instructions
   // with side effect.
-  if (op.has_side_effect()) return failure();
+  if (op.has_side_effect() || op.getNumResults() != 1) return failure();
+  Value result = op.getResult(0);
   auto& value_map = *ctx.values;
-  value_map[op] = xla::CustomCall(
+  value_map[result] = xla::CustomCall(
       ctx.builder, std::string(op.call_target_name()), GetTuple(op.args(), ctx),
-      xla::TypeToShape(op.getType()), std::string(op.backend_config()));
+      xla::TypeToShape(result.getType()), std::string(op.backend_config()));
   return success();
 }
 
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",legalize_tf.cc,"@@ -5155,7 +5155,7 @@ class ConvertXlaShardingOp : public OpRewritePattern<TF::XlaShardingOp> {
         /*has_side_effect=*/rewriter.getBoolAttr(false),
         /*backend_config=*/rewriter.getStringAttr(""""));
     custom_call.setAttr(kShardingAttr, op._XlaShardingAttr());
-    rewriter.replaceOp(op, custom_call.getResult());
+    rewriter.replaceOp(op, custom_call.getResult(0));
 
     return success();
   }
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mhlo_to_lhlo_with_xla.cc,"@@ -188,23 +188,31 @@ class XlaHloToLhloPass
 
 }  // namespace
 
+Status LhloDialectEmitter::CreateOperands(
+    HloInstruction* instr, llvm::SmallVectorImpl<Value>& operands,
+    size_t& num_arguments, size_t& num_results) {
+  for (const HloInstruction* operand : instr->operands()) {
+    TF_RETURN_IF_ERROR(GetOrCreateView(operand, &operands));
+  }
+  num_arguments = operands.size();
+  TF_RETURN_IF_ERROR(GetOrCreateView(instr, &operands));
+  num_results = operands.size() - num_arguments;
+  return Status::OK();
+}
+
 template <typename OpType>
-StatusOr<OpType> LhloDialectEmitter::CreateOpWithoutAttrs(
-    HloInstruction* instr) {
+StatusOr<OpType> LhloDialectEmitter::CreateOpWithoutAttrs(HloInstruction* instr,
+                                                          size_t& num_arguments,
+                                                          size_t& num_results) {
   Location loc = getLocation(instr);
   std::pair<Identifier, Attribute> attrs[] = {
       {Identifier::get(""name"", builder_.getContext()),
        builder_.getStringAttr(instr->name())},
   };
-  ArrayRef<Type> rets{};
-
   llvm::SmallVector<Value, 4> operands;
-  for (const HloInstruction* operand : instr->operands()) {
-    TF_RETURN_IF_ERROR(GetOrCreateView(operand, &operands));
-  }
-  TF_RETURN_IF_ERROR(GetOrCreateView(instr, &operands));
-
-  return builder_.create<OpType>(loc, rets, operands, attrs);
+  TF_RETURN_IF_ERROR(
+      CreateOperands(instr, operands, num_arguments, num_results));
+  return builder_.create<OpType>(loc, llvm::None, operands, attrs);
 }
 
 StatusOr<mlir::Operation*> LhloDialectEmitter::EmitOp(HloInstruction* instr) {
@@ -479,13 +487,19 @@ StatusOr<lmhlo::SelectAndScatterOp> LhloDialectEmitter::EmitSelectAndScatterOp(
 
 StatusOr<lmhlo::CustomCallOp> LhloDialectEmitter::EmitCustomCallOp(
     HloInstruction* instr) {
+  size_t num_arguments, num_results;
   TF_ASSIGN_OR_RETURN(auto custom_call,
-                      CreateOpWithoutAttrs<lmhlo::CustomCallOp>(instr));
+                      CreateOpWithoutAttrs<lmhlo::CustomCallOp>(
+                          instr, num_arguments, num_results));
   auto* custom_call_instr = ::xla::Cast<::xla::HloCustomCallInstruction>(instr);
   custom_call.call_target_nameAttr(
       builder_.getStringAttr(custom_call_instr->custom_call_target()));
   custom_call.backend_configAttr(
       builder_.getStringAttr(custom_call_instr->opaque()));
+  const int32_t segments[2] = {static_cast<int32_t>(num_arguments),
+                               static_cast<int32_t>(num_results)};
+  custom_call.setAttr(lmhlo::CustomCallOp::getOperandSegmentSizeAttr(),
+                      builder_.getI32VectorAttr(segments));
   return custom_call;
 }
 
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",mhlo_to_lhlo_with_xla.h,"@@ -58,8 +58,20 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault {
   ::xla::StatusOr<lmhlo::CustomCallOp> EmitCustomCallOp(
       ::xla::HloInstruction* instr);
 
+  ::xla::Status CreateOperands(::xla::HloInstruction* instr,
+                               SmallVectorImpl<Value>& operands,
+                               size_t& num_arguments, size_t& num_results);
+
+  template <typename OpType>
+  ::xla::StatusOr<OpType> CreateOpWithoutAttrs(::xla::HloInstruction* instr) {
+    size_t unused;
+    return CreateOpWithoutAttrs<OpType>(instr, unused, unused);
+  }
+
   template <typename OpType>
-  ::xla::StatusOr<OpType> CreateOpWithoutAttrs(::xla::HloInstruction* instr);
+  ::xla::StatusOr<OpType> CreateOpWithoutAttrs(::xla::HloInstruction* instr,
+                                               size_t& num_arguments,
+                                               size_t& num_results);
 
   template <typename T>
   DenseIntElementsAttr GetI64DenseElementsAttr(const T& container) {
@@ -117,25 +129,25 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault {
 
   // This map provides access to MLIR buffers for each HLO buffer allocation.
   // The MLIR buffers are all `memref<{size}xi8>` and correspond to function
-  // parameters. It is populated at the beginning of the processing with all the
-  // buffer allocations and is unchanged afterward. Every HLOInstruction is
-  // using a ""slice"" of the buffer allocation and providing shape, layout, and
-  // Dtype. An MLIR view is used separately to model slices into the allocations
-  // (see below).
+  // parameters. It is populated at the beginning of the processing with all
+  // the buffer allocations and is unchanged afterward. Every HLOInstruction
+  // is using a ""slice"" of the buffer allocation and providing shape, layout,
+  // and Dtype. An MLIR view is used separately to model slices into the
+  // allocations (see below).
   llvm::DenseMap<const ::xla::BufferAllocation*, Value> allocations_;
 
   // This map provides access to MLIR buffers for each HLO instruction, keyed
   // instruction identity. A slice is contained in a BufferAllocation, and has
   // an offset and a size.
   //
-  // As for why we don't use HloInstruction*, see GetOrCreateView(), but mostly
-  // we want to leverage better of the aliased buffers.
+  // As for why we don't use HloInstruction*, see GetOrCreateView(), but
+  // mostly we want to leverage better of the aliased buffers.
   //
   // If the HloInstruction is a tuple, all leaf nodes are stored flattened.
   // Otherwise, there will be a single buffer.
   //
-  // An MLIR buffer is either an input parameter, or a ViewOp in the case where
-  // the slice is only part of its allocation.
+  // An MLIR buffer is either an input parameter, or a ViewOp in the case
+  // where the slice is only part of its allocation.
   //
   // `slices_` is populated lazily in the `GetOrCreateView()` helper as we
   // process every instruction.
@@ -153,7 +165,8 @@ class LhloDialectEmitter : public ::xla::DfsHloVisitorWithDefault {
   // computation.
   ModuleOp module_;
 
-  // The builder keeps track of the current insertion point in the MLIR module.
+  // The builder keeps track of the current insertion point in the MLIR
+  // module.
   OpBuilder builder_;
   // Convenient ""cached"" access to this widely used MLIR type (i8).
   Type i8_type_;
",0,test
2cb73e33cefd00b51672cf59c588b95c0af79223,tensorflow/tensorflow,"[MLIR:HLO] Extend CustomCall to support multiple outputs.

- Extend MHLO CustomCall to have multiple tensors as results.
- Extend LHLO CustomCall to have multiple memrefs for output operands.
- Fix HLO->LHLO and XLA HLO->LHLO mapping for CustomCall to setup the
  operand_segment_sizes attribute correctly.

PiperOrigin-RevId: 342067762
Change-Id: Ic7cc31683168c9e0802a19d90831333abddbe7bf",ir_emitter_unnested.cc,"@@ -698,7 +698,9 @@ Status IrEmitterUnnested::EmitSliceToDynamicFromMlir(
 
   const Shape& input_shape =
       TypeToShape(slice_to_dynamic.args().front().getType());
-  const Shape& data_shape = TypeToShape(slice_to_dynamic.output().getType());
+  TF_RET_CHECK(slice_to_dynamic.output().size() == 1);
+  const Shape& data_shape =
+      TypeToShape(slice_to_dynamic.output().front().getType());
 
   // TODO(jurahul): data_shape here is the static shape of the output (which has
   // a dynamic shape in XLA). Currently, we are mapping that to a static shaped
",0,test
d26ee9801c8117f7fd6297a05a82eab98023a2c3,tensorflow/tensorflow,bug fix in the ROCm python implementation for gpu_lstm op,recurrent_v2.py,"@@ -1380,6 +1380,8 @@ def gpu_lstm(inputs, init_h, init_c, kernel, recurrent_kernel, bias, mask,
     # o is output gate weights.
     # c is cell gate weights.
     weights = [weights[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
+    # full_bias is a tensor of shape (8*n,)
+    full_bias = array_ops.split(full_bias, 8, axis=0)
     full_bias = [full_bias[x] for x in (0, 1, 3, 2, 4, 5, 7, 6)]
 
   params = _canonical_to_params(
",0,train
e6ab20a481029d8839114867bf070ac6ebf8d0f3,tensorflow/tensorflow,"Skips another two test cases in ttf test where we don't have cpu
kernels for fft.
Change: 110621369",fft_ops_test.py,"@@ -74,16 +74,18 @@ class FFT2DOpsTest(tf.test.TestCase):
       self._Compare(gen(shape))
 
   def testEmpty(self):
-    x = np.zeros([40, 0]).astype(np.complex64)
-    self.assertEqual(x.shape, self._tfFFT2D(x).shape)
-    self.assertEqual(x.shape, self._tfIFFT2D(x).shape)
+    if tf.test.IsBuiltWithCuda():
+      x = np.zeros([40, 0]).astype(np.complex64)
+      self.assertEqual(x.shape, self._tfFFT2D(x).shape)
+      self.assertEqual(x.shape, self._tfIFFT2D(x).shape)
 
   def testError(self):
-    x = np.zeros([1, 2, 3]).astype(np.complex64)
-    with self.assertRaisesOpError(""Input is not a matrix""):
-      self._tfFFT2D(x)
-    with self.assertRaisesOpError(""Input is not a matrix""):
-      self._tfIFFT2D(x)
+    if tf.test.IsBuiltWithCuda():
+      x = np.zeros([1, 2, 3]).astype(np.complex64)
+      with self.assertRaisesOpError(""Input is not a matrix""):
+        self._tfFFT2D(x)
+      with self.assertRaisesOpError(""Input is not a matrix""):
+        self._tfIFFT2D(x)
 
 
 if __name__ == ""__main__"":
",0,train
4f8ce7437431e9a1a47535ff05ef5011a694f244,tensorflow/tensorflow,"[TF2XLA] Deprecate xla.experimental.compile

PiperOrigin-RevId: 325151668
Change-Id: I1a0ac5d58e8237cf47785034086c7cdc240ba116",xla.py,"@@ -37,6 +37,7 @@ from tensorflow.python.util import compat
 from tensorflow.python.util import nest
 from tensorflow.python.util import tf_inspect
 from tensorflow.python.util.compat import collections_abc
+from tensorflow.python.util.deprecation import deprecated
 from tensorflow.python.util.tf_export import tf_export
 
 _XLA_COMPILE_ATTR = '_xla_compile_id'
@@ -64,6 +65,10 @@ _UNSUPPORTED_OPS = set([
 
 
 @tf_export('xla.experimental.compile')
+@deprecated(
+    None, 'xla.experimental.compile is deprecated. Consider using '
+    'tf.function(experimental_compile=True)',
+    warn_once=True)
 def compile(computation, inputs=None):  # pylint: disable=redefined-builtin
   """"""Builds an operator that compiles and runs `computation` with XLA.
 
",0,train
d604689ea7a24dfc4f8994825b3ca9e0c63ddc9b,tensorflow/tensorflow,"Add some missing dependencies so that the TPU version of TensorFlow builds

PiperOrigin-RevId: 323477747
Change-Id: I13393c728bda8f6c541955513a7e6315799ec844",tpu_compilation_device.cc,"@@ -18,7 +18,14 @@ limitations under the License.
 #include ""tensorflow/core/tpu/tpu_node_device_util.h""
 
 namespace tensorflow {
+namespace {
 
-REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter);
+bool RegisterTpuXlaBackend() {
+  REGISTER_XLA_BACKEND(DEVICE_TPU_XLA_JIT, kTpuAllTypes, TpuOpFilter);
+  return true;
+}
 
+static bool tpu_xla_backend_registered = RegisterTpuXlaBackend();
+
+}  // namespace
 }  // namespace tensorflow
",0,train
b6ed9186089de852c933244a7d772f836cc3eb27,tensorflow/tensorflow,"Update tensorboard dependency to 1.13.x

TensorBoard release: https://pypi.org/project/tensorboard/1.13.0/

PiperOrigin-RevId: 235563447",setup.py,"@@ -57,7 +57,7 @@ REQUIRED_PACKAGES = [
     'numpy >= 1.14.5, < 2.0',
     'six >= 1.10.0',
     'protobuf >= 3.6.1',
-    'tensorboard >= 1.12.0, < 1.13.0',
+    'tensorboard >= 1.13.0, < 1.14.0',
     'tensorflow_estimator >= 1.13.0rc0, < 1.14.0rc0',
     'termcolor >= 1.1.0',
 ]
",0,test
8cdd551adad84cb10631c72ad8b931061b350166,tensorflow/tensorflow,Adding ROCm support to optional_ops,optional_ops.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 #define EIGEN_USE_THREADS
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/data/optional_ops.h""
@@ -34,4 +34,4 @@ REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(ADD_VARIANT_BINARY_OP, DEVICE_GPU,
 }  // namespace data
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,test
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_area_op.cc,"@@ -19,7 +19,6 @@ limitations under the License.
 #include <algorithm>
 #include <memory>
 
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -28,6 +27,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/util/image_resizer_state.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 
@@ -144,17 +144,17 @@ class ResizeAreaOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
     // The op always did the correct thing with regard to pixel centers, so we
     // always pass false here for half_pixel_centers since ImageResizerState
     // enforces that if align_corners_ is true, half_pixel_centers must be
     // false.
     ImageResizerState st(align_corners_, /*unused half_pixel_centers=*/false);
-    st.ValidateAndCreateOutput(context, input);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
-    typename TTypes<T, 4>::ConstTensor input_data(input.tensor<T, 4>());
+    typename TTypes<T, 4>::ConstTensor input_data(
+        context->input(0).tensor<T, 4>());
 
     // Precompute values used when iterating over x coordinates within a row.
     // Note that it may be useful to cache x_interps for a given
",0,train
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_bicubic_op.cc,"@@ -21,7 +21,6 @@ limitations under the License.
 #include <algorithm>
 #include <array>
 
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -30,6 +29,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/util/image_resizer_state.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 namespace {
@@ -557,13 +557,13 @@ class ResizeBicubicOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
     ImageResizerState st(align_corners_, half_pixel_centers_);
-    st.ValidateAndCreateOutput(context, input);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
-    typename TTypes<T, 4>::ConstTensor input_data(input.tensor<T, 4>());
+    typename TTypes<T, 4>::ConstTensor input_data(
+        context->input(0).tensor<T, 4>());
     TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();
 
     interpolate_with_caching<T>(input_data, st, half_pixel_centers_,
@@ -587,16 +587,15 @@ class ResizeBicubicOpGrad : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     // Validate input.
-    // First argument is gradient with respect to resized image.
-    const Tensor& input = context->input(0);
-    const Tensor& original_image = context->input(1);
-
     ImageResizerGradientState st(align_corners_, half_pixel_centers_);
-    st.ValidateAndCreateOutput(context, input, original_image);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
-    TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
+    // First argument is gradient with respect to resized image.
+    TTypes<float, 4>::ConstTensor input_grad =
+        context->input(0).tensor<float, 4>();
+
     typename TTypes<T, 4>::Tensor output_grad(st.output->tensor<T, 4>());
 
     ResizeBicubicGrad<T>(input_grad, st, half_pixel_centers_, output_grad);
",0,train
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_bilinear_op.cc,"@@ -28,7 +28,6 @@ limitations under the License.
 
 #include <memory>
 
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -38,6 +37,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/util/image_resizer_state.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 
@@ -54,16 +54,16 @@ class ResizeBilinearOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
     ImageResizerState st(align_corners_, half_pixel_centers_);
-    st.ValidateAndCreateOutput(context, input);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
     // Return if the output is empty.
     if (st.output->NumElements() == 0) return;
 
-    typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
+    typename TTypes<T, 4>::ConstTensor image_data(
+        context->input(0).tensor<T, 4>());
     TTypes<float, 4>::Tensor output_data = st.output->tensor<float, 4>();
 
     functor::ResizeBilinear<Device, T>()(
@@ -370,16 +370,14 @@ class ResizeBilinearOpGrad : public OpKernel {
 
   void Compute(OpKernelContext* context) override {
     // Validate input.
-    // First argument is gradient with respect to resized image.
-    const Tensor& input = context->input(0);
-    const Tensor& original_image = context->input(1);
-
     ImageResizerGradientState st(align_corners_, half_pixel_centers_);
-    st.ValidateAndCreateOutput(context, input, original_image);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
-    TTypes<float, 4>::ConstTensor input_grad = input.tensor<float, 4>();
+    // First argument is gradient with respect to resized image.
+    TTypes<float, 4>::ConstTensor input_grad =
+        context->input(0).tensor<float, 4>();
 
     if (!std::is_same<T, Eigen::half>::value &&
         !std::is_same<T, Eigen::bfloat16>::value) {
",0,train
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,resize_nearest_neighbor_op.cc,"@@ -20,7 +20,6 @@ limitations under the License.
 
 #include <memory>
 
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -29,6 +28,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/util/image_resizer_state.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 
@@ -46,9 +46,8 @@ class ResizeNearestNeighborOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
     ImageResizerState st(align_corners_, half_pixel_centers_);
-    st.ValidateAndCreateOutput(context, input);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
@@ -59,7 +58,8 @@ class ResizeNearestNeighborOp : public OpKernel {
     // Return if the output is empty.
     if (st.output->NumElements() == 0) return;
 
-    typename TTypes<T, 4>::ConstTensor input_data(input.tensor<T, 4>());
+    typename TTypes<T, 4>::ConstTensor input_data(
+        context->input(0).tensor<T, 4>());
     typename TTypes<T, 4>::Tensor output_data(st.output->tensor<T, 4>());
 
     bool status;
",0,train
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,quantized_resize_bilinear_op.cc,"@@ -700,19 +700,19 @@ class QuantizedResizeBilinearOp : public OpKernel {
   }
 
   void Compute(OpKernelContext* context) override {
-    const Tensor& input = context->input(0);
     const float in_min = context->input(2).flat<float>()(0);
     const float in_max = context->input(3).flat<float>()(0);
 
     ImageResizerState st(align_corners_, false);
-    st.ValidateAndCreateOutput(context, input);
+    st.ValidateAndCreateOutput(context);
 
     if (!context->status().ok()) return;
 
     // Return if the output is empty.
     if (st.output->NumElements() == 0) return;
 
-    typename TTypes<T, 4>::ConstTensor image_data(input.tensor<T, 4>());
+    typename TTypes<T, 4>::ConstTensor image_data(
+        context->input(0).tensor<T, 4>());
     typename TTypes<T, 4>::Tensor output_data(st.output->tensor<T, 4>());
 
     ResizeBilinear<T>(image_data, st.height_scale, st.width_scale, in_min,
",0,train
023ab92b0a2b63392214443c20f3279bb75845d4,tensorflow/tensorflow,refactoring image_resize_state,image_resizer_state.h,"@@ -27,13 +27,13 @@ limitations under the License.
 #include <algorithm>
 #include <array>
 
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/bounds_check.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 
@@ -76,16 +76,38 @@ struct ImageResizerState {
   // height_scale and width_scale, and calculates the output size.
   // If any of these operations fails, it sets an error status in
   // the context, which the caller must check.
-  void ValidateAndCalculateOutputSize(OpKernelContext* context,
-                                      const Tensor& input) {
+  void ValidateAndCalculateOutputSize(OpKernelContext* context) {
     OP_REQUIRES(
         context,
         !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
         errors::InvalidArgument(""If half_pixel_centers is True, ""
                                 ""align_corners must be False.""));
-    OP_REQUIRES(context, input.dims() == 4,
+
+    const TensorShape& input_shape = context->input(0).shape();
+    OP_REQUIRES(context, input_shape.dims() == 4,
                 errors::InvalidArgument(""input must be 4-dimensional"",
-                                        input.shape().DebugString()));
+                                        input_shape.DebugString()));
+    batch_size = input_shape.dim_size(0);
+    channels = input_shape.dim_size(3);
+    OP_REQUIRES(
+        context, channels > 0,
+        errors::InvalidArgument(""image must have at least one channel""));
+
+    // Verify and assign `in_height` and `in_width`.
+    OP_REQUIRES(
+        context, input_shape.dim_size(1) > 0 && input_shape.dim_size(2) > 0,
+        errors::InvalidArgument(""input image must be of non-zero size""));
+    OP_REQUIRES(
+        context,
+        FastBoundsCheck(input_shape.dim_size(1),
+                        std::numeric_limits<int32>::max()) &&
+            FastBoundsCheck(input_shape.dim_size(2),
+                            std::numeric_limits<int32>::max()),
+        errors::InvalidArgument(""input sizes must be between 0 and max int32""));
+    in_height = static_cast<int32>(input_shape.dim_size(1));
+    in_width = static_cast<int32>(input_shape.dim_size(2));
+
+    // Verify the output tensor's shape.
     const Tensor& shape_t = context->input(1);
     OP_REQUIRES(context, shape_t.dims() == 1,
                 errors::InvalidArgument(""shape_t must be 1-dimensional"",
@@ -93,28 +115,14 @@ struct ImageResizerState {
     OP_REQUIRES(context, shape_t.NumElements() == 2,
                 errors::InvalidArgument(""shape_t must have two elements"",
                                         shape_t.shape().DebugString()));
+
+    // Verify and assign `out_height` and `out_width`.
     auto Svec = shape_t.vec<int32>();
-    batch_size = input.dim_size(0);
     out_height = internal::SubtleMustCopy(Svec(0));
     out_width = internal::SubtleMustCopy(Svec(1));
-    OP_REQUIRES(
-        context,
-        FastBoundsCheck(input.dim_size(1), std::numeric_limits<int32>::max()) &&
-            FastBoundsCheck(input.dim_size(2),
-                            std::numeric_limits<int32>::max()),
-        errors::InvalidArgument(""input sizes must be between 0 and max int32""));
-
-    in_height = static_cast<int32>(input.dim_size(1));
-    in_width = static_cast<int32>(input.dim_size(2));
-    channels = input.dim_size(3);
     OP_REQUIRES(context, out_height > 0 && out_width > 0,
                 errors::InvalidArgument(""output dimensions must be positive""));
-    OP_REQUIRES(
-        context, channels > 0,
-        errors::InvalidArgument(""image must have at least one channel""));
-    OP_REQUIRES(
-        context, input.dim_size(1) > 0 && input.dim_size(2) > 0,
-        errors::InvalidArgument(""input image must be of non-zero size""));
+
     height_scale = CalculateResizeScale(in_height, out_height, align_corners_);
     width_scale = CalculateResizeScale(in_width, out_width, align_corners_);
 
@@ -132,14 +140,14 @@ struct ImageResizerState {
   }
 
   // Calculates all the required variables, and allocates the output.
-  void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input) {
-    ValidateAndCalculateOutputSize(context, input);
+  void ValidateAndCreateOutput(OpKernelContext* context) {
+    ValidateAndCalculateOutputSize(context);
     if (!context->status().ok()) return;
-    OP_REQUIRES_OK(context, context->allocate_output(
-                                0,
-                                TensorShape({input.dim_size(0), out_height,
-                                             out_width, input.dim_size(3)}),
-                                &output));
+    OP_REQUIRES_OK(
+        context,
+        context->allocate_output(
+            0, TensorShape({batch_size, out_height, out_width, channels}),
+            &output));
   }
 
   int64 batch_size;
@@ -163,34 +171,29 @@ struct ImageResizerGradientState {
       : align_corners_(align_corners),
         half_pixel_centers_(half_pixel_centers) {}
 
-  void ValidateAndCreateOutput(OpKernelContext* context, const Tensor& input,
-                               const Tensor& original_image) {
+  void ValidateAndCreateOutput(OpKernelContext* context) {
     OP_REQUIRES(
         context,
         !half_pixel_centers_ || (half_pixel_centers_ && !align_corners_),
         errors::InvalidArgument(""If half_pixel_centers is True, ""
                                 ""align_corners must be False.""));
 
+    const Tensor& input = context->input(0);
     OP_REQUIRES(context, input.dims() == 4,
                 errors::InvalidArgument(""input_grad must be 4-dimensional"",
                                         input.shape().DebugString()));
+
     // Resizers always produce float images, so input gradient must
     // always be a float.
     OP_REQUIRES(context, input.dtype() == DT_FLOAT,
                 errors::InvalidArgument(""input_grad must be of type float"",
                                         DataTypeString(input.dtype())));
 
-    OP_REQUIRES(context, original_image.dims() == 4,
-                errors::InvalidArgument(""original_image must be 4-dimensional"",
-                                        original_image.shape().DebugString()));
-
-    // Allocate output and initialize to zeros.
     batch_size = input.dim_size(0);
     channels = input.dim_size(3);
+
     resized_height = input.dim_size(1);
     resized_width = input.dim_size(2);
-    original_height = original_image.dim_size(1);
-    original_width = original_image.dim_size(2);
 
     // The following check is also carried out for the forward op. It is added
     // here to prevent a divide-by-zero exception when either height_scale or
@@ -198,6 +201,13 @@ struct ImageResizerGradientState {
     OP_REQUIRES(context, resized_height > 0 && resized_width > 0,
                 errors::InvalidArgument(""resized dimensions must be positive""));
 
+    const TensorShape& output_shape = context->input(1).shape();
+    OP_REQUIRES(context, output_shape.dims() == 4,
+                errors::InvalidArgument(""original_image must be 4-dimensional"",
+                                        output_shape.DebugString()));
+    original_height = output_shape.dim_size(1);
+    original_width = output_shape.dim_size(2);
+
     // The following check is also carried out for the forward op. It is added
     // here to prevent either height_scale or width_scale from being set to
     // zero, which would cause a divide-by-zero exception in the deterministic
@@ -217,7 +227,7 @@ struct ImageResizerGradientState {
         CalculateResizeScale(original_height, resized_height, align_corners_);
     width_scale =
         CalculateResizeScale(original_width, resized_width, align_corners_);
-    output = nullptr;
+
     OP_REQUIRES_OK(context, context->allocate_output(
                                 0,
                                 TensorShape({batch_size, original_height,
@@ -233,7 +243,7 @@ struct ImageResizerGradientState {
   int64 original_width;
   float height_scale;
   float width_scale;
-  Tensor* output;
+  Tensor* output = nullptr;
 
  private:
   bool align_corners_;
",0,train
ddad9749b93f896ba6cafa3e95f52f6562657d0b,tensorflow/tensorflow,TRT Test ConvertTopK in dynamic shape mode,convert_nodes_test.cc,"@@ -1642,13 +1642,18 @@ class OpConverterTest : public ::testing::Test {
   }
 
   // Helper method to run both validation and conversion, and check the output
-  // shape.
+  // shapes.
   void RunValidationAndConversion(
       const NodeDef& node_def, const Status& status, const char* output_name,
       const std::vector<std::vector<int>>& exp_out_dims) {
     RunValidationAndConversion(node_def, status.code(),
                                status.error_message().c_str(), true);
     if (status.ok()) {
+      // TODO(tfeher): Enable this check in explicit_batch_mode.
+      // In dynamic shape mode the output dims cannot be tested here. In that
+      // case we need to wait for the concrate input shapes to be defined (by
+      // setBindingDimensions before enqueue) before we can check the output
+      // dims.
       if (converter_->use_implicit_batch()) {
         for (int i = 0; i < exp_out_dims.size(); i++) {
           TRT_TensorOrWeights output;
@@ -1656,14 +1661,7 @@ class OpConverterTest : public ::testing::Test {
           TF_EXPECT_OK(GetTensorOrWeights(name.c_str(), &output));
           ASSERT_TRUE(output.is_tensor());
           if (!exp_out_dims[i].empty()) {
-            // We only check output shape implicit batch mode. In dynamic shape
-            // mode we need to wait for the concrate input shapes to be defined
-            // (by setBindingDimensions before enqueue) before we can check
-            // whether the output dims are equal.
-            //
-            // TODO(tfeher): Enable this check in explicit_batch_mode.
-
-            // Removing batch dim
+            // Removing batch dim.
             auto out_dims = std::vector<int>(exp_out_dims[i].begin() + 1,
                                              exp_out_dims[i].end());
             VLOG(2) << ""Testing output shape for tensor "" << name;
@@ -5111,51 +5109,33 @@ TEST_P(OpConverter_FP32_Test, ConvertPool) {
   }
 }
 
-TEST_F(OpConverterTest, ConvertTopK) {
-  // TODO(tmorris): This test isn't setting the input dtype properly. TopK with
-  // int32 is unsupported by TRT.
-  for (const auto dtype : {DT_FLOAT}) {
-    // Get the NodeDef for TopKV2.
-    Scope s = Scope::NewRootScope();
-    auto input = ops::Placeholder(s.WithOpName(""input""), dtype);
-    auto weights = ops::Placeholder(s.WithOpName(""weights""), DT_INT32);
-    auto topk = ops::TopK(s.WithOpName(""my_topk""), input, weights);
-    const NodeDef& node_def = topk.operation.node()->def();
-    {
-      // K is a tensor, should fail.
-      Reset();
-      nvinfer1::DataType trt_type;
-      TF_ASSERT_OK(TfTypeToTrtType(dtype, &trt_type));
-      AddTestTensor(""input"", {1, 2, 3}, /*batch_size=*/1, trt_type);
-      AddTestTensor(""weights"", {2});
-      RunValidationAndConversion(
-          node_def, error::UNIMPLEMENTED,
-          ""The input \""k\"" for TopKV2 must be a constant, at my_topk"");
-    }
-    {
-      // Ok.
-      Reset();
-      AddTestTensor(""input"", {1, 2, 5});
-      AddTestWeights<int32>(""weights"", {1}, {2});
-      RunValidationAndConversion(node_def);
-      TRT_TensorOrWeights outputs[2];
-      TF_EXPECT_OK(GetTensorOrWeights(""my_topk"", &outputs[0]));
-      TF_EXPECT_OK(GetTensorOrWeights(""my_topk:1"", &outputs[1]));
-      for (auto& output : outputs) {
-        ASSERT_TRUE(output.is_tensor());
-        ExpectTrtDimsEqualsArray({1, 2, 2}, output.tensor()->getDimensions());
-      }
-
-      const DataVec input_data{
-          {""input"", AsTensor<float>({-9, 3, 5, 1, 6, -5, 7, 1, 0, -1})}};
-      DataVec output_data{{""my_topk"", ConstructTensor<float>(4)},
-                          {""my_topk:1"", ConstructTensor<int32>(4)}};
-      TF_EXPECT_OK(BuildAndRun(input_data, &output_data));
-      EXPECT_THAT(GetSpanForData<float>(output_data[0]),
-                  ElementsAre(6, 5, 7, 1));
-      EXPECT_THAT(GetSpanForData<int32>(output_data[1]),
-                  ElementsAre(4, 2, 1, 2));
-    }
+TEST_P(OpConverter_FP32_FP16_Test, ConvertTopK) {
+  // Get the NodeDef for TopKV2.
+  Scope s = Scope::NewRootScope();
+  auto input = ops::Placeholder(s.WithOpName(""input""), tf_type_);
+  auto weights = ops::Placeholder(s.WithOpName(""weights""), DT_INT32);
+  auto topk = ops::TopK(s.WithOpName(""my_topk""), input, weights);
+  const NodeDef& node_def = topk.operation.node()->def();
+  {
+    // K is a tensor, should fail.
+    Reset();
+    AddTestTensor(""input"", {1, 1, 2, 3});
+    AddTestTensor(""weights"", {1}, DT_INT32, {});
+    RunValidationAndConversion(
+        node_def, error::UNIMPLEMENTED,
+        ""The input \""k\"" for TopKV2 must be a constant, at my_topk"");
+  }
+  {
+    // Ok.
+    Reset();
+    AddTestTensor(""input"", {1, 1, 2, 5}, {-9, 3, 5, 1, 6, -5, 7, 1, 0, -1});
+    AddTestWeights<int32>(""weights"", {1}, {2});
+    std::vector<std::vector<int>> expected_output_dims{{1, 1, 2, 2},
+                                                       {1, 1, 2, 2}};
+    TestOpConverterMultiOut(""my_topk"", node_def, expected_output_dims,
+                            Status::OK(), Status::OK(),
+                            {ElementsAre(6, 5, 7, 1), ElementsAre(4, 2, 1, 2)},
+                            {tf_type_, DT_INT32});
   }
 }
 
",0,test
3cb03d093610e51cf2d36bfbf43c446a5de52941,tensorflow/tensorflow,"Update GraphDef version to 95.

PiperOrigin-RevId: 257935668",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 94  // Updated: 2019/7/12
+#define TF_GRAPH_DEF_VERSION 95  // Updated: 2019/7/13
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
459afb493f51095a3e1bfe63c01c982555bf4382,tensorflow/tensorflow,"VLOG(1) buffer allocation stats from gpu_compiler

PiperOrigin-RevId: 315411231
Change-Id: Id96519dd8ec69ddf1afadbe81c5a12c47118c778",gpu_compiler.cc,"@@ -499,6 +499,8 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
           /*allocate_buffers_for_constants=*/true,
           /*colorer=*/BufferAssigner::DefaultColorer(),
           /*must_not_live_out=*/{}, GetCanShareBuffer()));
+  VLOG(1) << ""Buffer Assignment Stats ""
+          << buffer_assignment->GetStats().ToString();
   DumpHloModuleIfEnabled(*module, *buffer_assignment, ""after_optimizations"");
 
   IrEmitterContext ir_emitter_context(
",0,train
f38dd432f4300de2a34374caab2616d1f82e5ce6,tensorflow/tensorflow,use a tuple for batch/time_id,cudnn_rnn_ops.py,"@@ -1119,8 +1119,7 @@ def _cudnn_rnn(inputs,
     args[""num_proj""] = 0 if num_proj is None else num_proj
     outputs, output_h, output_c, _, _ = gen_cudnn_rnn_ops.cudnn_rnnv3(**args)
   elif time_major is False or num_proj:
-    batch_id = 1 if time_major else 0
-    time_id = 0 if time_major else 1
+    batch_id, time_id = (1, 0) if time_major else (0, 1)
     batch_size = array_ops.shape(inputs)[batch_id]
     max_time = array_ops.shape(inputs)[time_id]
     sequence_lengths = array_ops.fill([batch_size], max_time)
",0,train
7bf9fa38b6744049b617e722b3f03f2deed1d51f,tensorflow/tensorflow,"Fix interfaces incompatibilities between Classifier and Estimator.
Change: 131316326",classifier.py,"@@ -42,7 +42,8 @@ class Classifier(estimator.Estimator):
   CLASS_OUTPUT = 'classes'
   PROBABILITY_OUTPUT = 'probabilities'
 
-  def __init__(self, model_fn, n_classes, model_dir=None, config=None):
+  def __init__(self, model_fn, n_classes, model_dir=None, config=None,
+               params=None):
     """"""Constructor for Classifier.
 
     Args:
@@ -52,11 +53,17 @@ class Classifier(estimator.Estimator):
         also be used to load checkpoints from the directory into a estimator to
         continue training a previously saved model.
       config: Configuration object (optional)
+      params: `dict` of hyper parameters that will be passed into `model_fn`.
     """"""
     self._n_classes = n_classes
     self._logits_fn = model_fn
-    super(Classifier, self).__init__(model_fn=self._classifier_model,
-                                     model_dir=model_dir, config=config)
+    if params:
+      model_fn = self._classifier_model_with_params
+    else:
+      model_fn = self._classifier_model
+    super(Classifier, self).__init__(model_fn=model_fn,
+                                     model_dir=model_dir, config=config,
+                                     params=params)
 
   def evaluate(self,
                x=None,
@@ -161,7 +168,15 @@ class Classifier(estimator.Estimator):
       return predictions[self.PROBABILITY_OUTPUT]
 
   def _classifier_model(self, features, targets, mode):
-    logits, loss, train_op = self._logits_fn(features, targets, mode)
+    return self._convert_to_estimator_model_result(
+        self._logits_fn(features, targets, mode))
+
+  def _classifier_model_with_params(self, features, targets, mode, params):
+    return self._convert_to_estimator_model_result(
+        self._logits_fn(features, targets, mode, params))
+
+  def _convert_to_estimator_model_result(self, logits_fn_result):
+    logits, loss, train_op = logits_fn_result
     return {
         'classes': math_ops.argmax(logits, len(logits.get_shape()) - 1),
         'probabilities': nn.softmax(logits)
",0,train
7bf9fa38b6744049b617e722b3f03f2deed1d51f,tensorflow/tensorflow,"Fix interfaces incompatibilities between Classifier and Estimator.
Change: 131316326",classifier_test.py,"@@ -46,19 +46,27 @@ def logistic_model_fn(features, target, unused_mode):
   return prediction, loss, train_op
 
 
+def logistic_model_params_fn(features, target, unused_mode, params):
+  target = tf.one_hot(target, 3, 1, 0)
+  prediction, loss = tf.contrib.learn.models.logistic_regression_zero_init(
+      features, target)
+  train_op = tf.contrib.layers.optimize_loss(
+      loss, tf.contrib.framework.get_global_step(), optimizer='Adagrad',
+      learning_rate=params['learning_rate'])
+  return prediction, loss, train_op
+
+
 class ClassifierTest(tf.test.TestCase):
 
   def testIrisAll(self):
-    iris = tf.contrib.learn.datasets.load_iris()
     est = tf.contrib.learn.Classifier(model_fn=logistic_model_fn, n_classes=3)
-    est.fit(iris.data, iris.target, steps=100)
-    scores = est.evaluate(x=iris.data, y=iris.target, name='eval')
-    predictions = est.predict(x=iris.data)
-    predictions_proba = est.predict_proba(x=iris.data)
-    self.assertEqual(predictions.shape[0], iris.target.shape[0])
-    self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1))
-    other_score = _sklearn.accuracy_score(iris.target, predictions)
-    self.assertAllClose(other_score, scores['accuracy'])
+    self._runIrisAll(est)
+
+  def testIrisAllWithParams(self):
+    est = tf.contrib.learn.Classifier(model_fn=logistic_model_params_fn,
+                                      n_classes=3,
+                                      params={'learning_rate': 0.01})
+    self._runIrisAll(est)
 
   def testIrisPredictAsIterable(self):
     iris = tf.contrib.learn.datasets.load_iris()
@@ -89,6 +97,17 @@ class ClassifierTest(tf.test.TestCase):
     predictions = list(est.predict(input_fn=predict_input_fn, as_iterable=True))
     self.assertEqual(len(predictions), iris.target.shape[0])
 
+  def _runIrisAll(self, est):
+    iris = tf.contrib.learn.datasets.load_iris()
+    est.fit(iris.data, iris.target, steps=100)
+    scores = est.evaluate(x=iris.data, y=iris.target, name='eval')
+    predictions = est.predict(x=iris.data)
+    predictions_proba = est.predict_proba(x=iris.data)
+    self.assertEqual(predictions.shape[0], iris.target.shape[0])
+    self.assertAllEqual(predictions, np.argmax(predictions_proba, axis=1))
+    other_score = _sklearn.accuracy_score(iris.target, predictions)
+    self.assertAllClose(other_score, scores['accuracy'])
+
 
 if __name__ == '__main__':
   tf.test.main()
",0,train
cd3a6effe4f7bbaa3857bfe6432a361a7676507f,tensorflow/tensorflow,"Fix documentation for the real shape of the output of crf_log_likelihood.

PiperOrigin-RevId: 185552171",crf.py,"@@ -166,8 +166,8 @@ def crf_log_likelihood(inputs,
     sequence_lengths: A [batch_size] vector of true sequence lengths.
     transition_params: A [num_tags, num_tags] transition matrix, if available.
   Returns:
-    log_likelihood: A scalar containing the log-likelihood of the given sequence
-        of tag indices.
+    log_likelihood: A [batch_size] `Tensor` containing the log-likelihood of
+      each example, given the sequence of tag indices.
     transition_params: A [num_tags, num_tags] transition matrix. This is either
         provided by the caller or created in this function.
   """"""
@@ -182,7 +182,7 @@ def crf_log_likelihood(inputs,
                                        transition_params)
   log_norm = crf_log_norm(inputs, sequence_lengths, transition_params)
 
-  # Normalize the scores to get the log-likelihood.
+  # Normalize the scores to get the log-likelihood per example.
   log_likelihood = sequence_scores - log_norm
   return log_likelihood, transition_params
 
",0,test
76ca9f1060fa7c789bcddcdb756a3b598cb634bd,tensorflow/tensorflow,"Add code-fences to doctest blocks.

Most >>> blocks already have ``` fences. Doctest runs them with or without the fences.
This change adds the ``` anywhere they're missing when api docs are generated from the docstrings. This will ensure that they look right when viewed as markdown.

+ fix docstring for `constant_initializer`: you can't have blank lines inside a doctest block. This prevents the rendering from getting corrupted.

PiperOrigin-RevId: 267009925",init_ops.py,"@@ -173,50 +173,42 @@ class Constant(Initializer):
     of the `value` list, even reshaped, as shown in the two commented lines
     below the `value` list initialization.
 
-  ```python
-    >>> import numpy as np
-    >>> import tensorflow as tf
-
-    >>> value = [0, 1, 2, 3, 4, 5, 6, 7]
-    >>> # value = np.array(value)
-    >>> # value = value.reshape([2, 4])
-    >>> init = tf.compat.v1.constant_initializer(value)
-
-    >>> print('fitting shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
-    >>>   x.initializer.run()
-    >>>   print(x.eval())
-
-    fitting shape:
-    [[ 0.  1.  2.  3.]
-     [ 4.  5.  6.  7.]]
-
-    >>> print('larger shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
-    >>>   x.initializer.run()
-    >>>   print(x.eval())
-
-    larger shape:
-    [[ 0.  1.  2.  3.]
-     [ 4.  5.  6.  7.]
-     [ 7.  7.  7.  7.]]
-
-    >>> print('smaller shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)
-
-    ValueError: Too many elements provided. Needed at most 6, but received 8
-
-    >>> print('shape verification:')
-    >>> init_verify = tf.compat.v1.constant_initializer(value,
-    verify_shape=True)
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[3, 4],
-    initializer=init_verify)
-
-    TypeError: Expected Tensor's shape: (3, 4), got (8,).
+  ```
+  >>> value = [0, 1, 2, 3, 4, 5, 6, 7]
+  >>> # value = np.array(value)
+  >>> # value = value.reshape([2, 4])
+  >>> init = tf.compat.v1.constant_initializer(value)
+  >>>
+  >>> # fitting shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
+  ...   x.initializer.run()
+  ...   print(x.eval())
+  [[0. 1. 2. 3.]
+   [4. 5. 6. 7.]]
+  >>>
+  >>> # Larger shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
+  ...   x.initializer.run()
+  ...   print(x.eval())
+  [[ 0.  1.  2.  3.]
+   [ 4.  5.  6.  7.]
+   [ 7.  7.  7.  7.]]
+  >>>
+  >>> # Smaller shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)
+  ValueError: Too many elements provided. Needed at most 6, but received 8
+  >>>
+  >>> # Shape verification
+  >>> init_verify = tf.compat.v1.constant_initializer(value,
+  verify_shape=True)
+  >>> with tf.compat.v1.Session():
+  ...  x = tf.compat.v1.get_variable('x', shape=[3, 4],
+  ...                                 initializer=init_verify)
+  TypeError: Expected Tensor's shape: (3, 4), got (8,).
+  >>>
   ```
   """"""
 
",0,test
76ca9f1060fa7c789bcddcdb756a3b598cb634bd,tensorflow/tensorflow,"Add code-fences to doctest blocks.

Most >>> blocks already have ``` fences. Doctest runs them with or without the fences.
This change adds the ``` anywhere they're missing when api docs are generated from the docstrings. This will ensure that they look right when viewed as markdown.

+ fix docstring for `constant_initializer`: you can't have blank lines inside a doctest block. This prevents the rendering from getting corrupted.

PiperOrigin-RevId: 267009925",init_ops_v2.py,"@@ -150,40 +150,31 @@ class Constant(Initializer):
     below the `value` list initialization.
 
   ```python
-    >>> import numpy as np
-    >>> import tensorflow as tf
-
-    >>> value = [0, 1, 2, 3, 4, 5, 6, 7]
-    >>> # value = np.array(value)
-    >>> # value = value.reshape([2, 4])
-    >>> init = tf.compat.v1.constant_initializer(value)
-
-    >>> print('fitting shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
-    >>>   x.initializer.run()
-    >>>   print(x.eval())
-
-    fitting shape:
-    [[ 0.  1.  2.  3.]
-     [ 4.  5.  6.  7.]]
-
-    >>> print('larger shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
-    >>>   x.initializer.run()
-    >>>   print(x.eval())
-
-    larger shape:
-    [[ 0.  1.  2.  3.]
-     [ 4.  5.  6.  7.]
-     [ 7.  7.  7.  7.]]
-
-    >>> print('smaller shape:')
-    >>> with tf.compat.v1.Session():
-    >>>   x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)
-
-    ValueError: Too many elements provided. Needed at most 6, but received 8
+  >>> value = [0, 1, 2, 3, 4, 5, 6, 7]
+  >>> # value = np.array(value)
+  >>> # value = value.reshape([2, 4])
+  >>> init = tf.compat.v1.constant_initializer(value)
+  >>>
+  >>> # Fitting shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[2, 4], initializer=init)
+  ...   x.initializer.run()
+  ...   print(x.eval())
+  [[0. 1. 2. 3.]
+   [4. 5. 6. 7.]]
+  >>> # Larger shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[3, 4], initializer=init)
+  ...   x.initializer.run()
+  ...   print(x.eval())
+  [[ 0.  1.  2.  3.]
+   [ 4.  5.  6.  7.]
+   [ 7.  7.  7.  7.]]
+  >>> # Smaller shape
+  >>> with tf.compat.v1.Session():
+  ...   x = tf.compat.v1.get_variable('x', shape=[2, 3], initializer=init)
+  ValueError: Too many elements provided. Needed at most 6, but received 8
+
   ```
   """"""
 
",0,test
76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items

PiperOrigin-RevId: 221653198",op_types.cc,"@@ -571,6 +571,10 @@ bool IsFreeOfSideEffect(const NodeDef& node) {
   if (node.op().find(""Queue"") != string::npos) {
     return false;
   }
+  // Sending a tensor via a network is a side effect.
+  if (IsSend(node)) {
+    return false;
+  }
   return !ModifiesInputsInPlace(node);
 }
 
",0,train
76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items

PiperOrigin-RevId: 221653198",functions.cc,"@@ -347,12 +347,6 @@ GrapplerFunctionItem::GrapplerFunctionItem(
       fetch.push_back(output_tensor);
     }
   }
-  // Stateful and Send (it's not stateful) nodes must be preserved in the graph.
-  for (const NodeDef& node : graph.node()) {
-    if (IsSend(node)) {
-      keep_ops.push_back(node.name());
-    }
-  }
 }
 
 const string& GrapplerFunctionItem::description() const { return description_; }
@@ -584,8 +578,8 @@ Status MakeGrapplerFunctionItem(const FunctionDef& func,
     TF_RETURN_IF_ERROR(RegisterFunctionBodyOutputs(*registration, func_def_node,
                                                    &connectivity));
 
-    // Stateful and Send nodes must be preserved in a function body
-    if (registration->op_def.is_stateful() || IsSend(func_def_node)) {
+    // Ops with side effects must be preserved in a function body.
+    if (!IsFreeOfSideEffect(func_def_node)) {
       keep_nodes.push_back(func_def_node.name());
     }
   }
",0,train
76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items

PiperOrigin-RevId: 221653198",functions.h,"@@ -142,12 +142,6 @@ class GrapplerFunctionItemInstantiation {
 class GrapplerFunctionItem : public GrapplerItem {
  public:
   GrapplerFunctionItem() = default;
-  GrapplerFunctionItem(string func_name, string description,
-                       AttrSlice func_attr,
-                       std::vector<InputArgExpansion> input_arg_expansions,
-                       std::vector<OutputArgExpansion> output_arg_expansions,
-                       std::vector<string> keep_nodes, int graph_def_version,
-                       bool is_stateful, GraphDef&& function_body);
 
   const string& description() const;
 
@@ -170,12 +164,22 @@ class GrapplerFunctionItem : public GrapplerItem {
   GrapplerFunctionItem& SwapFunctionBody(GraphDef&& other);
 
  private:
+  friend Status MakeGrapplerFunctionItem(const FunctionDef&, const AttrSlice&,
+                                         const FunctionLibraryDefinition&, int,
+                                         GrapplerFunctionItem*);
   friend Status ReplaceInputWithConst(const NodeDef&, int,
                                       GrapplerFunctionItem*);
   friend Status RemoveUnusedOutputs(
       const gtl::FlatSet<int>& active_outputs, GrapplerFunctionItem* item,
       std::vector<std::pair<int, int>>* output_mapping);
 
+  GrapplerFunctionItem(string func_name, string description,
+                       AttrSlice func_attr,
+                       std::vector<InputArgExpansion> input_arg_expansions,
+                       std::vector<OutputArgExpansion> output_arg_expansions,
+                       std::vector<string> keep_nodes, int graph_def_version,
+                       bool is_stateful, GraphDef&& function_body);
+
   string description_;
   AttrSlice func_attr_;  // Attributes specific to function definition that
                          // produced this item (FuncDef.attr field).
",0,train
76d204f38757c2b4a3a82020b90b7e739b2c90b6,tensorflow/tensorflow,"Keep side effectful ops in grappler function items

PiperOrigin-RevId: 221653198",functions_test.cc,"@@ -576,6 +576,33 @@ TEST_F(FunctionsTest, FromFunctionDefWithoutInput) {
   EXPECT_EQ(""two"", cast.input(0));
 }
 
+TEST_F(FunctionsTest, FromFunctionDefWithSideEffectfulOps) {
+  const Tensor kOne = test::AsScalar<float>(1.0);
+  FunctionDef func = FunctionDefHelper::Define(
+      /* Name */ ""SideEffects"",
+      /* Args */ {""x: Ref(float)""},
+      /* Return values */ {},
+      /* Attr def */ {},
+      /* Nodes */
+      {{{""one""}, ""Const"", {}, {{""value"", kOne}, {""dtype"", DT_FLOAT}}},
+       {{""update""}, ""AssignAdd"", {""x"", ""one""}, {{""T"", DT_FLOAT}}}});
+
+  protobuf::Map<string, AttrValue> func_instantiation_attr;
+  FunctionLibraryDefinition flib(OpRegistry::Global(), FunctionDefLibrary());
+
+  GrapplerFunctionItem item;
+  TF_EXPECT_OK(MakeGrapplerFunctionItem(func,
+                                        AttrSlice(&func_instantiation_attr),
+                                        flib, TF_GRAPH_DEF_VERSION, &item));
+
+  EXPECT_EQ(""SideEffects"", item.id);
+  EXPECT_EQ(3, item.function_body().node_size());
+  EXPECT_EQ(1, item.input_size());
+  EXPECT_EQ(0, item.output_size());
+  ASSERT_EQ(1, item.keep_ops.size());
+  EXPECT_EQ(""update"", item.keep_ops[0]);
+}
+
 TEST_F(FunctionsTest, MakeFunctionDef) {
   const Tensor kTwo = test::AsScalar<int64>(2);
   FunctionDef func = FunctionDefHelper::Define(
",0,train
0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk.

- Introduce a CollectivePermuteConfig object to hold relevant properties needed for
  execution of the Thunk and use that in the thunk object.

PiperOrigin-RevId: 335650568
Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",collective_permute_thunk.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""absl/memory/memory.h""
 #include ""tensorflow/compiler/xla/refcounting_hash_map.h""
 #include ""tensorflow/compiler/xla/service/hlo_casting_utils.h""
+#include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/service/hlo_instructions.h""
 #include ""tensorflow/compiler/xla/statusor.h""
 #include ""tensorflow/core/lib/core/blocking_counter.h""
@@ -217,16 +218,23 @@ RefcountingHashMap<RendezvousKey, Rendezvous>& GlobalRendezvousMap() {
 
 }  // anonymous namespace
 
+CollectivePermuteConfig GetCollectivePermuteConfig(
+    const HloInstruction* instr) {
+  CollectivePermuteConfig config;
+  auto* collective_permute = Cast<HloCollectivePermuteInstruction>(instr);
+  config.source_target_pairs = collective_permute->source_target_pairs();
+  return config;
+}
+
 CollectivePermuteThunk::CollectivePermuteThunk(
-    ThunkInfo thunk_info, const BufferAllocation::Slice& src,
-    const BufferAllocation::Slice& dest)
+    ThunkInfo thunk_info, CollectivePermuteConfig&& config,
+    const BufferAllocation::Slice& src, const BufferAllocation::Slice& dest)
     : Thunk(kCollectivePermute, thunk_info),
-      hlo_instruction_(thunk_info.hlo_instruction),
+      config_(std::move(config)),
       src_(src),
       dest_(dest) {}
 
 Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) {
-  auto* instr = Cast<HloCollectivePermuteInstruction>(hlo_instruction_);
   auto op_profiler =
       params.profiler->MakeScopedInstructionProfiler(profile_index());
 
@@ -245,7 +253,7 @@ Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) {
 
   // Figure out which replicas our data is copied to.
   std::vector<int64> dest_replicas;
-  for (const auto& src_dest : instr->source_target_pairs()) {
+  for (const auto& src_dest : config_.source_target_pairs) {
     if (src_dest.first == replica_id) {
       dest_replicas.push_back(src_dest.second);
     }
@@ -260,7 +268,7 @@ Status CollectivePermuteThunk::ExecuteOnStream(const ExecuteParams& params) {
 
   // If no replica writes into us (i.e. we aren't the target of any copies), our
   // contract is that we zero our output.
-  if (absl::c_none_of(instr->source_target_pairs(),
+  if (absl::c_none_of(config_.source_target_pairs,
                       [&](std::pair<int64, int64> src_dest) {
                         return src_dest.second == replica_id;
                       })) {
",0,train
0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk.

- Introduce a CollectivePermuteConfig object to hold relevant properties needed for
  execution of the Thunk and use that in the thunk object.

PiperOrigin-RevId: 335650568
Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",collective_permute_thunk.h,"@@ -19,21 +19,28 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/buffer_assignment.h""
 #include ""tensorflow/compiler/xla/service/gpu/hlo_execution_profiler.h""
 #include ""tensorflow/compiler/xla/service/gpu/thunk.h""
+#include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 
 namespace xla {
 namespace gpu {
 
+struct CollectivePermuteConfig {
+  std::vector<std::pair<int64, int64>> source_target_pairs;
+};
+
+CollectivePermuteConfig GetCollectivePermuteConfig(const HloInstruction* instr);
+
 // Thunk that implements the collective-permute HLO.
 class CollectivePermuteThunk : public Thunk {
  public:
-  CollectivePermuteThunk(ThunkInfo thunk_info,
+  CollectivePermuteThunk(ThunkInfo thunk_info, CollectivePermuteConfig&& config,
                          const BufferAllocation::Slice& src,
                          const BufferAllocation::Slice& dest);
 
   Status ExecuteOnStream(const ExecuteParams& params) override;
 
  private:
-  const HloInstruction* hlo_instruction_;
+  CollectivePermuteConfig config_;
   BufferAllocation::Slice src_;
   BufferAllocation::Slice dest_;
 };
",0,train
0d39eea6e7e2b02a8812c46532af6bfcb5604865,tensorflow/tensorflow,"[NFC] Eliminate references to HLO Inst from CollectivePermute Thunk.

- Introduce a CollectivePermuteConfig object to hold relevant properties needed for
  execution of the Thunk and use that in the thunk object.

PiperOrigin-RevId: 335650568
Change-Id: I6c326100e2540fd7f80a9f2b5357ef4a781a6683",ir_emitter_unnested.cc,"@@ -1623,9 +1623,10 @@ Status IrEmitterUnnested::HandleReplicaId(HloInstruction* hlo) {
 }
 
 Status IrEmitterUnnested::HandleCollectivePermute(HloInstruction* hlo) {
+  CollectivePermuteConfig config = GetCollectivePermuteConfig(hlo);
   AddThunkToThunkSequence(absl::make_unique<CollectivePermuteThunk>(
-      GetThunkInfo(hlo), GetAllocationSlice(*hlo->operand(0)),
-      GetAllocationSlice(*hlo)));
+      GetThunkInfo(hlo), std::move(config),
+      GetAllocationSlice(*hlo->operand(0)), GetAllocationSlice(*hlo)));
   return Status::OK();
 }
 
",0,train
003110094e8daa14306f872ceb8596e14d1f69d1,tensorflow/tensorflow,"[JAX] Make C++ jit code tolerant to jax.interpreters.xla._DeviceArray not existing.

Change in preparation for deleting jax.interpreters.xla._DeviceArray.

PiperOrigin-RevId: 407608971
Change-Id: I857aab2517dc2c08d36eec9fbb6a005d7289e253",jax_jit.cc,"@@ -566,7 +566,10 @@ xla::Status ComputeSignature(bool jax_enable_x64, xla::PyClient& pyclient,
   };
   static const auto& types = *[]() -> PythonTypes* {
     py::module xla_module(py::module::import(""jax.interpreters.xla""));
-    py::object device_array(xla_module.attr(""_DeviceArray""));
+    py::object device_array;
+    if (py::hasattr(xla_module, ""_DeviceArray"")) {
+      device_array = xla_module.attr(""_DeviceArray"");
+    }
     return new PythonTypes{device_array};
   }();
   // When the jitted function is not committed, we first check whether any
",0,train
65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to use f-strings or the .format() method.

PiperOrigin-RevId: 430567942
Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",feature_column.py,"@@ -104,12 +104,12 @@ def embedding_column(categorical_column,
   """"""
   if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2):
     raise TypeError('categorical_column for tpu '
-                    ' embedding_column was denylisted type %s' %
-                    type(categorical_column))
+                    ' embedding_column was '
+                    f'denylisted type {type(categorical_column)}')
   if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
     raise TypeError(
         'categorical_column for tpu '
-        ' embedding_column must be type %s, got %s.' % (' or '.join([
+        ' embedding_column must be type {}, got {}.'.format(' or '.join([
             cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
         ]), type(categorical_column)))
   if (dimension is None) or (dimension < 1):
@@ -221,14 +221,15 @@ def shared_embedding_columns(categorical_columns,
   for categorical_column in categorical_columns:
     if isinstance(categorical_column, _DENYLISTED_CATEGORICAL_COLUMNS_V2):
       raise TypeError('categorical_column for tpu '
-                      ' embedding_column was denylisted type %s' %
-                      type(categorical_column))
+                      ' embedding_column was denylisted type '
+                      f'{type(categorical_column)}')
     if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS):
       raise TypeError(
           'categorical_column for tpu '
-          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join([
-              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS
-          ]), type(categorical_column)))
+          ' shared_embedding_columns must be type {}, got {}.'.format(
+              ' or '.join(
+                  [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS]),
+              type(categorical_column)))
 
   if not max_sequence_lengths:
     max_sequence_lengths = [0] * len(categorical_columns)
@@ -711,7 +712,7 @@ def split_sequence_columns(feature_columns):
     if not isinstance(column, (_TPUEmbeddingColumn, _TPUSharedEmbeddingColumn)):
       raise TypeError(
           'column must be a _TPUEmbeddingColumn or  _TPUSharedEmbeddingColumn '
-          'but got %s instead.' % (type(column)))
+          f'but got {type(column)} instead.')
     if column.is_sequence_column():
       sequence_columns.append(column)
     else:
",0,train
65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to use f-strings or the .format() method.

PiperOrigin-RevId: 430567942
Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",feature_column_v2.py,"@@ -144,7 +144,7 @@ def embedding_column_v2(categorical_column,
   if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2):
     raise TypeError(
         'categorical_column for tpu '
-        ' embedding_column must be type %s, got %s.' % (' or '.join([
+        'embedding_column must be type {}, got {}.'.format(' or '.join([
             cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2
         ]), type(categorical_column)))
   if (dimension is None) or (dimension < 1):
@@ -163,8 +163,8 @@ def embedding_column_v2(categorical_column,
 
   if (embedding_lookup_device and
       embedding_lookup_device not in _ALLOWED_DEVICES):
-    raise ValueError('If set, embedding_lookup_device must be in ',
-                     _ALLOWED_DEVICES)
+    raise ValueError(
+        f'If set, embedding_lookup_device must be in {_ALLOWED_DEVICES}')
 
   if embedding_lookup_device == 'cpu':
     embedding_lookup_device = EmbeddingDevice.CPU
@@ -314,9 +314,10 @@ def shared_embedding_columns_v2(categorical_columns,
     if not isinstance(categorical_column, _SUPPORTED_CATEGORICAL_COLUMNS_V2):
       raise TypeError(
           'categorical_column for tpu '
-          ' shared_embedding_columns must be type %s, got %s.' % (' or '.join([
-              cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2
-          ]), type(categorical_column)))
+          ' shared_embedding_columns must be type {}, got {}.'.format(
+              ' or '.join(
+                  [cc.__name__ for cc in _SUPPORTED_CATEGORICAL_COLUMNS_V2]),
+              type(categorical_column)))
 
   if not max_sequence_lengths:
     max_sequence_lengths = [0] * len(categorical_columns)
@@ -364,8 +365,8 @@ def shared_embedding_columns_v2(categorical_columns,
 
   if (embedding_lookup_device and
       embedding_lookup_device not in _ALLOWED_DEVICES):
-    raise ValueError('If set, embedding_lookup_device must be in ',
-                     _ALLOWED_DEVICES)
+    raise ValueError(
+        f'If set, embedding_lookup_device must be in {_ALLOWED_DEVICES}')
 
   if embedding_lookup_device == 'cpu':
     embedding_lookup_device = EmbeddingDevice.CPU
@@ -779,7 +780,7 @@ def split_sequence_columns_v2(feature_columns):
                                _TPUSharedEmbeddingColumnV2)):
       raise TypeError(
           'column must be a _TPUEmbeddingColumnV2 or '
-          '_TPUSharedEmbeddingColumnV2 but got %s instead.' % (type(column)))
+          f'_TPUSharedEmbeddingColumnV2 but got {type(column)} instead.')
     if column.is_sequence_column():
       sequence_columns.append(column)
     else:
",0,train
65d193342103d972328934044df9e285438904b9,tensorflow/tensorflow,"Reformatted the error messages to use f-strings or the .format() method.

PiperOrigin-RevId: 430567942
Change-Id: I614475150d63cd4e68695609cf69a0317ceb2944",tpu_feed.py,"@@ -185,8 +185,8 @@ class InfeedQueue(object):
             ""number of tuple elements cannot be inferred from InfeedQueue ""
             ""constructor"")
     if number_of_tuple_elements <= 0:
-      raise ValueError(""number_of_tuple_elements %d must be > 0"" %
-                       number_of_tuple_elements)
+      raise ValueError(f""number_of_tuple_elements {number_of_tuple_elements} ""
+                       ""must be > 0"")
     # Make an empty sharding policy for each tuple element.
     self._sharding_policies = [
         tpu_sharding.ShardingPolicy() for _ in range(number_of_tuple_elements)
@@ -241,22 +241,24 @@ class InfeedQueue(object):
         dtype.
     """"""
     if len(tuple_types) != self.number_of_tuple_elements:
-      raise ValueError(""tuple_types is %s, but must be a list of length %d"" %
-                       (str(tuple_types), self.number_of_tuple_elements))
+      raise ValueError(
+          f""tuple_types is {str(tuple_types)}, but must be a list of ""
+          f""length {self.number_of_tuple_elements}""
+      )
     if self._frozen:
       for (frozen, updated) in zip(self._tuple_types, tuple_types):
         if frozen != updated:
           raise ValueError(
               ""Trying to update InfeedQueue with frozen configuration with an ""
-              ""incompatible type. Frozen types are %s, updated types are %s"" % (
-                  str(self._tuple_types), str(tuple_types)))
+              f""incompatible type. Frozen types are {str(self._tuple_types)}, ""
+              f""updated types are {str(tuple_types)}"")
     else:
       try:
         self._tuple_types = [dtypes.as_dtype(t) for t in tuple_types]
       except (TypeError) as e:
         raise TypeError(
-            ""tuple_types is %s, but must be a list of elements each ""
-            ""convertible to dtype: got error %s"" % (str(tuple_types), str(e)))
+            f""tuple_types is {str(tuple_types)}, but must be a list of ""
+            f""elements each convertible to dtype: got error {str(e)}"") from e
 
   @property
   def tuple_shapes(self):
@@ -280,22 +282,26 @@ class InfeedQueue(object):
         a TensorShape.
     """"""
     if len(tuple_shapes) != self.number_of_tuple_elements:
-      raise ValueError(""tuple_shapes is %s, but must be a list of length %d"" %
-                       (str(tuple_shapes), self.number_of_tuple_elements))
+      raise ValueError(
+          f""tuple_shapes is {str(tuple_shapes)}, but must be a list of ""
+          f""length {self.number_of_tuple_elements}""
+      )
     try:
       tuple_shapes = [tensor_shape.as_shape(shape) for shape in tuple_shapes]
     except (ValueError, TypeError) as e:
       raise TypeError(
-          ""tuple_shapes is %s, but must be a list of elements each ""
-          ""convertible to TensorShape: got error %s"" % (str(tuple_shapes),
-                                                        str(e)))
+          f""tuple_shapes is {str(tuple_shapes)}, but must be a list of ""
+          ""elements each convertible to TensorShape: got error ""
+          f""{str(e)}"") from e
     if self._frozen:
       for (frozen, updated) in zip(self._tuple_shapes, tuple_shapes):
         if frozen != updated:
           raise ValueError(
               ""Trying to update InfeedQueue with frozen configuration with an ""
-              ""incompatible shape. Frozen shapes are %s, updated shapes are %s""
-              % (str(self._tuple_shapes), str(tuple_shapes)))
+              ""incompatible shape. Frozen shapes are ""
+              f""{str(self._tuple_shapes)}, updated shapes are ""
+              f""{str(tuple_shapes)}"")
+
     else:
       self._tuple_shapes = tuple_shapes
     self._validate()
@@ -335,9 +341,8 @@ class InfeedQueue(object):
         range for the corresponding tuple element shape.
     """"""
     if len(shard_dimensions) != self.number_of_tuple_elements:
-      raise ValueError(""shard_dimensions is %s, but must be a list of length %d""
-                       % (str(shard_dimensions),
-                          self.number_of_tuple_elements))
+      raise ValueError(f""shard_dimensions is {str(shard_dimensions)}, but must ""
+                       f""be a list of length {self.number_of_tuple_elements}"")
     for (policy, dimension) in zip(self._sharding_policies, shard_dimensions):
       policy.set_shard_dimension(dimension)
     self._validate()
@@ -383,8 +388,8 @@ class InfeedQueue(object):
         self.number_of_tuple_elements
     """"""
     if len(input_tensors) != self.number_of_tuple_elements:
-      raise ValueError(""input_tensors is %s, but should be a list of %d Tensors""
-                       % (str(input_tensors), self.number_of_tuple_elements))
+      raise ValueError(f""input_tensors is {str(input_tensors)}, but should be ""
+                       f""a list of {self.number_of_tuple_elements} Tensors"")
     self.set_tuple_shapes([t.shape for t in input_tensors])
     self.set_tuple_types([t.dtype for t in input_tensors])
 
@@ -417,9 +422,9 @@ class InfeedQueue(object):
     for t in input_tensors:
       if len(t) != self.number_of_tuple_elements:
         raise ValueError(
-            ""input_tensors is %s but must be a list of lists, where each inner""
-            "" list has length number_of_tuple_elements=%d"" % (
-                str(input_tensors), self.number_of_tuple_elements))
+            f""input_tensors is {str(input_tensors)} but must be a list of ""
+            ""lists, where each inner list has length ""
+            f""number_of_tuple_elements={self.number_of_tuple_elements}"")
     # Transpose the inputs to make a list of shard shapes for each tuple
     # element.
     sharded_shapes = [[t[i].shape
@@ -435,8 +440,8 @@ class InfeedQueue(object):
       for (t1, t2) in zip(input_tensors[0], input_tensors[i]):
         if t1.dtype != t2.dtype:
           raise TypeError(
-              ""types of the tuple elements of input_tensors %s are not ""
-              ""consistent"" % str(input_tensors))
+              ""types of the tuple elements of input_tensors ""
+              f""{str(input_tensors)} are not consistent"")
     self.set_tuple_types([t.dtype for t in input_tensors[0]])
 
   def freeze(self):
@@ -548,8 +553,8 @@ class InfeedQueue(object):
       for i in range(1, self.number_of_tuple_elements):
         if devices[0] != devices[i]:
           raise ValueError(
-              ""input devices for shard %d are %s, but should all be the same"" %
-              (index, str(devices)))
+              f""input devices for shard {index} are {str(devices)}, but should ""
+              ""all be the same"")
       with ops.colocate_with(inputs[0]):
         return tpu_ops.infeed_enqueue_tuple(
             inputs=inputs,
",0,train
72028307fdd8b00559ed631a409c9237ff0c24b8,tensorflow/tensorflow,"Fix GitHub issue #43789: file_io.delete_recursively_v2 is not compatible when called on files on cloud storage.

PiperOrigin-RevId: 336951308
Change-Id: Ieb43a96b1f6c6fc481785cd4c60b1a5c31cb5c1c",multi_worker_callback_tf2_test.py,"@@ -205,7 +205,7 @@ class KerasCallbackMultiProcessTest(parameterized.TestCase, test.TestCase):
           raise
 
       multi_process_runner.get_barrier().wait()
-      backup_filepath = os.path.join(bar_dir, 'chief', 'checkpoint')
+      backup_filepath = os.path.join(bar_dir, 'checkpoint')
       test_obj.assertTrue(file_io.file_exists_v2(backup_filepath))
       test_obj.assertTrue(file_io.file_exists_v2(saving_filepath))
 
",0,train
72028307fdd8b00559ed631a409c9237ff0c24b8,tensorflow/tensorflow,"Fix GitHub issue #43789: file_io.delete_recursively_v2 is not compatible when called on files on cloud storage.

PiperOrigin-RevId: 336951308
Change-Id: Ieb43a96b1f6c6fc481785cd4c60b1a5c31cb5c1c",worker_training_state.py,"@@ -73,17 +73,15 @@ class WorkerTrainingState(object):
     # workers need to perform `save()`.
     # But all workers should restore from the same checkpoint_dir as passed in
     # read_checkpoint_manager.
-    self.read_checkpoint_manager = checkpoint_management.CheckpointManager(
-        checkpoint,
-        directory=os.path.join(checkpoint_dir, 'chief'),
-        max_to_keep=1)
-    write_checkpoint_dir = distributed_file_utils.write_dirpath(
+    self.write_checkpoint_dir = distributed_file_utils.write_dirpath(
         checkpoint_dir, self._model.distribute_strategy)
-    if write_checkpoint_dir == checkpoint_dir:
-      self.write_checkpoint_manager = self.read_checkpoint_manager
+    self.write_checkpoint_manager = checkpoint_management.CheckpointManager(
+        checkpoint, directory=self.write_checkpoint_dir, max_to_keep=1)
+    if self.write_checkpoint_dir == checkpoint_dir:
+      self.read_checkpoint_manager = self.write_checkpoint_manager
     else:
-      self.write_checkpoint_manager = checkpoint_management.CheckpointManager(
-          checkpoint, directory=write_checkpoint_dir, max_to_keep=1)
+      self.read_checkpoint_manager = checkpoint_management.CheckpointManager(
+          checkpoint, directory=checkpoint_dir, max_to_keep=1)
 
   def back_up(self, epoch):
     """"""Back up the current state of training into a checkpoint file.
@@ -113,8 +111,13 @@ class WorkerTrainingState(object):
     Delete the backup directories which should not exist after `fit()`
     successfully finishes.
     """"""
-    if self.write_checkpoint_manager is self.read_checkpoint_manager:
-      file_io.delete_recursively_v2(self.write_checkpoint_manager.directory)
+    # pylint: disable=protected-access
+    for pathname in file_io.get_matching_files_v2(
+        self.write_checkpoint_manager._prefix + '*'):
+      file_io.delete_recursively_v2(pathname)
+    for pathname in file_io.get_matching_files_v2(
+        os.path.join(self.write_checkpoint_manager.directory, 'checkpoint')):
+      file_io.delete_recursively_v2(pathname)
 
   def maybe_load_initial_epoch_from_ckpt(self, initial_epoch, mode):
     """"""Maybe load initial epoch from ckpt considering possible worker recovery.
",0,train
8d32eb3bd10aceea68118556e500e87f5565a983,tensorflow/tensorflow,"Handle tf.Case in tf-tensor-list-ops-decomposition pass.

PiperOrigin-RevId: 316490068
Change-Id: I8f9502c3b8361e767b6333428cffa68fe3d8a3ad",tensor_list_ops_decomposition.cc,"@@ -216,59 +216,62 @@ LogicalResult HandleWhileOp(
   return success();
 }
 
-LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module,
-                         llvm::SmallDenseMap<Value, SizeInfo>* buffer_to_size,
-                         llvm::StringMap<PartitionedCallDecompositionInfo>*
-                             decomposed_partitioned_call_callees) {
+template <class CaseOrIfOp>
+LogicalResult HandleCaseOrIfOp(
+    CaseOrIfOp op, ArrayRef<FuncOp> branches, ModuleOp module,
+    llvm::SmallDenseMap<Value, SizeInfo>* buffer_to_size,
+    llvm::StringMap<PartitionedCallDecompositionInfo>*
+        decomposed_partitioned_call_callees) {
   // Rewrite the branches.
-  auto then_branch = module.lookupSymbol<FuncOp>(if_op.then_branch());
-  auto else_branch = module.lookupSymbol<FuncOp>(if_op.else_branch());
-  llvm::SmallDenseMap<Value, SizeInfo> then_map;
-  llvm::SmallDenseMap<Value, SizeInfo> else_map;
+  SmallVector<llvm::SmallDenseMap<Value, SizeInfo>, 2> branch_maps;
+  branch_maps.resize(branches.size());
 
   auto find_arg_buffer_type = [&](int64_t index) -> llvm::Optional<Type> {
-    auto it = buffer_to_size->find(if_op.getOperand(index + 1));
+    auto it = buffer_to_size->find(op.getOperand(index + 1));
     if (it == buffer_to_size->end()) return llvm::None;
     return it->getFirst().getType();
   };
   auto arg_buffer_size_is_fixed = [&](int64_t index) {
-    return (*buffer_to_size)[if_op.getOperand(index + 1)].fixed;
+    return (*buffer_to_size)[op.getOperand(index + 1)].fixed;
   };
-  OpBuilder builder(if_op);
-  ModifyFunctionSignature(then_branch, cutil::GetSizeType(builder), &then_map,
-                          find_arg_buffer_type, arg_buffer_size_is_fixed);
-  ModifyFunctionSignature(else_branch, cutil::GetSizeType(builder), &else_map,
-                          find_arg_buffer_type, arg_buffer_size_is_fixed);
-  const bool arg_no_changed = then_map.empty();
-  if (failed(DecomposeTensorListOpsInternal(
-          &then_branch.front(), module, &then_map,
-          decomposed_partitioned_call_callees)) ||
-      failed(DecomposeTensorListOpsInternal(
-          &else_branch.front(), module, &else_map,
-          decomposed_partitioned_call_callees))) {
-    return failure();
+  OpBuilder builder(op);
+  for (const auto& pair : llvm::zip(branches, branch_maps)) {
+    FuncOp branch = std::get<0>(pair);
+    llvm::SmallDenseMap<Value, SizeInfo>& branch_map = std::get<1>(pair);
+    ModifyFunctionSignature(branch, cutil::GetSizeType(builder), &branch_map,
+                            find_arg_buffer_type, arg_buffer_size_is_fixed);
+
+    if (failed(DecomposeTensorListOpsInternal(
+            &branch.front(), module, &branch_map,
+            decomposed_partitioned_call_callees)))
+      return failure();
   }
+
+  const bool arg_no_changed = branch_maps.front().empty();
   auto output_buffer_to_size =
-      AddTensorListSizesToReturn(then_branch, then_map);
-  AddTensorListSizesToReturn(else_branch, else_map);
+      AddTensorListSizesToReturn(branches.front(), branch_maps.front());
+  for (const auto& pair : llvm::drop_begin(llvm::zip(branches, branch_maps), 1))
+    AddTensorListSizesToReturn(std::get<0>(pair), std::get<1>(pair));
+
   if (output_buffer_to_size.empty() && arg_no_changed) return success();
-  // Recreate the If op.
-  auto new_if_operands = llvm::to_vector<8>(if_op.getOperands());
-  for (int64_t i = 1; i < if_op.getNumOperands(); ++i) {
-    auto it = buffer_to_size->find(if_op.getOperand(i));
+
+  // Recreate the op.
+  auto new_operands = llvm::to_vector<8>(op.getOperands());
+  for (int64_t i = 1; i < op.getNumOperands(); ++i) {
+    auto it = buffer_to_size->find(op.getOperand(i));
     if (it == buffer_to_size->end()) continue;
-    new_if_operands.push_back(it->getSecond().size);
+    new_operands.push_back(it->getSecond().size);
   }
-  auto new_if = OpBuilder(if_op).create<TF::IfOp>(
-      if_op.getLoc(), then_branch.getType().getResults(), new_if_operands,
-      if_op.getAttrs());
+  FuncOp first_branch = branches.front();
+  auto new_op = OpBuilder(op).create<CaseOrIfOp>(
+      op.getLoc(), first_branch.getType().getResults(), new_operands,
+      op.getAttrs());
   for (const auto& entry : output_buffer_to_size) {
-    (*buffer_to_size)[new_if.getResult(std::get<0>(entry))] = {
-        new_if.getResult(std::get<1>(entry)), std::get<2>(entry)};
+    (*buffer_to_size)[new_op.getResult(std::get<0>(entry))] = {
+        new_op.getResult(std::get<1>(entry)), std::get<2>(entry)};
   }
-  if_op.replaceAllUsesWith(
-      new_if.getResults().take_front(if_op.getNumResults()));
-  if_op.erase();
+  op.replaceAllUsesWith(new_op.getResults().take_front(op.getNumResults()));
+  op.erase();
   return success();
 }
 
@@ -710,8 +713,22 @@ LogicalResult DecomposeTensorListOpsInternal(
         return failure();
       }
     } else if (auto if_op = llvm::dyn_cast<TF::IfOp>(&op)) {
-      if (failed(HandleIfOp(if_op, module, buffer_to_size,
-                            decomposed_partitioned_call_callees))) {
+      auto then_branch = module.lookupSymbol<FuncOp>(if_op.then_branch());
+      auto else_branch = module.lookupSymbol<FuncOp>(if_op.else_branch());
+
+      if (failed(HandleCaseOrIfOp(if_op, {then_branch, else_branch}, module,
+                                  buffer_to_size,
+                                  decomposed_partitioned_call_callees))) {
+        return failure();
+      }
+    } else if (auto case_op = llvm::dyn_cast<TF::CaseOp>(&op)) {
+      SmallVector<FuncOp, 2> branches;
+      for (auto branch_symbol : case_op.branches()) {
+        branches.push_back(module.lookupSymbol<FuncOp>(
+            branch_symbol.cast<FlatSymbolRefAttr>()));
+      }
+      if (failed(HandleCaseOrIfOp(case_op, branches, module, buffer_to_size,
+                                  decomposed_partitioned_call_callees))) {
         return failure();
       }
     } else if (auto pcall = llvm::dyn_cast<TF::PartitionedCallOp>(&op)) {
",0,train
8de821fc169fb9bad8be681801e8551171f8e44a,tensorflow/tensorflow,"make_vjp in eager

PiperOrigin-RevId: 172363016",backprop.py,"@@ -581,6 +581,62 @@ def val_and_grad_function(f, params=None):
   return decorated
 
 
+def make_vjp(f, params=None):
+  """"""Returns a function that computes f and is vjp w.r.t. params.
+
+  The term ""vjp"" here is an abbreviation for vector-jacobian product.
+
+  Args:
+    f: the function to be differentiated.
+    params: the parameters (numbers or names) to differentiate with respect to.
+       A value of None will differentiate with respect to all parameters.
+
+  Returns:
+    A function, which when called, returns a tuple (value, vjp), where:
+    - value is the result of calling f.
+    - vjp is a function, which takes a vector as an argument and
+      returns the product of that vector with the Jacobian of f.
+      Providing no argument to vjp is equivalent to providing a
+      vector of ones.
+
+    For example,
+    ```python
+    def f(x):
+      return x * x
+
+    wrapped_fn = tfe.make_vjp(f)
+    result, vjp = wrapped_fn(tf.constant(3.0))
+    # result is 9.0
+    vjp()  # the vjp function returns 6.0
+
+  """"""
+
+  parameter_positions = _get_arg_spec(f, params)
+
+  def decorated(*args, **kwds):
+    """"""Computes the value and gradient of the decorated function.""""""
+    assert not kwds, ""The gradient function can't take keyword arguments.""
+    tape.push_new_tape()
+    sources = []
+    args = [
+        ops.convert_to_tensor(args[i]) if i in parameter_positions else args[i]
+        for i in range(len(args))
+    ]
+    args = _ensure_unique_tensor_objects(parameter_positions, args)
+    for i in parameter_positions:
+      sources.append(args[i])
+      tape.watch(args[i])
+    result = f(*args)
+    t = tape.pop_tape()
+    def vjp(dy=None):
+      return imperative_grad.imperative_grad(
+          _default_vspace, t, nest.flatten(result), sources,
+          output_gradients=nest.flatten(dy) if dy is not None else None)
+    return result, vjp
+
+  return decorated
+
+
 def _aggregate_grads(gradients):
   """"""Aggregate gradients from multiple sources.
 
",0,test
8de821fc169fb9bad8be681801e8551171f8e44a,tensorflow/tensorflow,"make_vjp in eager

PiperOrigin-RevId: 172363016",backprop_test.py,"@@ -168,6 +168,16 @@ class BackpropTest(test.TestCase):
     grad = backprop.gradients_function(second, [0])(f)[0]
     self.assertAllEqual([[0.0]], grad.numpy())
 
+  def testMakeVJP(self):
+
+    def f(x):
+      return x * x
+
+    wrapped_fn = backprop.make_vjp(f)
+    result, vjp = wrapped_fn(constant_op.constant(3.0))
+    self.assertEqual(result.numpy(), 9.0)
+    self.assertEqual(vjp(2.0)[0].numpy(), 12.0)
+
   def testGradGrad(self):
 
     def sq(x):
",0,test
3c5ef53e374cf029a3d595fac6a83d3d337568e2,tensorflow/tensorflow,[3] Review comments handled,conv.cc,"@@ -171,7 +171,7 @@ bool IsIm2ColRequired(TfLiteTensor* input, TfLiteConvParams* params,
 
   switch (kernel_type) {
     case kReference:
-      if (input->type == kTfLiteFloat32) {
+      if (is_hybrid) {
         return true;
       } else {
         return false;
",0,train
a376886b16f2bb3cc268594cf23dadb826d12d48,tensorflow/tensorflow,"Added more constraints to the Texture3D creation check.
Moved the one-layer-support check.

PiperOrigin-RevId: 288365835
Change-Id: I19e6026a5c1e2bc9a1d2f82c2b4075eb41878f4e",tensor.cc,"@@ -297,10 +297,19 @@ bool CanCreateTensorWithShape(const CLContext& context, const CLDevice& device,
       return shape.b * shape.w * shape.h * depth <=
              device.GetInfo().image_buffer_max_size;
     case TensorStorageType::TEXTURE_3D:
+      if (device.cl_version() < OpenCLVersion::CL_1_2 && depth == 1) {
+        // clCreateImage3D (used in CL 1.0/1.1) cannot create an image with
+        // depth = 1, per the specification.
+        return false;
+      }
       return shape.w * shape.b <= device.GetInfo().image3d_max_width &&
              shape.h <= device.GetInfo().image3d_max_height &&
              depth <= device.GetInfo().image3d_max_depth;
     case TensorStorageType::TEXTURE_ARRAY:
+      // Bug on some Adreno. b/131099086
+      if (depth == 1 && !device.SupportsOneLayerTextureArray()) {
+        return false;
+      }
       return shape.w * shape.b <= device.GetInfo().image2d_max_width &&
              shape.h <= device.GetInfo().image2d_max_height &&
              depth <= device.GetInfo().image_array_max_layers;
@@ -412,12 +421,7 @@ Status AllocateTensorMemory(const CLContext& context, const CLDevice& device,
       desc.image_width = shape.w * shape.b;
       desc.image_height = shape.h;
       desc.image_depth = 0;
-      int layers_count = depth;
-      // Adreno bug. b/131099086
-      if (layers_count == 1 && !device.SupportsOneLayerTextureArray()) {
-        layers_count = 2;
-      }
-      desc.image_array_size = layers_count;
+      desc.image_array_size = depth;
       desc.image_row_pitch = 0;
       desc.image_slice_pitch = 0;
       desc.num_mip_levels = 0;
",0,train
52581df7928b137e4831c26696fea8634dbaefb0,tensorflow/tensorflow,"Support zero shapes for random_poisson. This matches random_uniform.

PiperOrigin-RevId: 159771215",random_poisson_op.cc,"@@ -303,10 +303,6 @@ class RandomPoissonOp : public OpKernel {
 
     const auto rate_flat = rate_t.flat<T>().data();
     const int64 num_rate = rate_t.NumElements();
-    OP_REQUIRES(
-        ctx, num_rate > 0,
-        errors::InvalidArgument(
-            ""Input rate should have non-zero element count, got: "", num_rate));
     auto samples_flat = samples_t->flat<T>().data();
     random::PhiloxRandom rng = generator_.ReserveRandomOutputs(
         num_samples * num_rate, kReservedSamplesPerOutput);
",0,train
52581df7928b137e4831c26696fea8634dbaefb0,tensorflow/tensorflow,"Support zero shapes for random_poisson. This matches random_uniform.

PiperOrigin-RevId: 159771215",random_poisson_test.py,"@@ -131,8 +131,14 @@ class RandomPoissonTest(test.TestCase):
         # be at least 1 if they are different.
         self.assertGreaterEqual(np.linalg.norm(diff.eval()), 1)
 
+  def testZeroShape(self):
+    with self.test_session():
+      rnd = random_ops.random_poisson([], [], seed=12345)
+      self.assertEqual([0], rnd.get_shape().as_list())
+      self.assertAllClose(np.array([], dtype=np.float32), rnd.eval())
+
   def testShape(self):
-    # Fully known shape.
+    # Fully known shape
     rnd = random_ops.random_poisson(2.0, [150], seed=12345)
     self.assertEqual([150], rnd.get_shape().as_list())
     rnd = random_ops.random_poisson(
",0,train
c90a24463ded1d22fc7b029a37d481dc3a626da6,tensorflow/tensorflow,"Make GRPC client events wait for their requirement events before waiting on themselves.

PiperOrigin-RevId: 281374353
Change-Id: I84483a658683b52091f488b7ed47a5a4d7833f3b",grpc_tpu_driver.cc,"@@ -215,10 +215,13 @@ class GrpcTpuStream {
   friend class GrpcTpuDriver;
 
   struct EventInfo {
+    bool all_deps_done = false;
     bool done = false;     // response received
     bool deleted = false;  // deleted by the user
     Status status;
     absl::InlinedVector<std::function<void(Status)>, 1> callbacks;
+    // Most events should have <= 2 requirement events.
+    absl::InlinedVector<EventId, 2> deps;
   };
 
   struct TransferInfo {
@@ -491,13 +494,22 @@ GrpcTpuStream::~GrpcTpuStream() {
 void GrpcTpuStream::InitializeRequest(StreamRequest::Entry* req,
                                       absl::Span<Event* const> wait_for) {
   auto operation_id = driver_->NewOperationId();
+  EventInfo event_info;
+
   req->set_operation_id(operation_id.AsInt());
-  for (auto* event : wait_for) {
-    auto grpc_event = static_cast<const GrpcEvent*>(event);
-    req->add_wait_for_id(grpc_event->id().AsInt());
+  if (wait_for.empty()) {
+    event_info.all_deps_done = true;
+  } else {
+    event_info.deps.reserve(wait_for.size());
+    for (auto* event : wait_for) {
+      auto grpc_event = static_cast<const GrpcEvent*>(event);
+      req->add_wait_for_id(grpc_event->id().AsInt());
+      event_info.deps.push_back(grpc_event->id());
+    }
   }
+
   absl::MutexLock lock(&events_mutex_);
-  events_[EventId::FromInt(req->operation_id())] = EventInfo();
+  events_[operation_id] = event_info;
 }
 
 void GrpcTpuStream::UpdateEventStatus(EventId id, Status status) {
@@ -551,16 +563,46 @@ void GrpcTpuStream::DeleteEvent(EventId id) {
 
 absl::optional<Status> GrpcTpuStream::WaitForEvent(EventId id,
                                                    absl::Duration duration) {
-  absl::MutexLock lock(&events_mutex_);
+  events_mutex_.Lock();
+  auto it = events_.find(id);
+
+  if (it == events_.end()) {
+    // This event has already been marked as done and deleted. Assume success.
+    events_mutex_.Unlock();
+    return Status::OK();
+  }
+
+  if (!it->second.all_deps_done) {
+    absl::InlinedVector<EventId, 2> deps = it->second.deps;
+    events_mutex_.Unlock();
+    for (auto dep : deps) {
+      // If a requirement event timed out, no point in any further waiting.
+      if (!WaitForEvent(dep, duration)) {
+        return absl::nullopt;
+      }
+    }
+    events_mutex_.Lock();
+  }
+
+  // Set the flag here, as we're guaranteed they have all completed at this
+  // point. This helps terminate recursion on a chain of completed events as
+  // soon as possible, at this event.
+  it = events_.find(id);
+  if (it != events_.end()) {
+    it->second.all_deps_done = true;
+  }
+
   auto done = [this, id]() {
     events_mutex_.AssertHeld();
     return !events_.contains(id) || events_[id].done;
   };
-
   if (events_mutex_.AwaitWithTimeout(absl::Condition(&done), duration)) {
-    return events_.contains(id) ? events_[id].status : Status();
+    auto status = events_.contains(id) ? events_[id].status : Status::OK();
+    events_mutex_.Unlock();
+    return status;
   }
-  return absl::optional<Status>();
+  events_mutex_.Unlock();
+  return absl::nullopt;
 }
 
 void GrpcTpuStream::AddEventCallback(EventId id,
",0,train
c90a24463ded1d22fc7b029a37d481dc3a626da6,tensorflow/tensorflow,"Make GRPC client events wait for their requirement events before waiting on themselves.

PiperOrigin-RevId: 281374353
Change-Id: I84483a658683b52091f488b7ed47a5a4d7833f3b",tpu_driver.h,"@@ -52,8 +52,9 @@ class Event {
  public:
   virtual ~Event() {}
 
-  // Block until the event completes and returns the result status.
+  // Blocks until the event completes and returns the result status.
   virtual xla::Status Await() = 0;
+  // Returns an empty result if the wait times out.
   virtual absl::optional<xla::Status> AwaitWithTimeout(
       absl::Duration duration) = 0;
 
",0,train
680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing.
Change: 132370925",record_reader_writer_test.cc,"@@ -67,4 +67,42 @@ TEST(RecordReaderWriterTest, TestBasics) {
   }
 }
 
+TEST(RecordReaderWriterTest, TestZlib) {
+  Env* env = Env::Default();
+  string fname = testing::TmpDir() + ""/record_reader_writer_zlib_test"";
+
+  for (auto buf_size : BufferSizes()) {
+    // Zlib compression needs output buffer size > 1.
+    if (buf_size == 1) continue;
+    {
+      std::unique_ptr<WritableFile> file;
+      TF_CHECK_OK(env->NewWritableFile(fname, &file));
+
+      io::RecordWriterOptions options;
+      options.compression_type = io::RecordWriterOptions::ZLIB_COMPRESSION;
+      options.zlib_options.output_buffer_size = buf_size;
+      io::RecordWriter writer(file.get(), options);
+      writer.WriteRecord(""abc"");
+      writer.WriteRecord(""defg"");
+      TF_CHECK_OK(writer.Flush());
+    }
+
+    {
+      std::unique_ptr<RandomAccessFile> read_file;
+      // Read it back with the RecordReader.
+      TF_CHECK_OK(env->NewRandomAccessFile(fname, &read_file));
+      io::RecordReaderOptions options;
+      options.compression_type = io::RecordReaderOptions::ZLIB_COMPRESSION;
+      options.zlib_options.input_buffer_size = buf_size;
+      io::RecordReader reader(read_file.get(), options);
+      uint64 offset = 0;
+      string record;
+      TF_CHECK_OK(reader.ReadRecord(&offset, &record));
+      EXPECT_EQ(""abc"", record);
+      TF_CHECK_OK(reader.ReadRecord(&offset, &record));
+      EXPECT_EQ(""defg"", record);
+    }
+  }
+}
+
 }  // namespace tensorflow
",0,train
680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing.
Change: 132370925",record_writer.cc,"@@ -33,6 +33,11 @@ RecordWriter::RecordWriter(WritableFile* dest,
     zlib_output_buffer_.reset(new ZlibOutputBuffer(
         dest_, options.zlib_options.input_buffer_size,
         options.zlib_options.output_buffer_size, options.zlib_options));
+    Status s = zlib_output_buffer_->Init();
+    if (!s.ok()) {
+      LOG(FATAL) << ""Failed to initialize Zlib inputbuffer. Error: ""
+                 << s.ToString();
+    }
 #endif  // IS_SLIM_BUILD
   } else if (options.compression_type == RecordWriterOptions::NONE) {
     // Nothing to do
",0,train
680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing.
Change: 132370925",zlib_buffers_test.cc,"@@ -73,6 +73,7 @@ void TestAllCombinations(CompressionOptions input_options,
 
         ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                              output_options);
+        TF_CHECK_OK(out.Init());
 
         TF_CHECK_OK(out.Write(StringPiece(data)));
         TF_CHECK_OK(out.Close());
@@ -120,6 +121,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
   TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
   ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                        output_options);
+  TF_CHECK_OK(out.Init());
 
   for (int i = 0; i < num_writes; i++) {
     TF_CHECK_OK(out.Write(StringPiece(data)));
@@ -172,6 +174,7 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
   string result;
   ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                        output_options);
+  TF_CHECK_OK(out.Init());
 
   TF_CHECK_OK(out.Write(StringPiece(data)));
   TF_CHECK_OK(out.Close());
",0,train
680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing.
Change: 132370925",zlib_outputbuffer.cc,"@@ -15,6 +15,8 @@ limitations under the License.
 
 #include ""tensorflow/core/lib/io/zlib_outputbuffer.h""
 
+#include ""tensorflow/core/lib/core/errors.h""
+
 namespace tensorflow {
 namespace io {
 
@@ -25,35 +27,45 @@ ZlibOutputBuffer::ZlibOutputBuffer(
     const ZlibCompressionOptions&
         zlib_options)  // size of z_stream.next_out buffer
     : file_(file),
+      init_status_(),
       input_buffer_capacity_(input_buffer_bytes),
       output_buffer_capacity_(output_buffer_bytes),
       z_stream_input_(new Bytef[input_buffer_bytes]),
       z_stream_output_(new Bytef[output_buffer_bytes]),
       zlib_options_(zlib_options),
-      z_stream_(new z_stream) {
+      z_stream_(new z_stream) {}
+
+ZlibOutputBuffer::~ZlibOutputBuffer() {
+  if (z_stream_.get()) {
+    LOG(WARNING) << ""ZlibOutputBuffer::Close() not called. Possible data loss"";
+  }
+}
+
+Status ZlibOutputBuffer::Init() {
+  // Output buffer size should be greater than 1 because deflation needs at
+  // least one byte for bookkeeping, etc.
+  if (output_buffer_capacity_ <= 1) {
+    return errors::InvalidArgument(
+        ""output_buffer_bytes should be greater than ""
+        ""1"");
+  }
   memset(z_stream_.get(), 0, sizeof(z_stream));
   z_stream_->zalloc = Z_NULL;
   z_stream_->zfree = Z_NULL;
   z_stream_->opaque = Z_NULL;
   int status =
-      deflateInit2(z_stream_.get(), zlib_options.compression_level,
-                   zlib_options.compression_method, zlib_options.window_bits,
-                   zlib_options.mem_level, zlib_options.compression_strategy);
+      deflateInit2(z_stream_.get(), zlib_options_.compression_level,
+                   zlib_options_.compression_method, zlib_options_.window_bits,
+                   zlib_options_.mem_level, zlib_options_.compression_strategy);
   if (status != Z_OK) {
-    LOG(FATAL) << ""deflateInit failed with status "" << status;
     z_stream_.reset(NULL);
-  } else {
-    z_stream_->next_in = z_stream_input_.get();
-    z_stream_->next_out = z_stream_output_.get();
-    z_stream_->avail_in = 0;
-    z_stream_->avail_out = output_buffer_capacity_;
-  }
-}
-
-ZlibOutputBuffer::~ZlibOutputBuffer() {
-  if (z_stream_.get()) {
-    LOG(WARNING) << ""ZlibOutputBuffer::Close() not called. Possible data loss"";
+    return errors::InvalidArgument(""deflateInit failed with status"", status);
   }
+  z_stream_->next_in = z_stream_input_.get();
+  z_stream_->next_out = z_stream_output_.get();
+  z_stream_->avail_in = 0;
+  z_stream_->avail_out = output_buffer_capacity_;
+  return Status::OK();
 }
 
 int32 ZlibOutputBuffer::AvailableInputSpace() const {
",0,train
680966059e7a5ddc70f1ec4f10e7b19c64c60e4b,tensorflow/tensorflow,"Added a check for output_buffer_size <= 1 for ZlibOutputBuffer. Also adding some tests for Zlib compression reading / writing.
Change: 132370925",zlib_outputbuffer.h,"@@ -45,6 +45,7 @@ class ZlibOutputBuffer {
   // 2. the deflated output
   // with sizes `input_buffer_bytes` and `output_buffer_bytes` respectively.
   // Does not take ownership of `file`.
+  // output_buffer_bytes should be greater than 1.
   ZlibOutputBuffer(
       WritableFile* file,
       int32 input_buffer_bytes,   // size of z_stream.next_in buffer
@@ -53,6 +54,10 @@ class ZlibOutputBuffer {
 
   ~ZlibOutputBuffer();
 
+  // Initializes some state necessary for the output buffer. This call is
+  // required before any other operation on the buffer.
+  Status Init();
+
   // Adds `data` to the compression pipeline.
   //
   // The input data is buffered in `z_stream_input_` and is compressed in bulk
@@ -78,6 +83,7 @@ class ZlibOutputBuffer {
 
  private:
   WritableFile* file_;  // Not owned
+  Status init_status_;
   size_t input_buffer_capacity_;
   size_t output_buffer_capacity_;
 
",0,train
af08f0ae55a7e4cc9f539dbd41266ac41903b7ef,tensorflow/tensorflow,"Avoid implicit double promotion in portable_tensor_utils.cc

PiperOrigin-RevId: 389050862
Change-Id: Ife07730df682baec4882ecb72db182173a3a8d7a",portable_tensor_utils.cc,"@@ -484,9 +484,9 @@ void PortableApplyLayerNormFloat(const int16_t* input,
     float stddev_inv = 0.0f;
     const float variance = sum_sq / n_input - mean * mean;
     if (variance == 0) {
-      stddev_inv = 1.0f / sqrt(1e-8f);
+      stddev_inv = 1.0f / std::sqrt(1e-8f);
     } else {
-      stddev_inv = 1.0f / sqrt(variance);
+      stddev_inv = 1.0f / std::sqrt(variance);
     }
     for (int i = 0; i < n_input; ++i) {
       const int index = batch * n_input + i;
",0,train
b1c9e600e02b93885dbebfa5dae92436c63d6c03,tensorflow/tensorflow,"[XLA] Add range check for xla::Array<> indexing.

PiperOrigin-RevId: 356981991
Change-Id: I73343a8776b0df0f2570bcd596247164c8588cb9",array.h,"@@ -561,6 +561,7 @@ class Array {
       index *= sizes_[i];
       index += indexes[i];
     }
+    DCHECK_LT(index, this->num_elements());
     return index;
   }
 
",0,train
82e53ebecdac677b37cd9316f8d1be5b1627eec3,tensorflow/tensorflow,"Fix sparse case of ProximalGradientDescent not being plumbed correctly
Change: 127908886",proximal_gradient_descent.py,"@@ -68,9 +68,14 @@ class ProximalGradientDescentOptimizer(optimizer.Optimizer):
         use_locking=self._use_locking).op
 
   def _apply_sparse(self, grad, var):
-    delta = ops.IndexedSlices(grad.values * self._learning_rate_tensor,
-                              grad.indices, grad.dense_shape)
-    return var.scatter_sub(delta, use_locking=self._use_locking)
+    return training_ops.sparse_apply_proximal_gradient_descent(
+        var,
+        self._learning_rate_tensor,
+        self._l1_regularization_strength_tensor,
+        self._l2_regularization_strength_tensor,
+        grad.values,
+        grad.indices,
+        use_locking=self._use_locking).op
 
   def _prepare(self):
     self._learning_rate_tensor = ops.convert_to_tensor(self._learning_rate,
",0,train
f5a9d24c847ffcc7ae09e850aad39e2cb55ae4f3,tensorflow/tensorflow,"Added shape inference for tf_device.LaunchOp.

PiperOrigin-RevId: 317875384
Change-Id: Idb070c9e92d07ee19cd8ed26c1beec3de86f43df",shape_inference.cc,"@@ -215,6 +215,10 @@ bool InferShapeForNonTFDialectOperation(Operation* op, Dialect* tf_dialect) {
     return InferShapeForPassThroughOps(
         tensor_cast.getOperation()->getOperands(), op, tf_dialect);
   }
+  if (auto launch_op = dyn_cast<tf_device::LaunchOp>(op)) {
+    return InferShapeForPassThroughOps(
+        launch_op.GetBody().getTerminator()->getOperands(), op, tf_dialect);
+  }
   return false;
 }
 
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_ops.cc,"@@ -57,6 +57,7 @@ limitations under the License.
 #include ""mlir/Support/LogicalResult.h""  // TF:llvm-project
 #include ""mlir/Support/STLExtras.h""  // TF:llvm-project
 #include ""mlir/Transforms/InliningUtils.h""  // TF:llvm-project
+#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_types.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/util/tensor_format.h""
@@ -1000,6 +1001,11 @@ LogicalResult Conv2DOp::UpdateDataFormat(StringRef data_format) {
   return success();
 }
 
+StringRef Conv2DOp::GetOptimalLayout(const RuntimeDevices &devices) {
+  // TODO(ezhulenev): Implement optimal layout selection.
+  return """";
+}
+
 //===----------------------------------------------------------------------===//
 // Conv2dBackpropInputOp
 //===----------------------------------------------------------------------===//
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_ops.h,"@@ -29,6 +29,7 @@ limitations under the License.
 #include ""mlir/IR/OpImplementation.h""  // TF:llvm-project
 #include ""mlir/IR/StandardTypes.h""  // TF:llvm-project
 #include ""mlir/IR/TypeUtilities.h""  // TF:llvm-project
+#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_traits.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_types.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_verifiers.h""
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_structs.cc,"@@ -20,4 +20,27 @@ namespace mlir {
 // NOLINTNEXTLINE
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.cc.inc""
 
+namespace TF {
+
+void RuntimeDevices::AddDevice(const ParsedName& device) {
+  device_names_.push_back(device);
+}
+
+void RuntimeDevices::AddGpuDevice(const ParsedName& device,
+                                  const GpuDeviceMetadata& metadata) {
+  device_names_.push_back(device);
+  gpu_metadata_.insert({DeviceNameUtils::ParsedNameToString(device), metadata});
+}
+
+llvm::Optional<GpuDeviceMetadata> RuntimeDevices::GetGpuDeviceMetadata(
+    const ParsedName& device) const {
+  auto it = gpu_metadata_.find(DeviceNameUtils::ParsedNameToString(device));
+  if (it != gpu_metadata_.end()) {
+    return it->second;
+  } else {
+    return llvm::None;
+  }
+}
+
+}  // namespace TF
 }  // namespace mlir
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tf_structs.h,"@@ -18,16 +18,50 @@ limitations under the License.
 #ifndef TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_
 #define TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_
 
+#include ""llvm/ADT/StringMap.h""
 #include ""mlir/IR/Diagnostics.h""  // TF:llvm-project
 #include ""mlir/IR/Location.h""  // TF:llvm-project
 #include ""mlir/IR/Operation.h""  // TF:llvm-project
 #include ""mlir/IR/StandardTypes.h""  // TF:llvm-project
 #include ""mlir/IR/Types.h""  // TF:llvm-project
+#include ""tensorflow/core/util/device_name_utils.h""
 
 namespace mlir {
 
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_structs.h.inc""
 
-}  // end namespace mlir
+namespace TF {
+
+// Tensorflow devices available at runtime with corresponding metadata if it is
+// available. It's completely valid to have a device without any metadata
+// attached to it.
+class RuntimeDevices {
+  using DeviceNameUtils = ::tensorflow::DeviceNameUtils;
+  using ParsedName = ::tensorflow::DeviceNameUtils::ParsedName;
+
+ public:
+  // Adds a device with empty metadata. The device can be of any type.
+  void AddDevice(const ParsedName& device);
+
+  // Adds a GPU device with GPU specific metadata.
+  void AddGpuDevice(const ParsedName& device,
+                    const GpuDeviceMetadata& metadata);
+
+  llvm::ArrayRef<ParsedName> device_names() const { return device_names_; }
+  size_t NumDevices() const { return device_names_.size(); }
+
+  // Returns GPU device metadata if it is available, otherwise returns None.
+  llvm::Optional<GpuDeviceMetadata> GetGpuDeviceMetadata(
+      const ParsedName& device) const;
+
+ private:
+  llvm::SmallVector<ParsedName, 8> device_names_;
+  // TODO(ezhulenev): Add DenseMapInfo<ParsedName> specialization to be able to
+  // use ParsedName as a key in a DenseMap.
+  llvm::StringMap<GpuDeviceMetadata> gpu_metadata_;
+};
+
+}  // namespace TF
+}  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_TENSORFLOW_IR_TF_STRUCTS_H_
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",tpu_rewrite_pass.cc,"@@ -723,13 +723,14 @@ LogicalResult Rewrite(
 }
 
 void TPURewritePass::runOnModule() {
-  llvm::SmallVector<tensorflow::DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   if (failed(tensorflow::GetDevicesFromOp(getModule(), &devices)))
     return signalPassFailure();
 
   OpBuilder builder(&getContext());
   auto result = getModule().walk([&](tf_device::LaunchFuncOp op) {
-    if (failed(Rewrite(op, devices, &builder))) return WalkResult::interrupt();
+    if (failed(Rewrite(op, devices.device_names(), &builder)))
+      return WalkResult::interrupt();
 
     return WalkResult::advance();
   });
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util.cc,"@@ -37,8 +37,6 @@ constexpr char kDevicesAttr[] = ""tf.devices"";
 
 namespace {
 
-using DeviceNames = llvm::SmallVectorImpl<DeviceNameUtils::ParsedName>;
-
 // Parse GPU compute capability from physical device description. If compute
 // capability is not found in device description, return an empty dictionary
 // attribute.
@@ -58,11 +56,13 @@ mlir::DictionaryAttr ParseGpuDeviceMetadata(const Device& device,
   return builder->getDictionaryAttr({});
 }
 
-// Get device names from an array of string attributes.
+// Get devices from an array of string attributes.
+// TODO(ezhulenev): Update all tests to use dictionary attribute for
+// `tf.devices` and remove this function.
 mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
                                      mlir::ArrayAttr array_attr,
-                                     DeviceNames* devices) {
-  devices->resize(array_attr.size());
+                                     mlir::TF::RuntimeDevices* devices) {
+  DeviceNameUtils::ParsedName device;
 
   for (auto& kv : llvm::enumerate(array_attr)) {
     const int idx = kv.index();
@@ -72,30 +72,39 @@ mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
       return op->emitOpError(llvm::formatv(
           ""bad '{0}' attribute at index {1}, not a string"", kDevicesAttr, idx));
 
-    if (!DeviceNameUtils::ParseFullName(string_attr.getValue().str(),
-                                        &(*devices)[idx]))
+    if (DeviceNameUtils::ParseFullName(string_attr.getValue().str(), &device)) {
+      devices->AddDevice(device);
+    } else {
       return op->emitOpError(
           llvm::formatv(""bad '{0}' attribute, '{1}', not a valid device"",
                         kDevicesAttr, string_attr.getValue()));
+    }
   }
 
   return mlir::success();
 }
 
-// Get device names from a metadata dictionary.
+// Get devices from a dictionary attribute.
 mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
                                      mlir::DictionaryAttr dict_attr,
-                                     DeviceNames* devices) {
-  devices->resize(dict_attr.size());
+                                     mlir::TF::RuntimeDevices* devices) {
+  DeviceNameUtils::ParsedName device;
 
   // Parse device names and metadata from dictionary attribute.
-  for (auto& kv : llvm::enumerate(dict_attr)) {
-    const mlir::Identifier name = kv.value().first;
+  for (auto& kv : dict_attr) {
+    const mlir::Identifier name = kv.first;
+    const mlir::Attribute attr = kv.second;
 
-    if (!DeviceNameUtils::ParseFullName(name.str(), &(*devices)[kv.index()]))
+    if (!DeviceNameUtils::ParseFullName(name.str(), &device))
       return op->emitOpError(
           llvm::formatv(""bad '{0}' attribute, '{1}', not a valid device"",
                         kDevicesAttr, name.strref()));
+
+    if (auto gpu_metadata = attr.dyn_cast<mlir::TF::GpuDeviceMetadata>()) {
+      devices->AddGpuDevice(device, gpu_metadata);
+    } else {
+      devices->AddDevice(device);
+    }
   }
 
   return mlir::success();
@@ -131,7 +140,7 @@ void AddDevicesToOp(mlir::Operation* op, const DeviceSet* device_set) {
 }
 
 mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
-                                     DeviceNames* devices) {
+                                     mlir::TF::RuntimeDevices* devices) {
   auto devices_attr = op->getAttr(kDevicesAttr);
   if (!devices_attr) return mlir::success();
 
@@ -146,15 +155,4 @@ mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
       llvm::formatv(""unsupported '{0}' attribute"", kDevicesAttr));
 }
 
-llvm::Optional<mlir::TF::GpuDeviceMetadata> GetGpuDeviceMetadata(
-    mlir::Operation* op, const DeviceNameUtils::ParsedName& device) {
-  auto metadata = op->getAttrOfType<mlir::DictionaryAttr>(kDevicesAttr);
-  if (!metadata) return llvm::None;
-
-  auto device_attr = metadata.get(DeviceNameUtils::ParsedNameToString(device));
-  if (!device_attr) return llvm::None;
-
-  return device_attr.dyn_cast<mlir::TF::GpuDeviceMetadata>();
-}
-
 }  // namespace tensorflow
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util.h,"@@ -36,16 +36,10 @@ namespace tensorflow {
 // (1) GpuDeviceMetadata: GPU device compute capability.
 void AddDevicesToOp(mlir::Operation* op, const DeviceSet* device_set);
 
-// Collects devices as DeviceNameUtils::ParsedName from an op `tf.devices`
-// attribute. A failure will be returned if device name is not valid.
-mlir::LogicalResult GetDevicesFromOp(
-    mlir::Operation* op,
-    llvm::SmallVectorImpl<DeviceNameUtils::ParsedName>* devices);
-
-// Returns GPU device metadata for the parsed device name if it exists in the
-// device metadata attributes, returns None otherwise.
-llvm::Optional<mlir::TF::GpuDeviceMetadata> GetGpuDeviceMetadata(
-    mlir::Operation* op, const DeviceNameUtils::ParsedName& device);
+// Collects device information from an op's `tf.devices` attribute. Returns
+// failure if the device metadata can't be parsed from the attribute.
+mlir::LogicalResult GetDevicesFromOp(mlir::Operation* op,
+                                     mlir::TF::RuntimeDevices* devices);
 
 }  // namespace tensorflow
 
",0,test
8d2178ea81b5e18c52d9742904e0de06d1362a10,tensorflow/tensorflow,"[TF:MLIR] Add Tensorflow RuntimeDevices to TF IR.

Keep track of Tensorflow devices available at runtime in a class that is available to the Ops defined in the Tensorflow IR. This allows writing optimization passes and Op interfaces that require device information.

PiperOrigin-RevId: 300227160
Change-Id: Ic4d7ed6e56cbb40e14d5bb50d82b56fc8ee662b6",device_util_test.cc,"@@ -113,7 +113,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpNoDevicesAttribute) {
   mlir::OwningModuleRef module_ref =
       mlir::ModuleOp::create(mlir::UnknownLoc::get(&context));
 
-  llvm::SmallVector<DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices)));
 }
 
@@ -124,7 +124,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesAttributeType) {
   mlir::Builder builder(*module_ref);
   module_ref->setAttr(""tf.devices"", builder.getBoolAttr(false));
 
-  llvm::SmallVector<DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices)));
 }
 
@@ -135,7 +135,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesAttributeArraySubtype) {
   mlir::Builder builder(*module_ref);
   module_ref->setAttr(""tf.devices"", builder.getI32ArrayAttr({8}));
 
-  llvm::SmallVector<DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices)));
 }
 
@@ -148,7 +148,7 @@ TEST(DeviceUtilTest, GetDevicesFromOpBadDevicesInDevicesAttribute) {
                       builder.getDictionaryAttr(builder.getNamedAttr(
                           ""bad_device"", builder.getDictionaryAttr({}))));
 
-  llvm::SmallVector<DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   EXPECT_TRUE(mlir::failed(GetDevicesFromOp(*module_ref, &devices)));
 }
 
@@ -163,10 +163,12 @@ TEST(DeviceUtilTest, GetDevicesFromOpValidDeviceInDevicesAttribute) {
                             builder.getDictionaryAttr({}))});
   module_ref->setAttr(""tf.devices"", device_dict);
 
-  llvm::SmallVector<DeviceNameUtils::ParsedName, 8> devices;
+  mlir::TF::RuntimeDevices devices;
   EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices)));
-  ASSERT_EQ(devices.size(), 1);
-  EXPECT_EQ(DeviceNameUtils::ParsedNameToString(devices[0]),
+
+  ASSERT_EQ(devices.NumDevices(), 1);
+  ASSERT_EQ(devices.device_names().size(), 1);
+  ASSERT_EQ(DeviceNameUtils::ParsedNameToString(devices.device_names()[0]),
             ""/job:worker/replica:0/task:0/device:CPU:0"");
 }
 
@@ -188,15 +190,18 @@ TEST(DeviceUtilTest, GetGpuDeviceMetadata) {
 
   module_ref->setAttr(""tf.devices"", builder.getDictionaryAttr(metadata));
 
+  mlir::TF::RuntimeDevices devices;
+  EXPECT_TRUE(mlir::succeeded(GetDevicesFromOp(*module_ref, &devices)));
+
   DeviceNameUtils::ParsedName parsed_name;
   DeviceNameUtils::ParseFullName(gpu0, &parsed_name);
-  auto meta_0 = GetGpuDeviceMetadata(*module_ref, parsed_name);
+  auto meta_0 = devices.GetGpuDeviceMetadata(parsed_name);
   ASSERT_TRUE(meta_0.hasValue());
   ASSERT_EQ(meta_0->cc_major().getInt(), 1);
   ASSERT_EQ(meta_0->cc_minor().getInt(), 2);
 
   DeviceNameUtils::ParseFullName(gpu1, &parsed_name);
-  auto meta_1 = GetGpuDeviceMetadata(*module_ref, parsed_name);
+  auto meta_1 = devices.GetGpuDeviceMetadata(parsed_name);
   ASSERT_FALSE(meta_1.hasValue());
 }
 
",0,test
1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync.

PiperOrigin-RevId: 386998531
Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",auto_shard_test.py,"@@ -501,12 +501,12 @@ class AutoShardTest(data_service_test_base.TestBase,
       _ = _make_service_cluster(
           num_workers=5, local_shard_index=1, worker_addresses=worker_addresses)
 
-  # TODO(b/186023347): Use Lint to keep the policies in sync.
   @combinations.generate(
       combinations.times(
           test_base.default_test_combinations(),
           combinations.combine(sharding_policy=list(ShardingPolicy))))
   def testEnumerateShardingPolicies(self, sharding_policy):
+    """"""Verifies tf.data service handles every sharding policy with no errors.""""""
     cluster = _make_service_cluster(num_workers=5, local_shard_index=3)
     dataset = dataset_ops.Dataset.list_files(self._filenames, shuffle=False)
     dataset = dataset.flat_map(readers.TFRecordDataset)
",0,train
1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync.

PiperOrigin-RevId: 386998531
Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",data_service_ops.py,"@@ -89,12 +89,14 @@ class ShardingPolicy(enum.IntEnum):
   placeholder to replace with `shard(num_workers, worker_index)`.
   """"""
 
+  # LINT.IfChange(tf_data_service_sharding_policy)
   OFF = 0
   DYNAMIC = 1
   FILE = 2
   DATA = 3
   FILE_OR_DATA = 4
   HINT = 5
+  # LINT.ThenChange()
 
   def _to_proto(self):
     """"""Converts the policy to ProcessingModeDef proto enum.""""""
",0,train
1fd93618d4e40ee126cc4406ade66782a6f632a8,tensorflow/tensorflow,"[tf.data service] Add a Lint check to keep policies in sync.

PiperOrigin-RevId: 386998531
Change-Id: I50496ce7022745318a84dd6fe9b61397c1295087",distribute_options.py,"@@ -45,11 +45,14 @@ class AutoShardPolicy(enum.IntEnum):
   HINT: Looks for the presence of `shard(SHARD_HINT, ...)` which is treated as a
   placeholder to replace with `shard(num_workers, worker_index)`.
   """"""
+
+  # LINT.IfChange
   OFF = -1
   AUTO = 0
   FILE = 1
   DATA = 2
   HINT = 3
+  # LINT.ThenChange(//tensorflow/python/data/experimental/ops/data_service_ops.py:tf_data_service_sharding_policy)
 
   @classmethod
   def _to_proto(cls, obj):
",0,train
eef4e3acc83c73963258ce848d1b9bad8021e036,tensorflow/tensorflow,Added pytest for Bfloat16,sparse_xent_op_test.py,"@@ -182,6 +182,23 @@ class SparseXentTest(test.TestCase):
           np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]).astype(np.float64),
           np.array([0, 3]).astype(label_dtype))
 
+  def testBfloat(self):
+    for label_dtype in np.int32, np.int64:
+      np_features = np.array([[1., 1., 1., 1.], [1., 2., 3., 4.]]
+              ).astype(np.float32)
+      np_labels = np.array([0, 3]).astype(label_dtype)
+      np_loss, np_backprop = self._npXent(np_features, np_labels)
+
+      bf_np_features = math_ops.cast(np_features, dtypes.bfloat16)
+      bf_np_loss = math_ops.cast(np_loss, dtypes.bfloat16)
+      bf_np_backprop = math_ops.cast(np_backprop, dtypes.bfloat16)
+      with self.cached_session(use_gpu=False) as sess:
+        loss, backprop = gen_nn_ops.sparse_softmax_cross_entropy_with_logits(
+          bf_np_features, np_labels)
+        tf_loss, tf_backprop = self.evaluate([loss, backprop])
+      self.assertAllCloseAccordingToType(bf_np_loss, tf_loss)
+      self.assertAllCloseAccordingToType(bf_np_backprop, tf_backprop)
+
   def testHalf(self):
     for label_dtype in np.int32, np.int64:
       self._testXent(
",0,train
731ba067d07db04ecffb176f4310ce0f163dac14,tensorflow/tensorflow,"[XLA:CPU] Avoid UB in cpu_runtime

PiperOrigin-RevId: 420339155
Change-Id: I91ee2affba5af35e731503ede3ea6669d093d713",cpu_runtime.cc,"@@ -587,13 +587,29 @@ class CpuAllReduceRendezvous
     }
   }
 
+  template <typename T, bool kIsSignedIntegralType>
+  struct SumProductTypeForReductionStep {
+    using type = T;
+  };
+
+  template <typename T>
+  struct SumProductTypeForReductionStep<T, /*kIsSignedIntegralType=*/true> {
+    using type = typename std::make_unsigned_t<T>;
+  };
+
   template <typename T>
   T PerformReductionStep(xla::ReductionKind reduction_kind, T a, T b) {
+    using SumProductType = typename SumProductTypeForReductionStep<
+        T, std::is_integral<T>::value && std::is_signed<T>::value>::type;
     switch (reduction_kind) {
       case xla::ReductionKind::SUM:
-        return a + b;
+        return absl::bit_cast<T>(
+            static_cast<SumProductType>(absl::bit_cast<SumProductType>(a) +
+                                        absl::bit_cast<SumProductType>(b)));
       case xla::ReductionKind::PRODUCT:
-        return a * b;
+        return absl::bit_cast<T>(
+            static_cast<SumProductType>(absl::bit_cast<SumProductType>(a) *
+                                        absl::bit_cast<SumProductType>(b)));
       case xla::ReductionKind::MIN:
         return std::min(a, b);
       case xla::ReductionKind::MAX:
",0,test
2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,metrics.py,"@@ -12,13 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-# pylint: disable=unused-import
 # pylint: disable=g-classes-have-attributes
 # pylint: disable=g-doc-return-or-yield
 """"""Built-in metrics.""""""
 
 import abc
-import math
 import types
 import warnings
 
@@ -33,7 +31,6 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
-from tensorflow.python.framework import tensor_spec
 from tensorflow.python.keras import activations
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine import base_layer
@@ -56,7 +53,6 @@ from tensorflow.python.keras.saving.saved_model import metric_serialization
 from tensorflow.python.keras.utils import generic_utils
 from tensorflow.python.keras.utils import losses_utils
 from tensorflow.python.keras.utils import metrics_utils
-from tensorflow.python.keras.utils import tf_inspect
 from tensorflow.python.keras.utils.generic_utils import deserialize_keras_object
 from tensorflow.python.keras.utils.generic_utils import serialize_keras_object
 from tensorflow.python.keras.utils.generic_utils import to_list
@@ -64,13 +60,11 @@ from tensorflow.python.keras.utils.tf_utils import is_tensor_or_variable
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import confusion_matrix
-from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import variables as variables_module
 from tensorflow.python.ops import weights_broadcast_ops
-from tensorflow.python.training.tracking import base as trackable
 from tensorflow.python.util import dispatch
 from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import keras_export
@@ -1575,13 +1569,11 @@ class SensitivitySpecificityBase(Metric, metaclass=abc.ABCMeta):
 
     Returns maximal dependent value, if no value satiesfies the constraint 0.0.
     """"""
-    feasible = array_ops.where(predicate(constrained, self.value))
+    feasible = array_ops.where_v2(predicate(constrained, self.value))
     feasible_exists = math_ops.greater(array_ops.size(feasible), 0)
+    max_dependent = math_ops.reduce_max(array_ops.gather(dependent, feasible))
 
-    def get_max():
-      return math_ops.reduce_max(array_ops.gather(dependent, feasible))
-
-    return control_flow_ops.cond(feasible_exists, get_max, lambda: 0.0)
+    return array_ops.where_v2(feasible_exists, max_dependent, 0.0)
 
 
 @keras_export('keras.metrics.SensitivityAtSpecificity')
",0,train
2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,learning_rate_schedule.py,"@@ -20,6 +20,7 @@ import math
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import ops
 from tensorflow.python.keras.utils import generic_utils
+from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
@@ -416,9 +417,9 @@ class PolynomialDecay(LearningRateSchedule):
       if self.cycle:
         # Find the first multiple of decay_steps that is bigger than
         # global_step. If global_step is zero set the multiplier to 1
-        multiplier = control_flow_ops.cond(
-            math_ops.equal(global_step_recomp, 0), lambda: 1.0,
-            lambda: math_ops.ceil(global_step_recomp / self.decay_steps))
+        multiplier = array_ops.where_v2(
+            math_ops.equal(global_step_recomp, 0), 1.0,
+            math_ops.ceil(global_step_recomp / self.decay_steps))
         decay_steps_recomp = math_ops.multiply(decay_steps_recomp, multiplier)
       else:
         # Make sure that the global_step used is not bigger than decay_steps.
",0,train
2b482a4bd7bd69cee5775278ab7b4d22563b6e81,tensorflow/tensorflow,Keras: Replace trivial control flow with tf.where,metrics_utils.py,"@@ -377,9 +377,8 @@ def update_confusion_matrix_variables(variables_to_update,
     num_labels = 1
   else:
     num_labels = gen_math_ops.Prod(input=pred_shape[1:], axis=0)
-  thresh_label_tile = control_flow_ops.cond(
-      one_thresh, lambda: num_labels,
-      lambda: math_ops.cast(1, dtype=dtypes.int32))
+  thresh_label_tile = array_ops.where_v2(
+      one_thresh, num_labels, array_ops.ones([], dtype=dtypes.int32))
 
   # Reshape predictions and labels, adding a dim for thresholding.
   if multi_label:
",0,train
50eb1f5f289a77298b72a87d4aa74274e28c5a98,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-06-02

PiperOrigin-RevId: 377011648
Change-Id: Iff283dd9876abc4676061ef412769393df73fbd7",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 6, 1)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 6, 2)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,test
aa0695e115a51d2ec1998c68463f83a9121f0ee1,tensorflow/tensorflow,"The original motivation seems to have been that inside fusions we don't have
layouts and thus cannot replace reshapes and transposes with bitcasts.
Therefore we preferred not fusing potential bitcasts.
Now that we run layout assignment before fusion, we do already replace
potential bitcasts with bitcasts, so this check is obsolete.

PiperOrigin-RevId: 302648869
Change-Id: I2b07192afc06be3a7220797571c19e9e747482c9",gpu_fusible.cc,"@@ -239,13 +239,6 @@ bool IsProducerConsumerFusible(const HloInstruction& producer,
       !LayoutsAreReduceInputFusionFriendly(producer, consumer)) {
     return false;
   }
-  // We can't fuse library calls, so if a user of such an op could become a
-  // bitcast, leave it unfused. See `xla::InstructionFusion::ShouldFuse` for
-  // further rationale.
-  if (producer.CouldBeBitcast() &&
-      ImplementedAsLibraryCall(*producer.operand(0))) {
-    return false;
-  }
   // Fuse scalar constants into loop fusion nodes. This reduces the number of
   // parameters and makes matching scalar broadcasts easier.
   //
",0,train
aa0695e115a51d2ec1998c68463f83a9121f0ee1,tensorflow/tensorflow,"The original motivation seems to have been that inside fusions we don't have
layouts and thus cannot replace reshapes and transposes with bitcasts.
Therefore we preferred not fusing potential bitcasts.
Now that we run layout assignment before fusion, we do already replace
potential bitcasts with bitcasts, so this check is obsolete.

PiperOrigin-RevId: 302648869
Change-Id: I2b07192afc06be3a7220797571c19e9e747482c9",instruction_fusion_test.cc,"@@ -109,21 +109,23 @@ TEST_F(InstructionFusionTest,
   EXPECT_THAT(computation->root_instruction(), op::Fusion());
 }
 
-TEST_F(InstructionFusionTest, PotentialBitcastReshapeOfDotUnfused) {
+TEST_F(InstructionFusionTest, PotentialBitcastReshapeOfDotFused) {
   HloComputation::Builder builder(TestName());
   auto param0 = builder.AddInstruction(HloInstruction::CreateParameter(
-      0, ShapeUtil::MakeShape(S32, {1, 1}), ""0""));
+      0, ShapeUtil::MakeShape(F32, {1, 1}), ""0""));
   auto dot1 = builder.AddInstruction(
-      CreateCanonicalDot(ShapeUtil::MakeShape(S32, {1, 1}), param0, param0));
+      CreateCanonicalDot(ShapeUtil::MakeShape(F32, {1, 1}), param0, param0));
   auto reshape2 = builder.AddInstruction(HloInstruction::CreateReshape(
-      ShapeUtil::MakeShape(S32, {1, 1, 1}), dot1));
+      ShapeUtil::MakeShape(F32, {1, 1, 1}), dot1));
+  auto log = builder.AddInstruction(HloInstruction::CreateUnary(
+      reshape2->shape(), xla::HloOpcode::kLog, reshape2));
 
   auto module = CreateNewVerifiedModule();
   auto computation = module->AddEntryComputation(builder.Build());
-  EXPECT_EQ(reshape2, computation->root_instruction());
-  EXPECT_FALSE(GpuInstructionFusion(/*may_duplicate=*/true)
-                   .Run(module.get())
-                   .ValueOrDie());
+  EXPECT_EQ(log, computation->root_instruction());
+  EXPECT_TRUE(GpuInstructionFusion(/*may_duplicate=*/true)
+                  .Run(module.get())
+                  .ValueOrDie());
 }
 
 TEST_F(InstructionFusionTest, PotentialBitcastTransposeOfDotUnfused) {
",0,train
f4ebdaba60d2fb2698d14ee87960e3a6294be196,tensorflow/tensorflow,"Add missing license header
Change: 117999025",optimizers_test.py,"@@ -1,3 +1,17 @@
+# Copyright 2015 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
 """"""Tests for optimizers.""""""
 
 from __future__ import absolute_import
",0,test
937c279fd9758ce75c43ad8d2d828475b496334a,tensorflow/tensorflow,"[tf.data] Fix MultiDeviceIterator initialization to use correct FLR.

In the recent change to instantiate functions at iterator
initialization time, I forgot to make the corresponding change was to
MultiDeviceIterator. This change fixes that problem and unbreaks
MultiDeviceIterator.

PiperOrigin-RevId: 209838406",prefetching_kernels.cc,"@@ -548,7 +548,9 @@ class MultiDeviceIterator : public ResourceBase {
         devices_(devices),
         flib_def_(std::move(flib_def)),
         pflr_(std::move(pflr)),
-        lib_(lib) {}
+        lib_(lib) {
+    CHECK_NOTNULL(lib_);
+  }
 
   string DebugString() override {
     return strings::StrCat(""MultiDeviceIterator for "", devices_.size(),
@@ -600,6 +602,11 @@ class MultiDeviceIterator : public ResourceBase {
     return lib_def_;
   }
 
+  FunctionLibraryRuntime* const lib() {
+    tf_shared_lock l(mu_);
+    return lib_;
+  }
+
  private:
   // A private class that uses a background thread to keep a per device buffer
   // full.
@@ -930,8 +937,10 @@ class MultiDeviceIteratorInitOp : public OpKernel {
     core::ScopedUnref unref(resource);
 
     std::unique_ptr<IteratorBase> iterator;
-    OP_REQUIRES_OK(ctx, dataset->MakeIterator(IteratorContext(ctx), ""Iterator"",
-                                              &iterator));
+    IteratorContext iter_ctx(ctx);
+    iter_ctx.set_lib(resource->lib());
+    OP_REQUIRES_OK(
+        ctx, dataset->MakeIterator(std::move(iter_ctx), ""Iterator"", &iterator));
     int64 incarnation_id;
     OP_REQUIRES_OK(ctx, resource->Init(std::move(iterator), max_buffer_size,
                                        &incarnation_id));
",0,train
ddbb2c52db5cfab02b80b2ef563d8d6251dcfe77,tensorflow/tensorflow,"Fix a crash in Quantize() when tf.contrib.framework.get_name_scope() == None.

PiperOrigin-RevId: 191068059",quantize.py,"@@ -416,7 +416,9 @@ def _InsertQuantOp(context,
   # name_prefix starts with 'TPUReplicate/loop/'; without dropping it
   # variables are created as TPUReplicate/loop/TPUReplicate/loop/..., which
   # breaks things later.
-  name_prefix = common.DropStringPrefix(name_prefix, ops.get_name_scope() + '/')
+  name_scope = ops.get_name_scope()
+  if name_scope:
+    name_prefix = common.DropStringPrefix(name_prefix, name_scope + '/')
 
   inputs = producer.outputs[0]
   # Prevent ops from being quantized multiple times. Bypass ops can sometimes
",0,train
ddbb2c52db5cfab02b80b2ef563d8d6251dcfe77,tensorflow/tensorflow,"Fix a crash in Quantize() when tf.contrib.framework.get_name_scope() == None.

PiperOrigin-RevId: 191068059",quantize_test.py,"@@ -247,6 +247,27 @@ class QuantizeTest(test_util.TensorFlowTestCase):
       self.assertTrue(not op.name.startswith('name_scope/name_scope/'),
                       'Broken op: %s' % op.name)
 
+  def testWithNullNameScope(self):
+    self._RunTestOverParameters(self._TestWithNullNameScope)
+
+  def _TestWithNullNameScope(self, is_training):
+    graph = ops.Graph()
+    with graph.as_default():
+      with graph.name_scope(None):
+        batch_size, height, width, depth = 5, 128, 128, 3
+        input1 = array_ops.zeros((batch_size, height, width, depth))
+        _ = conv2d(
+            input1,
+            32, [5, 5],
+            stride=2,
+            padding='SAME',
+            weights_initializer=self._WeightInit(0.09),
+            activation_fn=None,
+            scope='test')
+
+        quantize.Quantize(graph, is_training, weight_bits=8, activation_bits=8)
+        # Passes if Quantize() does not crash.
+
   def _WeightInit(self, stddev):
     """"""Returns truncated normal variable initializer.
 
",0,train
719ad3bfde3eae7169229853d3844155aa49f62f,tensorflow/tensorflow,"Ruy: Minor fix to x86 (AVX-512) code.

Minor bug made possible by very poor type checking.

PiperOrigin-RevId: 260925827",pack_avx512.cc,"@@ -337,8 +337,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf,
       // available_src_rows = std::max(0, std::min(8, src_rows - k - 8 * m));
       // but treat each case separately.
       if (available_src_rows > 7) {
-        __m512i t0, t1, t2, t3;
-        __m512i r0, r1, r2, r3;
+        __m512 t0, t1, t2, t3;
+        __m512 r0, r1, r2, r3;
 
         t0 = LoaduTwo(src_ptr0, src_ptr4);
         t1 = LoaduTwo(src_ptr1, src_ptr5);
@@ -376,8 +376,8 @@ inline void HalfPackFloatAvx512(const float* src_ptr, const float* zerobuf,
         const __mmask8 row_mask =
             (static_cast<std::uint32_t>(1) << available_src_rows) - 1;
 
-        __m512i t0, t1, t2, t3;
-        __m512i r0, r1, r2, r3;
+        __m512 t0, t1, t2, t3;
+        __m512 r0, r1, r2, r3;
 
         t0 = MaskLoaduTwo(row_mask, src_ptr0, src_ptr4);
         t1 = MaskLoaduTwo(row_mask, src_ptr1, src_ptr5);
",0,train
cd63c718be123324b6c39e0f8fbe453319799746,tensorflow/tensorflow,"[update]
  rename member variables in tensorflow/contrib/tensorrt/convert/convert_nodes.cc to follow the trailing-underscore naming convention",convert_nodes.cc,"@@ -120,7 +120,7 @@ class TRT_ShapedWeights {
         type_(type),
         values_(values),
         owned_values_(owned_values ? *owned_values : std::vector<char>({})),
-        dummy_flag_(false) {
+        empty_weight_flag_(false) {
     // Note: this->shape.type[] is not used
   }
 
@@ -129,14 +129,14 @@ class TRT_ShapedWeights {
         type_(type),
         values_(nullptr),
         owned_values_(),
-        dummy_flag_(true) {}
+        empty_weight_flag_(true) {}
 
   TRT_ShapedWeights(const TRT_ShapedWeights& rhs)
       : shape_(rhs.shape_),
         type_(rhs.type_),
         values_(rhs.values_),
         owned_values_(rhs.owned_values_),
-        dummy_flag_(rhs.dummy_flag_) {}
+        empty_weight_flag_(rhs.empty_weight_flag_) {}
 
   int64_t count() const {
     int64_t c = 1;
@@ -147,7 +147,7 @@ class TRT_ShapedWeights {
   nvinfer1::Weights GetWeightsForTRT() const {
     nvinfer1::DataType trt_type(nvinfer1::DataType::kFLOAT);
     TF_CHECK_OK(ConvertDType(type_, &trt_type));
-    if (dummy_flag_) return nvinfer1::Weights{trt_type, nullptr, 0};
+    if (empty_weight_flag_) return nvinfer1::Weights{trt_type, nullptr, 0};
 
     // Note: this->shape.type[] is not used
     return nvinfer1::Weights{trt_type, GetValues(), GetShapeSize(shape_)};
@@ -178,39 +178,39 @@ class TRT_ShapedWeights {
  private:
   const void* values_;
   std::vector<char> owned_values_;
-  bool dummy_flag_;
+  bool empty_weight_flag_;
 };
 
 class TRT_TensorOrWeights {
  public:
   explicit TRT_TensorOrWeights(nvinfer1::ITensor* tensor)
-      : _tensor_(tensor), _weights_(DT_FLOAT), _variant_(TRT_NODE_TENSOR) {}
+      : tensor_(tensor), weights_(DT_FLOAT), variant_(TRT_NODE_TENSOR) {}
   explicit TRT_TensorOrWeights(const TRT_ShapedWeights& weights)
-      : _tensor_(nullptr), _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {}
+      : tensor_(nullptr), weights_(weights), variant_(TRT_NODE_WEIGHTS) {}
   TRT_TensorOrWeights(const TRT_TensorOrWeights& rhs)
-      : _tensor_(rhs._tensor_),
-        _weights_(rhs._weights_),
-        _variant_(rhs._variant_) {}
+      : tensor_(rhs.tensor_),
+        weights_(rhs.weights_),
+        variant_(rhs.variant_) {}
   ~TRT_TensorOrWeights() {}
 
-  bool is_tensor() const { return _variant_ == TRT_NODE_TENSOR; }
-  bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; }
+  bool is_tensor() const { return variant_ == TRT_NODE_TENSOR; }
+  bool is_weights() const { return variant_ == TRT_NODE_WEIGHTS; }
 
   nvinfer1::ITensor* tensor() {
     CHECK_EQ(is_tensor(), true);
-    return _tensor_;
+    return tensor_;
   }
   const nvinfer1::ITensor* tensor() const {
     CHECK_EQ(is_tensor(), true);
-    return _tensor_;
+    return tensor_;
   }
   TRT_ShapedWeights& weights() {
     CHECK_EQ(is_weights(), true);
-    return _weights_;
+    return weights_;
   }
   const TRT_ShapedWeights& weights() const {
     CHECK_EQ(is_weights(), true);
-    return _weights_;
+    return weights_;
   }
   nvinfer1::Dims shape() const {
     if (is_tensor()) {
@@ -221,69 +221,35 @@ class TRT_TensorOrWeights {
   }
 
  private:
-  nvinfer1::ITensor* _tensor_;
-  TRT_ShapedWeights _weights_;
-  enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } _variant_;
-};
-
-class TRT_LayerOrWeights {
- public:
-  explicit TRT_LayerOrWeights(nvinfer1::ILayer* layer)
-      : _layer_(layer), _variant_(TRT_NODE_LAYER) {}
-  explicit TRT_LayerOrWeights(const TRT_ShapedWeights& weights)
-      : _weights_(weights), _variant_(TRT_NODE_WEIGHTS) {}
-  bool is_layer() const { return _variant_ == TRT_NODE_LAYER; }
-  bool is_weights() const { return _variant_ == TRT_NODE_WEIGHTS; }
-  nvinfer1::ILayer* layer() {
-    CHECK_EQ(this->is_layer(), true);
-    return _layer_;
-  }
-  TRT_ShapedWeights& weights() {
-    CHECK_EQ(this->is_weights(), true);
-    return _weights_;
-  }
-  TRT_TensorOrWeights output(int index = 0) const {
-    if (this->is_layer()) {
-      nvinfer1::ITensor* tensor = _layer_->getOutput(index);
-      return TRT_TensorOrWeights(tensor);
-    } else {
-      CHECK_EQ(index, 0);
-      return TRT_TensorOrWeights(_weights_);
-    }
-  }
-
- private:
-  union {
-    nvinfer1::ILayer* _layer_;
-    TRT_ShapedWeights _weights_;
-  };
-  enum { TRT_NODE_LAYER, TRT_NODE_WEIGHTS } _variant_;
+  nvinfer1::ITensor* tensor_;
+  TRT_ShapedWeights weights_;
+  enum { TRT_NODE_TENSOR, TRT_NODE_WEIGHTS } variant_;
 };
 
 class TFAttrs {
  public:
   explicit TFAttrs(const tensorflow::NodeDef& tf_node) {
     for (const auto& attr : tf_node.attr()) {
-      _attrs.insert({attr.first, &attr.second});
+      attrs_.insert({attr.first, &attr.second});
     }
   }
-  bool count(string key) const { return _attrs.count(key); }
+  bool count(string key) const { return attrs_.count(key); }
   tensorflow::AttrValue const* at(string key) const {
-    if (!_attrs.count(key)) {
+    if (!attrs_.count(key)) {
       LOG(FATAL) << ""Attribute not found: "" << key;
     }
-    return _attrs.at(key);
+    return attrs_.at(key);
   }
   template <typename T>
   T get(string key) const;
   template <typename T>
   T get(string key, const T& default_value) const {
-    return _attrs.count(key) ? this->get<T>(key) : default_value;
+    return attrs_.count(key) ? this->get<T>(key) : default_value;
   }
 
  private:
   typedef std::map<string, tensorflow::AttrValue const*> AttrMap;
-  AttrMap _attrs;
+  AttrMap attrs_;
 };
 
 template <>
@@ -385,10 +351,10 @@ using OpConverter =
                                      std::vector<TRT_TensorOrWeights>*)>;
 
 class Converter {
-  std::unordered_map<string, TRT_TensorOrWeights> _trt_tensors;
-  std::unordered_map<string, OpConverter> _op_registry;
-  nvinfer1::INetworkDefinition* _trt_network;
-  std::list<std::vector<uint8_t>> _temp_bufs;
+  std::unordered_map<string, TRT_TensorOrWeights> trt_tensors_;
+  std::unordered_map<string, OpConverter> op_registry_;
+  nvinfer1::INetworkDefinition* trt_network_;
+  std::list<std::vector<uint8_t>> temp_bufs_;
 
   void register_op_converters();
 
@@ -397,14 +363,14 @@ class Converter {
     std::vector<TRT_TensorOrWeights> inputs;
     for (const auto& input_name : node_def.input()) {
       VLOG(2) << ""Retrieve input: "" << input_name;
-      inputs.push_back(_trt_tensors.at(input_name));
+      inputs.push_back(trt_tensors_.at(input_name));
     }
     return inputs;
   }
 
  public:
   explicit Converter(nvinfer1::INetworkDefinition* trt_network)
-      : _trt_network(trt_network) {
+      : trt_network_(trt_network) {
     this->register_op_converters();
   }
 
@@ -412,8 +378,8 @@ class Converter {
                                      nvinfer1::Dims shape) {
     TRT_ShapedWeights weights(type, nullptr, shape);
     // TODO(jie): check weights size_bytes. 0 means type error
-    _temp_bufs.push_back(std::vector<uint8_t>(weights.size_bytes()));
-    weights.SetValues(_temp_bufs.back().data());
+    temp_bufs_.push_back(std::vector<uint8_t>(weights.size_bytes()));
+    weights.SetValues(temp_bufs_.back().data());
     return weights;
   }
 
@@ -424,11 +390,11 @@ class Converter {
   tensorflow::Status convert_node(const tensorflow::NodeDef& node_def) {
     std::vector<TRT_TensorOrWeights> inputs = this->get_inputs(node_def);
     string op = node_def.op();
-    if (!_op_registry.count(op)) {
+    if (!op_registry_.count(op)) {
       return tensorflow::errors::Unimplemented(
           ""No converter registered for op: "" + op);
     }
-    OpConverter op_converter = _op_registry.at(op);
+    OpConverter op_converter = op_registry_.at(op);
     std::vector<TRT_TensorOrWeights> outputs;
     TF_RETURN_IF_ERROR(op_converter(*this, node_def, inputs, &outputs));
     for (size_t i = 0; i < outputs.size(); ++i) {
@@ -440,7 +406,7 @@ class Converter {
         output.tensor()->setName(output_name.c_str());
       }
       VLOG(2) << ""Write out tensor: "" << output_name;
-      if (!_trt_tensors.insert({output_name, output}).second) {
+      if (!trt_tensors_.insert({output_name, output}).second) {
         return tensorflow::errors::AlreadyExists(
             ""Output tensor already exists for op: "" + op);
       }
@@ -448,17 +414,17 @@ class Converter {
     return tensorflow::Status::OK();
   }
 
-  nvinfer1::INetworkDefinition* network() { return _trt_network; }
+  nvinfer1::INetworkDefinition* network() { return trt_network_; }
 
   TRT_TensorOrWeights get_tensor(string name) {
-    if (!_trt_tensors.count(name)) {
+    if (!trt_tensors_.count(name)) {
       return TRT_TensorOrWeights(nullptr);
     }
-    return _trt_tensors.at(name);
+    return trt_tensors_.at(name);
   }
 
   bool insert_input_tensor(string name, nvinfer1::ITensor* tensor) {
-    return _trt_tensors.insert({name, TRT_TensorOrWeights(tensor)}).second;
+    return trt_tensors_.insert({name, TRT_TensorOrWeights(tensor)}).second;
   }
 
   nvinfer1::ITensor* TransposeTensor(nvinfer1::ITensor* input_tensor,
@@ -1428,25 +1394,25 @@ tensorflow::Status ConvertPad(Converter& ctx,
 
 void Converter::register_op_converters() {
   // vgg_16 slim implementation
-  _op_registry[""Placeholder""] = ConvertPlaceholder;
-  _op_registry[""Conv2D""] = ConvertConv2D;
-  _op_registry[""Relu""] = ConvertActivation;
-  _op_registry[""MaxPool""] = ConvertPool;
+  op_registry_[""Placeholder""] = ConvertPlaceholder;
+  op_registry_[""Conv2D""] = ConvertConv2D;
+  op_registry_[""Relu""] = ConvertActivation;
+  op_registry_[""MaxPool""] = ConvertPool;
   // This could be really handled as ConvertBinary
-  _op_registry[""BiasAdd""] = ConvertScale;
-  _op_registry[""Const""] = ConvertConst;
-  // _op_registry[""MatMul""] = ConvertFullyConnected;  // Not used in vgg
+  op_registry_[""BiasAdd""] = ConvertScale;
+  op_registry_[""Const""] = ConvertConst;
+  // op_registry_[""MatMul""] = ConvertFullyConnected;  // Not used in vgg
   // TODO(ben,jie): this is a temp hack.
-  _op_registry[""Identity""] = ConvertIdentity;  // Identity should be removed
-  // _op_registry[""AvgPool""] = ConvertPool;
+  op_registry_[""Identity""] = ConvertIdentity;  // Identity should be removed
+  // op_registry_[""AvgPool""] = ConvertPool;
 
   // resnet_50_v1 slim implementation
-  _op_registry[""Add""] = ConvertBinary;
-  _op_registry[""Mul""] = ConvertBinary;
-  _op_registry[""Sub""] = ConvertBinary;
-  _op_registry[""Rsqrt""] = ConvertUnary;
-  _op_registry[""Mean""] = ConvertReduce;
-  _op_registry[""Pad""] = ConvertPad;
+  op_registry_[""Add""] = ConvertBinary;
+  op_registry_[""Mul""] = ConvertBinary;
+  op_registry_[""Sub""] = ConvertBinary;
+  op_registry_[""Rsqrt""] = ConvertUnary;
+  op_registry_[""Mean""] = ConvertReduce;
+  op_registry_[""Pad""] = ConvertPad;
   // TODO(ben,jie): Add more ops
 }
 
@@ -1595,6 +1561,7 @@ tensorflow::Status ConvertSubGraphToTensorRTNodeDef(
   }
 
   VLOG(2) << ""Finished output"";
+  // TODO(jie): static_id is not thread safe.
   static int static_id = 0;
 
   // Build the engine
",0,train
5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow users to use it in graph mode.

PiperOrigin-RevId: 233492307",c_api_experimental.cc,"@@ -25,7 +25,7 @@ void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h, TF_Status* status) {
   op->operation.ConsumeInput(h->handle);
 }
 
-TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx) {
+TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx) {
   return new TFE_Profiler(ctx);
 }
 
@@ -50,17 +50,21 @@ void TFE_ProfilerSerializeToString(TFE_Context* ctx, TFE_Profiler* profiler,
   };
 }
 
-TFE_ProfilerServerOptions* TFE_NewProfilerServerOptions() {
-  return new TFE_ProfilerServerOptions;
+TFE_ProfilerContext* TFE_NewProfilerContext() {
+  return new TFE_ProfilerContext;
 }
 
-void TFE_ProfilerServerOptionsSetEagerContext(
-    TFE_ProfilerServerOptions* options, TFE_Context* ctx) {
-  options->profiler_context.eager_context = &ctx->context;
+void TFE_ProfilerContextSetEagerContext(TFE_ProfilerContext* profiler_context,
+                                        TFE_Context* eager_context) {
+  profiler_context->profiler_context.eager_context = &eager_context->context;
 }
 
-void TFE_StartProfilerServer(TFE_ProfilerServerOptions* options, int port) {
+void TFE_DeleteProfilerContext(TFE_ProfilerContext* profiler_context) {
+  delete profiler_context;
+}
+
+void TFE_StartProfilerServer(TFE_ProfilerContext* context, int port) {
   // Release child thread intentionally. The child thread can be terminate by
   // terminating the main thread.
-  tensorflow::StartProfilerServer(&options->profiler_context, port).release();
+  tensorflow::StartProfilerServer(&context->profiler_context, port).release();
 }
",0,train
5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow users to use it in graph mode.

PiperOrigin-RevId: 233492307",c_api_experimental.h,"@@ -25,6 +25,8 @@ extern ""C"" {
 TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h,
                                               TF_Status* status);
 
+typedef struct TFE_ProfilerContext TFE_ProfilerContext;
+
 // A profiler which will start profiling when creating the object and will stop
 // when the object is destroyed. It will profile all operations run under the
 // given TFE_Context. Multiple instance of it can be created, but at most one
@@ -32,7 +34,7 @@ TF_CAPI_EXPORT extern void TFE_OpConsumeInput(TFE_Op* op, TFE_TensorHandle* h,
 // Thread-safety: TFE_Profiler is thread-safe.
 typedef struct TFE_Profiler TFE_Profiler;
 
-TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_Context* ctx);
+TF_CAPI_EXPORT extern TFE_Profiler* TFE_NewProfiler(TFE_ProfilerContext* ctx);
 TF_CAPI_EXPORT extern bool TFE_ProfilerIsOk(TFE_Profiler* profiler);
 TF_CAPI_EXPORT extern void TFE_DeleteProfiler(TFE_Profiler* profiler);
 
@@ -43,15 +45,16 @@ TF_CAPI_EXPORT extern void TFE_ProfilerSerializeToString(TFE_Context* ctx,
                                                          TF_Buffer* buf,
                                                          TF_Status* status);
 
-typedef struct TFE_ProfilerServerOptions TFE_ProfilerServerOptions;
-
-// Return a new Profiler server options object.
-TF_CAPI_EXPORT extern TFE_ProfilerServerOptions* TFE_NewProfilerServerOptions(
-    void);
+// Return a new profiler context object.
+TF_CAPI_EXPORT extern TFE_ProfilerContext* TFE_NewProfilerContext(void);
 
 // Set the eager context in TFE_ProfilerServerOptions
-TF_CAPI_EXPORT extern void TFE_ProfilerServerOptionsSetEagerContext(
-    TFE_ProfilerServerOptions* options, TFE_Context* ctx);
+TF_CAPI_EXPORT extern void TFE_ProfilerContextSetEagerContext(
+    TFE_ProfilerContext* profiler_context, TFE_Context* eager_context);
+
+// Destroy a profiler context object.
+TF_CAPI_EXPORT extern void TFE_DeleteProfilerContext(
+    TFE_ProfilerContext* profiler_context);
 
 // Start a profiler grpc server which listens to specified port. It will start
 // the server on its own thread. It can be shutdown by terminating tensorflow.
@@ -61,8 +64,8 @@ TF_CAPI_EXPORT extern void TFE_ProfilerServerOptionsSetEagerContext(
 // tensorflow/contrib/tpu/profiler/capture_tpu_profile to capture tracable
 // file following
 // https://cloud.google.com/tpu/docs/cloud-tpu-tools#capture_trace.
-TF_CAPI_EXPORT extern void TFE_StartProfilerServer(
-    TFE_ProfilerServerOptions* options, int port);
+TF_CAPI_EXPORT extern void TFE_StartProfilerServer(TFE_ProfilerContext* context,
+                                                   int port);
 
 #ifdef __cplusplus
 } /* end extern ""C"" */
",0,train
5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow users to use it in graph mode.

PiperOrigin-RevId: 233492307",c_api_experimental_test.cc,"@@ -41,9 +41,12 @@ void ExecuteWithProfiling(bool async) {
   TFE_ContextOptions* opts = TFE_NewContextOptions();
   TFE_ContextOptionsSetAsync(opts, static_cast<unsigned char>(async));
   TFE_Context* ctx = TFE_NewContext(opts, status);
-  TFE_Profiler* profiler = TFE_NewProfiler(ctx);
+  TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
+  TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
+  TFE_Profiler* profiler = TFE_NewProfiler(profiler_context);
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TFE_DeleteContextOptions(opts);
+  TFE_DeleteProfilerContext(profiler_context);
 
   TFE_TensorHandle* m = TestMatrixTensorHandle();
   TFE_Op* matmul = MatMulOp(ctx, m, m);
@@ -108,14 +111,18 @@ TEST(CAPI, MultipleProfilerSession) {
   CHECK_EQ(TF_OK, TF_GetCode(status)) << TF_Message(status);
   TFE_DeleteContextOptions(opts);
 
-  TFE_Profiler* profiler1 = TFE_NewProfiler(ctx);
+  TFE_ProfilerContext* profiler_context = TFE_NewProfilerContext();
+  TFE_ProfilerContextSetEagerContext(profiler_context, ctx);
+
+  TFE_Profiler* profiler1 = TFE_NewProfiler(profiler_context);
   EXPECT_TRUE(TFE_ProfilerIsOk(profiler1));
 
-  TFE_Profiler* profiler2 = TFE_NewProfiler(ctx);
+  TFE_Profiler* profiler2 = TFE_NewProfiler(profiler_context);
   EXPECT_FALSE(TFE_ProfilerIsOk(profiler2));
 
   TFE_DeleteProfiler(profiler1);
   TFE_DeleteProfiler(profiler2);
+  TFE_DeleteProfilerContext(profiler_context);
 }
 
 }  // namespace
",0,train
5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow users to use it in graph mode.

PiperOrigin-RevId: 233492307",c_api_internal.h,"@@ -107,20 +107,18 @@ struct TFE_Op {
   tensorflow::EagerOperation operation;
 };
 
+struct TFE_ProfilerContext {
+  tensorflow::ProfilerContext profiler_context;
+};
+
 struct TFE_Profiler {
-  TFE_Profiler(TFE_Context* ctx) {
-    tensorflow::ProfilerContext profiler_context;
-    profiler_context.eager_context = &ctx->context;
-    profiler = tensorflow::ProfilerSession::Create(&profiler_context);
+  TFE_Profiler(TFE_ProfilerContext* ctx) {
+    profiler = tensorflow::ProfilerSession::Create(&ctx->profiler_context);
   }
 
   std::unique_ptr<tensorflow::ProfilerSession> profiler;
 };
 
-struct TFE_ProfilerServerOptions {
-  tensorflow::ProfilerContext profiler_context;
-};
-
 namespace tensorflow {
 // Set an AttrValue on the op. Doesn't handle the list types.
 void SetOpAttrValueScalar(TFE_Context* ctx, TFE_Op* op,
",0,train
5a297f5efea725fc5889132bafc39707f57aa7ec,tensorflow/tensorflow,"Update profiler API to allow users to use it in graph mode.

PiperOrigin-RevId: 233492307",profiler.py,"@@ -46,7 +46,13 @@ def start():
   if _profiler is not None:
     raise AssertionError('Another profiler is running.')
   with _profiler_lock:
-    _profiler = pywrap_tensorflow.TFE_NewProfiler(context.context()._handle)  # pylint: disable=protected-access
+    profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
+    if context.default_execution_mode == context.EAGER_MODE:
+      pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
+          profiler_context,
+          context.context()._handle)  # pylint: disable=protected-access
+    _profiler = pywrap_tensorflow.TFE_NewProfiler(profiler_context)
+    pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
     if not pywrap_tensorflow.TFE_ProfilerIsOk(_profiler):
       logging.warning('Another profiler session is running which is probably '
                       'created by profiler server. Please avoid using profiler '
@@ -93,12 +99,13 @@ def start_profiler_server(port):
   Args:
     port: port profiler server listens to.
   """"""
-  opts = pywrap_tensorflow.TFE_NewProfilerServerOptions()
+  profiler_context = pywrap_tensorflow.TFE_NewProfilerContext()
   if context.default_execution_mode == context.EAGER_MODE:
-    pywrap_tensorflow.TFE_ProfilerServerOptionsSetEagerContext(
-        opts,
+    pywrap_tensorflow.TFE_ProfilerContextSetEagerContext(
+        profiler_context,
         context.context()._handle)  # pylint: disable=protected-access
-  pywrap_tensorflow.TFE_StartProfilerServer(opts, port)
+  pywrap_tensorflow.TFE_StartProfilerServer(profiler_context, port)
+  pywrap_tensorflow.TFE_DeleteProfilerContext(profiler_context)
 
 
 class Profiler(object):
",0,train
28db548cfd2d238e63c3fb049119fca8369abdbe,tensorflow/tensorflow,Fixing a bug in conv+add fusion,mkl_conv_ops.cc,"@@ -24,8 +24,8 @@ limitations under the License.
 #include <unordered_map>
 #include <vector>
 
-#include ""mkldnn.hpp""
 #include ""absl/strings/str_join.h""
+#include ""mkldnn.hpp""
 #include ""tensorflow/core/framework/bounds_check.h""
 #include ""tensorflow/core/framework/numeric_op.h""
 #include ""tensorflow/core/framework/op_kernel.h""
@@ -944,23 +944,18 @@ class MklConvOp : public OpKernel {
       if (native_format) {
         // Forward the summand tensor to the output only if it has no other
         // references, otherwise make a copy of it.
-        if (!context->forward_input_to_output_with_shape(
+        if (context->forward_input_to_output_with_shape(
                 kInputIndex_Add, kOutputIndex_Dst, output_tf_shape,
                 output_tensor)) {
-          AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor,
-                                    output_tf_shape, *output_mkl_shape,
-                                    native_format);
-          bool result =
-              (*output_tensor)->CopyFrom(add_tensor, add_tensor.shape());
-          DCHECK(result);
+          return;
         }
-        return;
       }
       // Check if reorder is needed
       if (add_mkl_shape == *output_mkl_shape &&
           ForwardMklTensorInToOutWithMklShape(context, kInputIndex_Add,
                                               kOutputIndex_Dst, output_tensor,
-                                              add_mkl_shape, false)) {
+                                              add_mkl_shape, false) &&
+          !native_format) {
         return;
       } else {
         AllocateOutputSetMklShape(context, kOutputIndex_Dst, output_tensor,
@@ -987,6 +982,13 @@ class MklConvOp : public OpKernel {
             const_cast<Toutput*>(add_tensor.flat<Toutput>().data()));
         void* dst_buf =
             static_cast<void*>((*output_tensor)->flat<Ttemp_output>().data());
+        if (native_format) {
+          // We are simply deep copying the add_tensor to output_tensor without
+          // changing memory layout, hence using same memory descriptor.
+          ADD_MD = DST_MD =
+              memory::desc({add_tensor.NumElements()}, MklDnnType<Toutput>(),
+                           mkldnn::memory::format_tag::x);
+        }
         fuse_add_src_.reset(
             new MEMORY_CONSTRUCTOR(ADD_MD, this->cpu_engine_, add_buf));
         fuse_add_dst_.reset(
",0,train
673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op.

Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition.
PiperOrigin-RevId: 338246477
Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",lower_if_op.cc,"@@ -148,13 +148,22 @@ Status CondBuilder::SetColocationAndFinalize(NodeBuilder node_builder,
 Status CondBuilder::CreatePivotNodes() {
   // Construct the basic cond body (consisting of feeding in the predicate to
   // create pivot nodes).
+
+  // This is a special pivot switch node for lowering. We mark this with a
+  // special _PivotSwitch attr on it as later on in the graph partitioner we
+// do some special placement for Switch nodes and it's necessary to distinguish
+  // between a ""normal"" Switch node and one of these pivot switches. We would
+  // like to place this node on the CPU always as the pred_ will be on the CPU
+  // as well (either a CPU op output or a GPU op with HostMemory annotation).
+  // TODO(b/171321391): Fix this for NUMA cases.
   Node* switch_pred;
   TF_RETURN_IF_ERROR(
       SetColocationAndFinalize(NodeBuilder(NewName(""switch_pred""), ""Switch"",
                                            graph_->op_registry(), &debug_info_)
                                    .Input(NodeOut(pred_))
                                    .Input(NodeOut(pred_))
-                                   .Device(if_op_->requested_device()),
+                                   .Attr(""_PivotSwitch"", true)
+                                   .Device(""/CPU:0""),
                                graph_, &switch_pred));
   control_predecessor_ = switch_pred;
   TF_RETURN_IF_ERROR(
",0,train
673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op.

Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition.
PiperOrigin-RevId: 338246477
Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",lower_if_op_test.cc,"@@ -147,6 +147,115 @@ TEST(LowerIfOpTest, Simple) {
   }
 }
 
+TEST(LowerIfOpTest, GPUPlacement) {
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+
+  // Add test functions for then and else branch.
+  FunctionDefLibrary f_lib_proto;
+  *(f_lib_proto.add_function()) = test::function::XTimesTwo();
+  *(f_lib_proto.add_function()) = test::function::XTimesFour();
+
+  // Construct simple conditional that switches on `pred` and operates only on
+  // single input `A`.
+  Scope root = Scope::NewRootScope().ExitOnError();
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(f_lib_proto));
+  auto a = ops::Placeholder(root.WithOpName(""A""), DT_INT32);
+  auto x = ops::Placeholder(root.WithOpName(""X""), DT_INT32);
+  auto y = ops::Placeholder(root.WithOpName(""Y""), DT_INT32);
+  Node* pred;
+  TF_ASSERT_OK(NodeBuilder(""greater"", ""Greater"", &root.graph()->flib_def())
+                   .Input(x.node())
+                   .Input(y.node())
+                   .Device(""/GPU:0"")
+                   .Finalize(root.graph(), &pred));
+  Node* written_if;
+  std::vector<NodeBuilder::NodeOut> inputs({NodeBuilder::NodeOut(a.node())});
+  TF_ASSERT_OK(
+      NodeBuilder(""if"", ""If"", &root.graph()->flib_def())
+          .Input(pred)
+          .Input(inputs)
+          .Attr(""then_branch"", FuncAttr(""XTimesTwo""))
+          .Attr(""else_branch"", FuncAttr(""XTimesFour""))
+          .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true)
+          .Attr(""Tout"", {DT_INT32})
+          .Device(""/GPU:0"")
+          .Finalize(root.graph(), &written_if));
+  TF_ASSERT_OK(root.DoShapeInference(written_if));
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  // The input graph has no switch or merge nodes.
+  int node_called_if_count = 0;
+  for (const auto* op : graph->op_nodes()) {
+    ASSERT_FALSE(op->IsSwitch());
+    ASSERT_FALSE(op->IsMerge());
+    if (op->name() == ""if"") {
+      ++node_called_if_count;
+    }
+  }
+  ASSERT_EQ(node_called_if_count, 1);
+
+  TF_ASSERT_OK(Rewrite(&graph));
+
+  // Verify the resultant graph has switch and merge nodes, and a node called
+  // `if` (but not If nodes).
+  int switch_count = 0;
+  int merge_count = 0;
+  node_called_if_count = 0;
+  for (const auto* op : graph->op_nodes()) {
+    if (op->IsSwitch()) {
+      ++switch_count;
+    }
+    if (op->IsMerge()) {
+      ++merge_count;
+    }
+    ASSERT_NE(op->type_string(), ""If"");
+    if (op->name() == ""if"") {
+      ++node_called_if_count;
+    }
+  }
+  // One switch for predicate and one for input (A).
+  ASSERT_EQ(switch_count, 2);
+  // One merge for the single output value of then and else, and one more merge
+  // to enforce then and else function call execution (`branch_executed` node).
+  ASSERT_EQ(merge_count, 2);
+  ASSERT_EQ(node_called_if_count, 1);
+
+  // Verify execution.
+  ClientSession session(root, SessionOptionsWithInlining());
+  {
+    RunMetadata metadata;
+    RunOptions options;
+    options.set_output_partition_graphs(true);
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(x.node()), Input::Initializer(5));
+    feeds.emplace(Output(y.node()), Input::Initializer(10));
+    feeds.emplace(Output(a.node()), Input::Initializer(10));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(options, feeds, {Output(written_if)}, {},
+                             &out_tensors, &metadata));
+    GraphDef cpu_graph = metadata.partition_graphs(1);
+    int num_cpu_switch = 0;
+    for (const auto& node : cpu_graph.node()) {
+      if (node.op() == ""Switch"") {
+        ++num_cpu_switch;
+      }
+    }
+    EXPECT_EQ(num_cpu_switch, 2);
+    EXPECT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 40);
+  }
+  {
+    ClientSession::FeedType feeds;
+    feeds.emplace(Output(x.node()), Input::Initializer(10));
+    feeds.emplace(Output(y.node()), Input::Initializer(5));
+    feeds.emplace(Output(a.node()), Input::Initializer(10));
+    std::vector<Tensor> out_tensors;
+    TF_ASSERT_OK(session.Run(feeds, {Output(written_if)}, &out_tensors));
+    EXPECT_EQ(out_tensors.size(), 1);
+    EXPECT_EQ(out_tensors[0].scalar<int>()(), 20);
+  }
+}
+
 TEST(LowerIfOpTest, BranchFunctionsWithoutOutputs) {
   using ::tensorflow::test::function::GDef;
   using ::tensorflow::test::function::NDef;
",0,train
673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op.

Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition.
PiperOrigin-RevId: 338246477
Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",graph_partition.cc,"@@ -371,6 +371,13 @@ NodeDef* AddControlTrigger(const PartitionOptions& opts, GraphDef* gdef,
 void OptimizeControlFlowColocation(Graph* graph) {
   auto visit = [](Node* node) {
     if (IsSwitch(node)) {
+      // Pivot Switch nodes (which are also of type Switch) are already placed
+      // on the CPU and colocated with its inputs that are also already on the
+      // CPU (or might be placed on GPU but in host memory).
+      if (HasNodeAttr(node->def(), ""_PivotSwitch"")) {
+        DCHECK(node->requested_device().find(""CPU"") != string::npos);
+        return;
+      }
       for (const Edge* in_edge : node->in_edges()) {
         if (in_edge->dst_input() == 0) {
           // Colocate with the data input.
",0,train
673b993983f37f332ff70cdb642305f69089337d,tensorflow/tensorflow,"Ensuring that the Switch op used as a pivot is always placed on the CPU. For this we set a private attribute _PivotSwitch while creating this op and then make sure that the device overwriting logic in GraphPartition isn't executed for this op.

Note: Had to fix up control_flow_ops_py_test so that we don't expect a GPU graph when we don't get one. The reason is that now since we already know the switch_pred is going to be placed on CPU, the placer ensures that its input is placed on the CPU as well and we end up saving a copy. This means there is no GPU graph when we partition.
PiperOrigin-RevId: 338246477
Change-Id: I5641c9ae1b2d593a2996947bafe92b22cb63371d",control_flow_ops_py_test.py,"@@ -730,6 +730,8 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase):
         g for g in run_metadata.partition_graphs
         if device_str in g.node[0].device
     ]
+    if not device_graphs:
+      return 0
     self.assertLen(device_graphs, 1)
     switch_nodes = [
         n for n in device_graphs[0].node
@@ -759,7 +761,6 @@ class ControlFlowTest(test.TestCase, parameterized.TestCase):
       options = config_pb2.RunOptions(output_partition_graphs=True)
       sess.run(
           r, feed_dict={x: -10.}, options=options, run_metadata=run_metadata)
-      self.assertLen(run_metadata.partition_graphs, 2)
       # Check that the Switch for `arg` gets placed on CPU.
       self.assertEqual(
           self._count_matching_switch_nodes_on_device(run_metadata, ""CPU"",
",0,train
b55300d9d569cd5b1b2c30bff9ca6a6cb129ba32,tensorflow/tensorflow,"[XLA:GPU] Ban bad CUDNN algo

PiperOrigin-RevId: 383932719
Change-Id: I3a5b29ac56d621388cb430a52ce7879cdffe87e4",gpu_conv_algorithm_picker.cc,"@@ -397,7 +397,7 @@ GpuConvAlgorithmPicker::PickBestAlgorithmNoCacheCuda(
   const bool crash_on_checking_failure =
       debug_options.xla_gpu_crash_on_verification_failures();
 
-  const auto canonical_hlo =
+  std::string canonical_hlo =
       std::get<1>(AutotuneCacheKeyfromInstruction(instr, stream_exec_));
 
   string blas_version;
",0,train
b55300d9d569cd5b1b2c30bff9ca6a6cb129ba32,tensorflow/tensorflow,"[XLA:GPU] Ban bad CUDNN algo

PiperOrigin-RevId: 383932719
Change-Id: I3a5b29ac56d621388cb430a52ce7879cdffe87e4",hlo_algorithm_denylist.cc,"@@ -39,6 +39,14 @@ constexpr char kDefaultDenylist[] = R""pb(
     algos { id: 7 tensor_ops: true }
     blas_version: ""10201""
   }
+  entries {
+    hlo: ""(f16[3,3,256,256]{2,1,0,3}, u8[0]{0}) custom-call(f16[2048,7,7,256]{3,2,1,0}, f16[2048,7,7,256]{3,2,1,0}), window={size=3x3 pad=1_1x1_1}, dim_labels=b01f_01io->b01f, custom_call_target=\""__cudnn$convBackwardFilter\"", backend_config=\""{\\\""algorithm\\\"":\\\""0\\\"",\\\""tensor_ops_enabled\\\"":false,\\\""conv_result_scale\\\"":1,\\\""activation_mode\\\"":\\\""0\\\"",\\\""side_input_scale\\\"":0}\""""
+    cc { major: 7 }
+    cudnn_version { major: 8 minor: 2 patch: 1 } algos
+    [ { id: 0 tensor_ops: true }
+      , { id: 0 }]
+    blas_version: ""11402""
+  }
 )pb"";
 
 absl::Span<const stream_executor::dnn::AlgorithmDesc> GetDisabledConvAlgorithms(
",0,train
f9d1a8c9625fb7b4c398516e4756500c1163febf,tensorflow/tensorflow,"[NFC] Replace std::clamp with inline implementation.

std::clamp is not in C++14, thus replace it with std::min and std::max.
PiperOrigin-RevId: 270604708",UniformSupport.h,"@@ -145,7 +145,7 @@ private:
     // Round to nearest integer with halfway cases rounded away from zero.
     const double scaledRounded = std::round(scaled);
     const double clamped =
-        std::clamp(scaledRounded, clampMinDouble, clampMaxDouble);
+        std::min(std::max(scaledRounded, clampMinDouble), clampMaxDouble);
 
     uint64_t signlessResult;
     if (isSigned) {
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",remote_test.py,"@@ -203,7 +203,7 @@ class RemoteExecutionTest(test.TestCase):
     """"""Basic server connection.""""""
     remote.connect_to_remote_host(self._cached_server1_target)
 
-    with ops.device(""job:worker/replica:0/task:1/device:CPU:0""):
+    with ops.device(""job:worker/replica:0/task:0/device:CPU:0""):
       x1 = array_ops.ones([2, 2])
       x2 = array_ops.ones([2, 2])
       y = math_ops.matmul(x1, x2)
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",context.cc,"@@ -66,8 +66,10 @@ EagerContext::EagerContext(
     bool async, const DeviceMgr* device_mgr, bool device_mgr_owned,
     Rendezvous* rendezvous, const CustomKernelCreator* custom_kernel_creator,
     DistributedFunctionLibraryRuntime* cluster_flr,
-    std::function<Rendezvous*(const int64)> rendezvous_creator)
+    std::function<Rendezvous*(const int64)> rendezvous_creator,
+    const DeviceMgr* remote_device_mgr)
     : policy_(default_policy),
+      remote_unowned_device_manager_(remote_device_mgr),
       devices_(device_mgr->ListDevices()),
       rendezvous_(rendezvous),
       rendezvous_creator_(std::move(rendezvous_creator)),
@@ -117,8 +119,8 @@ void EagerContext::InitDeviceMapAndAsync() {
     devices_map_[device->name()] = device;
   }
 
-  if (remote_device_manager_ != nullptr) {
-    for (auto* device : remote_device_manager_->ListDevices()) {
+  if (remote_device_mgr() != nullptr) {
+    for (auto* device : remote_device_mgr()->ListDevices()) {
       if (devices_map_.find(device->name()) == devices_map_.end()) {
         devices_map_[device->name()] = device;
         devices_.push_back(device);
@@ -332,6 +334,7 @@ ScopedStepContainer* EagerContext::StepContainer() {
 }
 
 Status EagerContext::MaybeRegisterFunctionRemotely(const FunctionDef& fdef) {
+  // Only client context can register function on remote worker context.
   if (remote_device_manager_ == nullptr) return Status::OK();
 #if !defined(IS_MOBILE_PLATFORM)
   BlockingCounter blocking_counter(static_cast<int>(remote_contexts_.size()));
@@ -487,6 +490,10 @@ Status GetTaskName(Device* d, string* task_name) {
 Status EagerContext::GetClientAndContextID(Device* device,
                                            eager::EagerClient** client,
                                            uint64* context_id) {
+  if (remote_eager_workers_ == nullptr) {
+    return errors::Internal(
+        ""Haven't set up remote eager worker in this eager context yet."");
+  }
   auto it = device_to_client_cache_.find(device);
   if (it != device_to_client_cache_.end()) {
     *client = it->second.first;
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",context.h,"@@ -94,7 +94,8 @@ class EagerContext : public core::RefCounted {
       bool async, const DeviceMgr* device_mgr, bool device_mgr_owned,
       Rendezvous* rendezvous, const CustomKernelCreator* custom_kernel_creator,
       DistributedFunctionLibraryRuntime* cluster_flr = nullptr,
-      std::function<Rendezvous*(const int64)> rendezvous_creator = nullptr);
+      std::function<Rendezvous*(const int64)> rendezvous_creator = nullptr,
+      const DeviceMgr* remote_device_mgr = nullptr);
 
   ~EagerContext();
 
@@ -206,7 +207,8 @@ class EagerContext : public core::RefCounted {
                                               : local_unowned_device_manager_;
   }
   const tensorflow::DeviceMgr* remote_device_mgr() const {
-    return remote_device_manager_.get();
+    return (remote_device_manager_ != nullptr) ? remote_device_manager_.get()
+                                               : remote_unowned_device_manager_;
   }
 
   // TODO(apassos) remove the need for this
@@ -292,7 +294,11 @@ class EagerContext : public core::RefCounted {
   // Only one of the below is set.
   std::unique_ptr<const DeviceMgr> local_device_manager_;
   const DeviceMgr* local_unowned_device_manager_;
+
+  // Only one of the below is set. remote_unowned_device_manager_ is set on
+  // remote worker to allow running multi-device function on remote worker.
   std::unique_ptr<DeviceMgr> remote_device_manager_;
+  const DeviceMgr* remote_unowned_device_manager_;
 
   // Devices owned by device_manager
   std::vector<Device*> devices_;
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",eager_service_impl.cc,"@@ -121,7 +121,8 @@ Status EagerServiceImpl::CreateContext(const CreateContextRequest* request,
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
       request->async(), device_mgr, false, r, nullptr,
-      worker_session->cluster_flr.get(), std::move(rendezvous_creator));
+      worker_session->cluster_flr.get(), std::move(rendezvous_creator),
+      worker_session->remote_device_mgr());
 
   std::vector<DeviceAttributes> device_attributes;
   device_mgr->ListDeviceAttributes(&device_attributes);
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",worker_session.h,"@@ -49,6 +49,8 @@ struct WorkerSession {
     return device_mgr_ ? device_mgr_.get() : borrowed_device_mgr_;
   }
 
+  DeviceMgr* remote_device_mgr() { return remote_device_mgr_.get(); }
+
   // graph_mgr keeps track of the registered graphs of this session.
   //
   // Note: graph_mgr must be deleted before rendezvous_mgr!
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",remote.py,"@@ -24,6 +24,7 @@ from tensorflow.core.protobuf.cluster_pb2 import ClusterDef
 from tensorflow.core.protobuf.tensorflow_server_pb2 import ServerDef
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
+from tensorflow.python.util import nest
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -51,34 +52,37 @@ def connect_to_remote_host(remote_host=None, job_name=""worker""):
   ```
 
   Args:
-    remote_host: The addr of the remote server in host-port format.
+    remote_host: a single or a list the remote server addr in host-port format.
     job_name: The job name under which the new server will be accessible.
 
   Raises:
     ValueError: if remote_host is None.
   """"""
-  if remote_host is None:
-    raise ValueError(""Must provide an remote_host"")
+  if not remote_host:
+    raise ValueError(""Must provide at least one remote_host"")
 
+  remote_host = nest.flatten(remote_host)
   grpc_prefix = ""grpc://""
-  if remote_host.startswith(grpc_prefix):
-    remote_host = remote_host[len(grpc_prefix):]
 
   local_port = pywrap_tensorflow.TF_PickUnusedPortOrDie()
 
   cluster_def = ClusterDef()
   job_def = cluster_def.job.add()
-  job_def.name = job_name
+  job_def.name = ""localhost""
   # TODO(fishx): Update this to make sure remote worker has valid ip address
   # to connect with local.
   job_def.tasks[0] = ""localhost:{}"".format(local_port)
-  job_def.tasks[1] = remote_host
+
+  job_def = cluster_def.job.add()
+  job_def.name = job_name
+  for i in range(len(remote_host)):
+    if remote_host[i].startswith(grpc_prefix):
+      job_def.tasks[i] = remote_host[i][len(grpc_prefix):]
+    else:
+      job_def.tasks[i] = remote_host[i]
 
   server_def = ServerDef(
-      cluster=cluster_def,
-      job_name=job_name,
-      task_index=0,
-      protocol=""grpc"")
+      cluster=cluster_def, job_name=""localhost"", task_index=0, protocol=""grpc"")
 
   # TODO(nareshmodi): Make this default since it works in more situations.
   os.environ[""TF_EAGER_REMOTE_USE_SEND_TENSOR_RPC""] = ""1""
",0,train
c59869928608498b9161c8075874a2d4c9e9a405,tensorflow/tensorflow,"Set up device map for eager context on device worker to allow invoke multi-device function on remote worker.

Added a new test in remote_test.py. Without remote device map, the test will report following error:
InvalidArgumentError: /job:worker/replica:0/task:1/device:GPU:0 unknown device.
Additional GRPC error information:
{""created"":""@1558735306.239191376"",""description"":""Error received from peer ipv6:[::1]:17007"",""file"":""third_party/grpc/src/core/lib/surface/call.cc"",""file_line"":1046,""grpc_message"":""/job:worker/replica:0/task:1/device:GPU:0 unknown device."",""grpc_status"":3} [Op:__inference_remote_function_43]
PiperOrigin-RevId: 250813680",remote_test.py,"@@ -28,10 +28,10 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.ops import variables
 
 
-class RemoteTest(test.TestCase):
+class SingleWorkerTest(test.TestCase):
 
   def setUp(self):
-    super(RemoteTest, self).setUp()
+    super(SingleWorkerTest, self).setUp()
 
     workers, _ = test_util.create_local_cluster(1, 0)
     remote.connect_to_remote_host(workers[0].target)
@@ -40,9 +40,9 @@ class RemoteTest(test.TestCase):
 
     @def_function.function
     def basic(i):
-      with ops.device('/job:worker/replica:0/task:0/cpu:0'):
+      with ops.device('/job:localhost/replica:0/task:0/cpu:0'):
         a = constant_op.constant([2]) + i
-      with ops.device('/job:worker/replica:0/task:1/cpu:0'):
+      with ops.device('/job:worker/replica:0/task:0/cpu:0'):
         b = constant_op.constant([1])
 
       return a + b
@@ -51,7 +51,7 @@ class RemoteTest(test.TestCase):
     self.assertAllEqual(basic(constant_op.constant([1])).numpy(), [4])
 
   def testMultiDeviceFunctionVariable(self):
-    with ops.device('/job:worker/replica:0/task:1/cpu:0'):
+    with ops.device('/job:worker/replica:0/task:0/cpu:0'):
       variable_b = variables.Variable(1)
 
     @def_function.function
@@ -61,7 +61,7 @@ class RemoteTest(test.TestCase):
     self.assertAllEqual(with_variable(constant_op.constant([2])).numpy(), [3])
 
   def testMultiDeviceFunctionRemoteOutput(self):
-    with ops.device('/job:worker/replica:0/task:1/cpu:0'):
+    with ops.device('/job:worker/replica:0/task:0/cpu:0'):
       variable_b = variables.Variable(1)
 
     @def_function.function
@@ -83,7 +83,7 @@ class RemoteTest(test.TestCase):
         return i + constant_op.constant([2])
 
     with self.assertRaises(errors.InvalidArgumentError) as cm:
-      with ops.device('/job:worker/replica:0/task:1/cpu:0'):
+      with ops.device('/job:worker/replica:0/task:0/cpu:0'):
         self.assertAllEqual(
             ambiguous_device(constant_op.constant([2])).numpy(), [3])
 
@@ -91,5 +91,28 @@ class RemoteTest(test.TestCase):
                   cm.exception.message)
 
 
+class MultiWorkersTest(test.TestCase):
+
+  def setUp(self):
+    super(MultiWorkersTest, self).setUp()
+
+    workers, _ = test_util.create_local_cluster(2, 0)
+    remote.connect_to_remote_host([workers[0].target, workers[1].target])
+
+  def testMultiDeviceFunctionOnRemoteDevice(self):
+    with ops.device('/job:worker/replica:0/task:1'):
+      variable_b = variables.Variable(1.0)
+
+    @def_function.function
+    def remote_function(i):
+      with ops.device('/job:worker/replica:0/task:0'):
+        a = i + variable_b
+      c = a + 1.0
+      return c
+
+    with ops.device('/job:worker/replica:0/task:0'):
+      self.assertAllEqual(remote_function(constant_op.constant([1.0])), [3.0])
+
+
 if __name__ == '__main__':
   test.main()
",0,train
7ae5b12f4714455028b7422dcfe3f0dd3172b4d3,tensorflow/tensorflow,"Add more information in CheckpointManager's summary line.

PiperOrigin-RevId: 307162471
Change-Id: Ifdc5e6ab7800f80b35968e6650918277f0178f85",checkpoint_management.py,"@@ -511,7 +511,7 @@ def meta_graph_filename(checkpoint_filename, meta_graph_suffix=""meta""):
 # TODO(allenl): Allow tf.keras.Model instances in the constructor directly?
 @tf_export(""train.CheckpointManager"")
 class CheckpointManager(object):
-  """"""Deletes old checkpoints.
+  """"""Manages multiple checkpoints by keeping some and deleting unneeded ones.
 
   Example usage:
 
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",hlo_legalize_to_lhlo.cc,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""mlir-hlo/Dialect/mhlo/transforms/map_hlo_to_lhlo_op.h""
 #include ""mlir-hlo/Dialect/mhlo/transforms/passes.h""
 #include ""mlir-hlo/Dialect/mhlo/transforms/rewriters.h""
+#include ""mlir/Dialect/MemRef/IR/MemRef.h""
 #include ""mlir/Dialect/Shape/IR/Shape.h""
 #include ""mlir/Dialect/Shape/Transforms/Passes.h""
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""
@@ -564,7 +565,8 @@ class HloToLhloTensorStoreOpLegacyConverter
 struct HloLegalizeToLhlo
     : public PassWrapper<HloLegalizeToLhlo, OperationPass<ModuleOp>> {
   void getDependentDialects(DialectRegistry& registry) const override {
-    registry.insert<lmhlo::LmhloDialect>();
+    registry.insert<lmhlo::LmhloDialect, memref::MemRefDialect,
+                    shape::ShapeDialect>();
   }
 
  public:
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",legalize_to_linalg.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 #include ""mlir/Dialect/Linalg/IR/LinalgOps.h""
 #include ""mlir/Dialect/Linalg/IR/LinalgTypes.h""
 #include ""mlir/Dialect/Math/IR/Math.h""
+#include ""mlir/Dialect/MemRef/IR/MemRef.h""
 #include ""mlir/Dialect/SCF/SCF.h""
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""
 #include ""mlir/Dialect/Tensor/IR/Tensor.h""
@@ -1965,7 +1966,9 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context,
 struct LhloLegalizeToLinalgPass
     : public PassWrapper<LhloLegalizeToLinalgPass, FunctionPass> {
   void getDependentDialects(DialectRegistry& registry) const override {
-    registry.insert<AffineDialect, linalg::LinalgDialect, math::MathDialect>();
+    registry
+        .insert<AffineDialect, complex::ComplexDialect, linalg::LinalgDialect,
+                math::MathDialect, memref::MemRefDialect>();
   }
 
   void runOnFunction() override {
@@ -1986,8 +1989,9 @@ struct LhloLegalizeToLinalgPass
 struct HloLegalizeToLinalgPass
     : public PassWrapper<HloLegalizeToLinalgPass, FunctionPass> {
   void getDependentDialects(DialectRegistry& registry) const override {
-    registry.insert<linalg::LinalgDialect, scf::SCFDialect,
-                    complex::ComplexDialect, math::MathDialect>();
+    registry
+        .insert<linalg::LinalgDialect, scf::SCFDialect, complex::ComplexDialect,
+                math::MathDialect, memref::MemRefDialect>();
   }
 
   void runOnFunction() override {
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",lhlo_legalize_to_gpu.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""mlir/Dialect/GPU/GPUDialect.h""
 #include ""mlir/Dialect/Linalg/IR/LinalgOps.h""
 #include ""mlir/Dialect/Linalg/IR/LinalgTypes.h""
+#include ""mlir/Dialect/MemRef/IR/MemRef.h""
 #include ""mlir/Dialect/SCF/SCF.h""
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""
 #include ""mlir/IR/Attributes.h""
@@ -174,7 +175,7 @@ struct LhloLegalizeToGpuPass
     : public PassWrapper<LhloLegalizeToGpuPass, FunctionPass> {
   void getDependentDialects(DialectRegistry& registry) const override {
     registry.insert<AffineDialect, gpu::GPUDialect, linalg::LinalgDialect,
-                    scf::SCFDialect>();
+                    memref::MemRefDialect, scf::SCFDialect>();
   }
 
   void runOnFunction() override {
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",test_infer_shaped_type_pass.cc,"@@ -13,6 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""mlir/Dialect/Shape/IR/Shape.h""
 #include ""mlir/IR/Attributes.h""
 #include ""mlir/IR/Identifier.h""
 #include ""mlir/IR/MLIRContext.h""
@@ -83,6 +84,9 @@ struct ReifyReturnTypeShapesPattern : public RewritePattern {
 
 struct TestInferShapedTypeMethodsPass
     : public PassWrapper<TestInferShapedTypeMethodsPass, FunctionPass> {
+  void getDependentDialects(DialectRegistry &registry) const override {
+    registry.insert<shape::ShapeDialect>();
+  }
   void runOnFunction() override {
     OwningRewritePatternList patterns(&getContext());
     patterns.insert<ReifyReturnTypeShapesPattern>(&getContext());
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",transform_unranked_hlo.cc,"@@ -528,7 +528,7 @@ struct ConvertUnrankedDynamicBroadcastSelectOp
 struct TransformUnrankedHloPass
     : public PassWrapper<TransformUnrankedHloPass, FunctionPass> {
   void getDependentDialects(DialectRegistry &registry) const override {
-    registry.insert<chlo::HloClientDialect, mhlo::MhloDialect,
+    registry.insert<chlo::HloClientDialect, mhlo::MhloDialect, scf::SCFDialect,
                     shape::ShapeDialect>();
   }
 
",0,train
b438b8de6af3572fbdce6ea4097d9e7935b55f30,tensorflow/tensorflow,"mlir-hlo-opt: set preloadDialectsInContext to false.

This requires specifying dependent dialects in several passes.

PiperOrigin-RevId: 365758084
Change-Id: If32ab0b12173c0a5e7706e9c7a0388de513c59c8",unfuse_batch_norm_pass.cc,"@@ -15,7 +15,9 @@ limitations under the License.
 
 #include ""mlir-hlo/Dialect/mhlo/IR/hlo_ops.h""
 #include ""mlir-hlo/Dialect/mhlo/transforms/rewriters.h""
+#include ""mlir/Dialect/MemRef/IR/MemRef.h""
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""
+#include ""mlir/IR/Dialect.h""
 #include ""mlir/IR/MLIRContext.h""
 #include ""mlir/IR/Operation.h""
 #include ""mlir/Pass/Pass.h""
@@ -29,6 +31,9 @@ namespace {
 
 struct TestUnfuseBatchNormPass
     : public PassWrapper<TestUnfuseBatchNormPass, OperationPass<>> {
+  void getDependentDialects(DialectRegistry& registry) const override {
+    registry.insert<memref::MemRefDialect>();
+  }
   void runOnOperation() override {
     OwningRewritePatternList patterns(&getContext());
     PopulateUnfuseBatchNormPatterns(&getContext(), &patterns);
",0,train
0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors.

PiperOrigin-RevId: 191825756",args.h,"@@ -202,6 +202,7 @@ struct ParsedModelFlags {
   Arg<toco::IntList> input_shape;
   Arg<toco::StringMapList> rnn_states;
   Arg<toco::StringMapList> model_checks;
+  Arg<bool> change_concat_input_ranges = Arg<bool>(true);
   // Debugging output options.
   // TODO(benoitjacob): these shouldn't be ModelFlags.
   Arg<string> graphviz_first_array;
",0,test
0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors.

PiperOrigin-RevId: 191825756",hardcode_min_max.cc,"@@ -95,30 +95,37 @@ bool HardcodeMinMaxForConcatenation(Model* model, Operator* op) {
   overall_minmax.min = overall_min;
   overall_minmax.max = overall_max;
   bool changed = false;
-  for (const auto& input : op->inputs) {
-    auto& array = model->GetArray(input);
-    if (!array.minmax) {
-      changed = true;
-    } else if (!(overall_minmax == array.GetMinMax())) {
-      changed = true;
-      LOG(WARNING)
-          << ""Tweaking the MinMax of array "" << input << "", which is ""
-          << ""an input to "" << LogName(*op) << "", because we want all inputs ""
-          << ""and outputs of a Concatenation operator to have the same MinMax ""
-          << ""so that it can be implemented as a pure byte-copy, no ""
-             ""arithmetic."";
+  if (model->flags.change_concat_input_ranges()) {
+    for (const auto& input : op->inputs) {
+      auto& array = model->GetArray(input);
+      if (!array.minmax) {
+        changed = true;
+      } else if (!(overall_minmax == array.GetMinMax())) {
+        changed = true;
+        LOG(WARNING)
+            << ""Tweaking the MinMax of array "" << input << "", which is ""
+            << ""an input to "" << LogName(*op) << "", because we want all inputs ""
+            << ""and outputs of a Concatenation operator to have the same ""
+            << ""MinMax so that it can be implemented as a pure byte-copy, no ""
+               ""arithmetic."";
+      }
+      array.GetOrCreateMinMax() = overall_minmax;
     }
-    array.GetOrCreateMinMax() = overall_minmax;
   }
   if (!output.minmax) {
     changed = true;
   } else if (!(overall_minmax == output.GetMinMax())) {
-    changed = true;
-    LOG(WARNING)
-        << ""Tweaking the MinMax of the output array of "" << LogName(*op)
-        << "", because we want all inputs ""
-        << ""and outputs of a Concatenation operator to have the same MinMax ""
-        << ""so that it can be implemented as a pure byte-copy, no arithmetic."";
+    if (model->flags.change_concat_input_ranges()) {
+      changed = true;
+      LOG(WARNING)
+          << ""Tweaking the MinMax of the output array of "" << LogName(*op)
+          << "", because we want all inputs ""
+          << ""and outputs of a Concatenation operator to have the same MinMax ""
+          << ""so that it can be implemented as a pure byte-copy, no ""
+          << ""arithmetic."";
+    } else {
+      return false;
+    }
   }
   output.GetOrCreateMinMax() = overall_minmax;
 
",0,test
0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors.

PiperOrigin-RevId: 191825756",quantize.cc,"@@ -431,7 +431,8 @@ bool ChooseQuantizationForOperatorOutput(
       (op.type == OperatorType::kSpaceToDepth) ||
       (op.type == OperatorType::kTensorFlowReshape) ||
       (op.type == OperatorType::kTensorFlowSplit) ||
-      (op.type == OperatorType::kConcatenation)) {
+      (op.type == OperatorType::kConcatenation &&
+       model->flags.change_concat_input_ranges())) {
     int data_input_index = 0;
     if (op.type == OperatorType::kTensorFlowSplit) {
       data_input_index = 1;
",0,test
0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors.

PiperOrigin-RevId: 191825756",model_cmdline_flags.cc,"@@ -165,6 +165,11 @@ bool ParseModelFlagsFromCommandLineFlags(
            ""Path to an optional file containing a serialized ModelFlags proto. ""
            ""Options specified on the command line will override the values in ""
            ""the proto.""),
+      Flag(""change_concat_input_ranges"",
+           parsed_flags.change_concat_input_ranges.bind(),
+           parsed_flags.change_concat_input_ranges.default_value(),
+           ""Boolean to change the behavior of min/max ranges for inputs and""
+           "" output of the concat operators.""),
   };
   bool asked_for_help =
       *argc == 2 && (!strcmp(argv[1], ""--help"") || !strcmp(argv[1], ""-help""));
@@ -399,6 +404,8 @@ void ReadModelFlagsFromCommandLineFlags(
       parsed_model_flags.allow_nonascii_arrays.value());
   model_flags->set_allow_nonexistent_arrays(
       parsed_model_flags.allow_nonexistent_arrays.value());
+  model_flags->set_change_concat_input_ranges(
+      parsed_model_flags.change_concat_input_ranges.value());
 
   if (parsed_model_flags.arrays_extra_info_file.specified()) {
     string arrays_extra_info_file_contents;
",0,test
0b87efcbae8cde976a302415c6df2189958e7a8b,tensorflow/tensorflow,"Add a command line parameter to toco to change the way toco rescales input and output tensors.

PiperOrigin-RevId: 191825756",tooling_util.cc,"@@ -1413,7 +1413,8 @@ void ResolveModelFlags(const ModelFlags& model_flags, Model* model) {
       CHECK(input_array.shape().dims_size());
     }
   }
-
+  model->flags.set_change_concat_input_ranges(
+      model_flags.change_concat_input_ranges());
   model->flags.set_allow_nonascii_arrays(model_flags.allow_nonascii_arrays());
   model->flags.set_allow_nonexistent_arrays(
       model_flags.allow_nonexistent_arrays());
",0,test
8d7447799904c2ac16a99154519f80d979eab0eb,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 6/8.

Move SetThunkLaunchDimensions() to right after KernelThunk construction. Launch dimension will be passed to KernelThunk's constructor as a parameter.

PiperOrigin-RevId: 386347660
Change-Id: I560d7f695c4e50a54a156584c4082a1cd73a5e14",ir_emitter_unnested.cc,"@@ -1853,9 +1853,20 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) {
     // emit it in a separate kernel. Treat it like a loop fusion, writing to
     // the output buffer.
     {
+      auto unroll_factor =
+          ComputeMaxUnrollFactor(fusion_op, hlo_module_config_);
+      const Shape& element_shape = root->shape();
+      TF_ASSIGN_OR_RETURN(
+          LaunchDimensions launch_dimensions,
+          CalculateLaunchDimensions(element_shape,
+                                    ir_emitter_context_->gpu_device_info(),
+                                    {unroll_factor, /*few_waves=*/false}));
+
       std::vector<llvm_ir::IrArray> ir_arrays;
       TF_ASSIGN_OR_RETURN(auto operand_thunk,
                           BuildKernelThunk(op, Thunk::ThunkInfo(), &ir_arrays));
+      SetThunkLaunchDimensions(launch_dimensions, operand_thunk.get(),
+                               ir_emitter_context_->llvm_module());
       thunks.push_back(std::move(operand_thunk));
 
       GpuElementalIrEmitter operand_elemental_emitter(
@@ -1874,16 +1885,6 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) {
       TF_ASSIGN_OR_RETURN(auto generator,
                           operand_fused_emitter.GetGenerator(root->operand(0)));
 
-      auto unroll_factor =
-          ComputeMaxUnrollFactor(fusion_op, hlo_module_config_);
-      const Shape& element_shape = root->shape();
-      TF_ASSIGN_OR_RETURN(
-          LaunchDimensions launch_dimensions,
-          CalculateLaunchDimensions(element_shape,
-                                    ir_emitter_context_->gpu_device_info(),
-                                    {unroll_factor, /*few_waves=*/false}));
-      SetThunkLaunchDimensions(launch_dimensions, thunks.back().get(),
-                               ir_emitter_context_->llvm_module());
       TF_RETURN_IF_ERROR(
           ParallelLoopEmitter(generator, ir_arrays.back(), launch_dimensions,
                               &b_, {unroll_factor})
@@ -1968,37 +1969,33 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) {
     // touching the un-updated elements.
     CHECK_EQ(1, GetHloOutputs(op).size());
 
-    // Set up kernel thunk and fused ir emitter.
-    std::vector<llvm_ir::IrArray> ir_arrays;
-    TF_ASSIGN_OR_RETURN(
-        auto fusion_thunk,
-        BuildKernelThunk(fusion_op, GetThunkInfo(op), &ir_arrays));
-
     TF_ASSIGN_OR_RETURN(
         const HloComputation* fused_computation,
         GetOrCreateSubComputationFromRegion(&fusion_op.region(),
                                             /*is_fusion=*/true));
 
-    GpuElementalIrEmitter elemental_emitter(hlo_module_config_,
-                                            ir_emitter_context_->llvm_module(),
-                                            &b_, GetNestedComputer());
-
     // Shape of the dynamic-update-slice's ""update"" operand.
     Shape update_shape =
         fused_computation->root_instruction()->operand(1)->shape();
 
-    // Array to write into.  Because this is an in-place operation, this is the
-    // same as operand 0's array.
-    const IrArray& output_array = ir_arrays.back();
-
     TF_ASSIGN_OR_RETURN(
         LaunchDimensions launch_dimensions,
         CalculateLaunchDimensions(update_shape,
                                   ir_emitter_context_->gpu_device_info()));
+
+    // Set up kernel thunk and fused ir emitter.
+    std::vector<llvm_ir::IrArray> ir_arrays;
+    TF_ASSIGN_OR_RETURN(
+        auto fusion_thunk,
+        BuildKernelThunk(fusion_op, GetThunkInfo(op), &ir_arrays));
     SetThunkLaunchDimensions(launch_dimensions, fusion_thunk.get(),
                              ir_emitter_context_->llvm_module());
     AddThunkToThunkSequence(std::move(fusion_thunk));
 
+    GpuElementalIrEmitter elemental_emitter(hlo_module_config_,
+                                            ir_emitter_context_->llvm_module(),
+                                            &b_, GetNestedComputer());
+
     FusedIrEmitter fused_emitter(&elemental_emitter);
 
     for (int i = 0; i < fused_computation->num_parameters(); i++) {
@@ -2011,6 +2008,10 @@ Status IrEmitterUnnested::EmitFusion(mlir::Operation* op) {
           });
     }
 
+    // Array to write into.  Because this is an in-place operation, this is the
+    // same as operand 0's array.
+    const IrArray& output_array = ir_arrays.back();
+
     return llvm_ir::EmitParallelFusedDynamicUpdateSliceInPlace(
         fused_computation, output_array, &fused_emitter, launch_dimensions,
         &b_);
",0,train
388f9fa67d1f3300f25b491e69ece14f1299997d,tensorflow/tensorflow,"Tweak, add some examples and comparisons with alternatives to the docstring for tf.nn.sigmoid_cross_entropy_with_logits.

I've seen some confusion about the wording, which previously made it seem like this symbol was not appropriate for binary classification with mutually exclusive classes.

PiperOrigin-RevId: 363897491
Change-Id: I8b65b6a225320c12e4dee6fb3ef1e2ed0e8a6a02",nn_impl.py,"@@ -117,48 +117,7 @@ def sigmoid_cross_entropy_with_logits(  # pylint: disable=invalid-name
     labels=None,
     logits=None,
     name=None):
-  """"""Computes sigmoid cross entropy given `logits`.
-
-  Measures the probability error in discrete classification tasks in which each
-  class is independent and not mutually exclusive.  For instance, one could
-  perform multilabel classification where a picture can contain both an elephant
-  and a dog at the same time.
-
-  For brevity, let `x = logits`, `z = labels`.  The logistic loss is
-
-        z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))
-      = z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))
-      = z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))
-      = z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x))
-      = (1 - z) * x + log(1 + exp(-x))
-      = x - x * z + log(1 + exp(-x))
-
-  For x < 0, to avoid overflow in exp(-x), we reformulate the above
-
-        x - x * z + log(1 + exp(-x))
-      = log(exp(x)) - x * z + log(1 + exp(-x))
-      = - x * z + log(1 + exp(x))
-
-  Hence, to ensure stability and avoid overflow, the implementation uses this
-  equivalent formulation
-
-      max(x, 0) - x * z + log(1 + exp(-abs(x)))
-
-  `logits` and `labels` must have the same type and shape.
-
-  Args:
-    _sentinel: Used to prevent positional parameters. Internal, do not use.
-    labels: A `Tensor` of the same type and shape as `logits`.
-    logits: A `Tensor` of type `float32` or `float64`.
-    name: A name for the operation (optional).
-
-  Returns:
-    A `Tensor` of the same shape as `logits` with the componentwise
-    logistic losses.
-
-  Raises:
-    ValueError: If `logits` and `labels` do not have the same shape.
-  """"""
+  """"""See sigmoid_cross_entropy_with_logits_v2.""""""
   # pylint: disable=protected-access
   nn_ops._ensure_xent_args(""sigmoid_cross_entropy_with_logits"", _sentinel,
                            labels, logits)
@@ -199,12 +158,13 @@ def sigmoid_cross_entropy_with_logits_v2(  # pylint: disable=invalid-name
     labels=None,
     logits=None,
     name=None):
-  """"""Computes sigmoid cross entropy given `logits`.
+  r""""""Computes sigmoid cross entropy given `logits`.
 
-  Measures the probability error in discrete classification tasks in which each
-  class is independent and not mutually exclusive.  For instance, one could
-  perform multilabel classification where a picture can contain both an elephant
-  and a dog at the same time.
+  Measures the probability error in tasks with two outcomes in which each
+  outcome is independent and need not have a fully certain label. For instance,
+  one could perform a regression where the probability of an event happening is
+  known and used as a label. This loss may also be used for binary
+  classification, where labels are either zero or one.
 
   For brevity, let `x = logits`, `z = labels`.  The logistic loss is
 
@@ -228,9 +188,51 @@ def sigmoid_cross_entropy_with_logits_v2(  # pylint: disable=invalid-name
 
   `logits` and `labels` must have the same type and shape.
 
+  >>> logits = tf.constant([1., -1., 0., 1., -1., 0., 0.])
+  >>> labels = tf.constant([0., 0., 0., 1., 1., 1., 0.5])
+  >>> tf.nn.sigmoid_cross_entropy_with_logits(
+  ...     labels=labels, logits=logits).numpy()
+  array([1.3132617, 0.3132617, 0.6931472, 0.3132617, 1.3132617, 0.6931472,
+         0.6931472], dtype=float32)
+
+  Compared to the losses which handle multiple outcomes,
+  `tf.nn.softmax_cross_entropy_with_logits` for general multi-class
+  classification and `tf.nn.sparse_softmax_cross_entropy_with_logits` for more
+  efficient multi-class classification with hard labels,
+  `sigmoid_cross_entropy_with_logits` is a slight simplification for binary
+  classification:
+
+        sigmoid(x) = softmax([x, 0])[0]
+
+  $$\frac{1}{1 + e^{-x}} = \frac{e^x}{e^x + e^0}$$
+
+  While `sigmoid_cross_entropy_with_logits` works for soft binary labels
+  (probabilities between 0 and 1), it can also be used for binary classification
+  where the labels are hard. There is an equivalence between all three symbols
+  in this case, with a probability 0 indicating the second class or 1 indicating
+  the first class:
+
+  >>> sigmoid_logits = tf.constant([1., -1., 0.])
+  >>> softmax_logits = tf.stack([sigmoid_logits, tf.zeros_like(sigmoid_logits)],
+  ...                           axis=-1)
+  >>> soft_binary_labels = tf.constant([1., 1., 0.])
+  >>> soft_multiclass_labels = tf.stack(
+  ...     [soft_binary_labels, 1. - soft_binary_labels], axis=-1)
+  >>> hard_labels = tf.constant([0, 0, 1])
+  >>> tf.nn.sparse_softmax_cross_entropy_with_logits(
+  ...     labels=hard_labels, logits=softmax_logits).numpy()
+  array([0.31326166, 1.3132616 , 0.6931472 ], dtype=float32)
+  >>> tf.nn.softmax_cross_entropy_with_logits(
+  ...     labels=soft_multiclass_labels, logits=softmax_logits).numpy()
+  array([0.31326166, 1.3132616, 0.6931472], dtype=float32)
+  >>> tf.nn.sigmoid_cross_entropy_with_logits(
+  ...     labels=soft_binary_labels, logits=sigmoid_logits).numpy()
+  array([0.31326166, 1.3132616, 0.6931472], dtype=float32)
+
   Args:
-    labels: A `Tensor` of the same type and shape as `logits`.
-    logits: A `Tensor` of type `float32` or `float64`.
+    labels: A `Tensor` of the same type and shape as `logits`. Between 0 and 1,
+      inclusive.
+    logits: A `Tensor` of type `float32` or `float64`. Any real number.
     name: A name for the operation (optional).
 
   Returns:
@@ -244,6 +246,10 @@ def sigmoid_cross_entropy_with_logits_v2(  # pylint: disable=invalid-name
       logits=logits, labels=labels, name=name)
 
 
+sigmoid_cross_entropy_with_logits.__doc__ = (
+    sigmoid_cross_entropy_with_logits_v2.__doc__)
+
+
 @tf_export(""nn.weighted_cross_entropy_with_logits"", v1=[])
 @dispatch.add_dispatch_support
 def weighted_cross_entropy_with_logits_v2(labels, logits, pos_weight,
",0,train
fcfef1062556eb199faa2c2759aa93d15e6bfa3f,tensorflow/tensorflow,"[tf.data] Graduate the experiment `enable_bufferio_v2` and make it default for all tf.data input pipelines.

PiperOrigin-RevId: 425725025
Change-Id: Id37818f2bbea416d9672302b7594ea3964cd4e29",dataset_utils.cc,"@@ -890,7 +890,6 @@ absl::flat_hash_map<string, int64_t> DatasetExperimentRegistry::Experiments() {
 namespace {
 
 REGISTER_DATASET_EXPERIMENT(""initial_parallelism_value"", 50);
-REGISTER_DATASET_EXPERIMENT(""enable_bufferedio_v2"", 100);
 REGISTER_DATASET_EXPERIMENT(""inject_prefetch"", 100);
 REGISTER_DATASET_EXPERIMENT(""max_parallelism"", 100);
 REGISTER_DATASET_EXPERIMENT(""max_parallelism_v2"", 100);
",0,train
1aa8056e084494f706fb492013cf328212e782ee,tensorflow/tensorflow,"Update def_function_xla_jit_test.py

Use assertEqual instead of assertTrue",def_function_xla_jit_test.py,"@@ -112,8 +112,8 @@ class DefFunctionTest(xla_test.XLATestCase):
 
       # Check that the must-compile attribute gets correctly propagated to the
       # created derivatives.
-      self.assertTrue(backward.function_def.attr['_XlaMustCompile'])
-      self.assertTrue(forward.definition.attr['_XlaMustCompile'])
+      self.assertEqual(backward.function_def.attr['_XlaMustCompile'])
+      self.assertEqual(forward.definition.attr['_XlaMustCompile'])
 
   # Calling function with jit_compile=True from
   # jit_compile=False should compile the inner func.
@@ -1179,7 +1179,7 @@ class DefFunctionTest(xla_test.XLATestCase):
               stage='hlo')
 
       # Test that reduction occurs only once.
-      self.assertTrue(hlo.count('reduce'), 1)
+      self.assertEqual(hlo.count('reduce'), 1)
 
 
 if __name__ == '__main__':
",0,test
b2a4a7da1a07830bfd4618603637520e8a26bd3f,tensorflow/tensorflow,"Java: Avoid some compiler and deprecation warnings
Change: 149554731",exception_jni.h,"@@ -22,7 +22,7 @@ limitations under the License.
 extern ""C"" {
 #endif
 
-class TF_Status;
+struct TF_Status;
 
 extern const char kIllegalArgumentException[];
 extern const char kIllegalStateException[];
",0,train
a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys).
Change: 143149923",tensorflow_inference_jni.cc,"@@ -53,6 +53,9 @@ struct SessionVariables {
   int num_runs = 0;
   int64 timing_total_us = 0;
 
+  bool log_stats = false;
+  StatSummarizer* summarizer = nullptr;
+
   InputMap input_tensors;
   std::vector<std::string> output_tensor_names;
   std::vector<tensorflow::Tensor> output_tensors;
@@ -129,6 +132,10 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(initializeTensorFlow)(
   LOG(INFO) << ""GraphDef loaded from "" << model_str << "" with ""
             << tensorflow_graph.node_size() << "" nodes."";
 
+  // Whether or not stat logging is currently enabled, the StatSummarizer must
+  // be initialized here with the GraphDef while it is available.
+  vars->summarizer = new StatSummarizer(tensorflow_graph);
+
   LOG(INFO) << ""Creating TensorFlow graph from GraphDef."";
   tensorflow::Status s = session->Create(tensorflow_graph);
 
@@ -193,8 +200,28 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)(
   }
 
   vars->output_tensors.clear();
-  s = vars->session->Run(input_tensors, vars->output_tensor_names, {},
-                         &(vars->output_tensors));
+
+  if (vars->log_stats) {
+    RunOptions run_options;
+    run_options.set_trace_level(RunOptions::FULL_TRACE);
+    RunMetadata run_metadata;
+
+    s = vars->session->Run(run_options, input_tensors,
+                           vars->output_tensor_names, {},
+                           &(vars->output_tensors), &run_metadata);
+
+    assert(run_metadata.has_step_stats());
+    const StepStats& step_stats = run_metadata.step_stats();
+    vars->summarizer->ProcessStepStats(step_stats);
+
+    // Print the full output string, not just the abbreviated one returned by
+    // getStatString().
+    vars->summarizer->PrintStepStats();
+  } else {
+    s = vars->session->Run(input_tensors, vars->output_tensor_names, {},
+                           &(vars->output_tensors));
+  }
+
   end_time = CurrentWallTimeUs();
   const int64 elapsed_time_inf = end_time - start_time;
   vars->timing_total_us += elapsed_time_inf;
@@ -208,6 +235,24 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)(
   return s.code();
 }
 
+JNIEXPORT void JNICALL TENSORFLOW_METHOD(enableStatLogging)(
+    JNIEnv* env, jobject thiz, jboolean enableStatLogging) {
+  SessionVariables* vars = GetSessionVars(env, thiz);
+  vars->log_stats = enableStatLogging;
+}
+
+JNIEXPORT jstring JNICALL TENSORFLOW_METHOD(getStatString)(JNIEnv* env,
+                                                           jobject thiz) {
+  // Return an abbreviated stat string suitable for displaying on screen.
+  SessionVariables* vars = GetSessionVars(env, thiz);
+  std::stringstream ss;
+  ss << vars->summarizer->GetStatsByMetric(""Top 10 CPU"",
+                                           StatSummarizer::BY_TIME, 10);
+  ss << vars->summarizer->GetStatsByNodeType();
+  ss << vars->summarizer->ShortSummary();
+  return env->NewStringUTF(ss.str().c_str());
+}
+
 JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz) {
   SessionVariables* vars = GetSessionVars(env, thiz);
 
@@ -216,6 +261,8 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz) {
     LOG(ERROR) << ""Error closing session: "" << s;
   }
 
+  delete vars->summarizer;
+
   mutex_lock l(mutex_);
   std::map<int64, SessionVariables*>& sessions = *GetSessionsSingleton();
   sessions.erase(vars->id);
",0,train
a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys).
Change: 143149923",tensorflow_inference_jni.h,"@@ -48,6 +48,12 @@ JNIEXPORT jint JNICALL TENSORFLOW_METHOD(initializeTensorFlow)(
 JNIEXPORT jint JNICALL TENSORFLOW_METHOD(runInference)(
     JNIEnv* env, jobject thiz, jobjectArray output_name_strings);
 
+JNIEXPORT void JNICALL TENSORFLOW_METHOD(enableStatLogging)(
+    JNIEnv* env, jobject thiz, jboolean enableStatLogging);
+
+JNIEXPORT jstring JNICALL TENSORFLOW_METHOD(getStatString)(JNIEnv* env,
+                                                           jobject thiz);
+
 JNIEXPORT jint JNICALL TENSORFLOW_METHOD(close)(JNIEnv* env, jobject thiz);
 
 FILL_NODE_SIGNATURE(Float, float);
",0,train
a1a3b0c6c3abe0df0c8e017d9f134db4731484b9,tensorflow/tensorflow,"Android: show inference stats on debug screen in demo (accessed with volume keys).
Change: 143149923",stat_summarizer.h,"@@ -113,6 +113,15 @@ class Stat {
 // See tensorflow/examples/android/jni/tensorflow_jni.cc for an example usage.
 class StatSummarizer {
  public:
+  enum SortingMetric {
+    BY_NAME,
+    BY_DEFINITION_ORDER,
+    BY_RUN_ORDER,
+    BY_TIME,
+    BY_MEMORY,
+    BY_TYPE,
+  };
+
   explicit StatSummarizer(const tensorflow::GraphDef& tensorflow_graph);
 
   // Adds another run's StepStats output to the aggregate counts.
@@ -122,6 +131,8 @@ class StatSummarizer {
   // format which can be pasted into a spreadsheet for further analysis.
   std::string GetOutputString() const;
 
+  std::string ShortSummary() const;
+
   // Prints the string returned by GetOutputString().
   void PrintStepStats() const;
 
@@ -130,6 +141,10 @@ class StatSummarizer {
 
   std::string GetStatsByNodeType() const;
 
+  std::string GetStatsByMetric(const string& title,
+                               SortingMetric sorting_metric,
+                               int num_stats) const;
+
   void Reset() {
     run_total_us_.Reset();
     memory_.Reset();
@@ -153,31 +168,16 @@ class StatSummarizer {
     std::vector<TensorDescription> outputs;
   };
 
-  enum SortingMetric {
-    BY_NAME,
-    BY_DEFINITION_ORDER,
-    BY_RUN_ORDER,
-    BY_TIME,
-    BY_MEMORY,
-    BY_TYPE,
-  };
-
   void Validate(const Detail* detail, const NodeExecStats& ns) const;
 
   void OrderNodesByMetric(SortingMetric sorting_metric,
                           std::vector<const Detail*>* details) const;
 
-  std::string GetStatsByMetric(const string& title,
-                               SortingMetric sorting_metric,
-                               int num_stats) const;
-
   std::string HeaderString(const string& title) const;
   std::string ColumnString(const Detail& detail,
                            const int64 cumulative_stat_on_node,
                            const Stat<int64>& stat) const;
 
-  std::string ShortSummary() const;
-
   Stat<int64> run_total_us_;
   Stat<int64> memory_;
 
",0,train
0e52356c86cb6cdc500592124e9a21f1556934bf,tensorflow/tensorflow,"Add derived type attributes for TensorFlow ops generated by TableGen

Motivation for this change is to remove redundant TF type attributes for
TensorFlow ops. For example, tf$T: ""tfdtype$DT_FLOAT"". Type attributes can be derived using the MLIR operand or result MLIR types, attribute names and their mapping. This will also allow constant folding of instructions generated within MLIR (and not imported from TensorFlow) without adding type attributes for the instruction.

Derived attributes are populated while exporting MLIR to TF GraphDef using
auto-generated populators. Populators are only available for the ops that are generated by the TableGen.

Also, fixed Operator::getNumArgs method to exclude derived attributes as they are not
part of the arguments.

TESTED with unit test

PiperOrigin-RevId: 232531561",Operator.h,"@@ -29,6 +29,7 @@
 #include ""llvm/ADT/PointerUnion.h""
 #include ""llvm/ADT/SmallVector.h""
 #include ""llvm/ADT/StringRef.h""
+#include ""llvm/Support/SMLoc.h""
 
 namespace llvm {
 class CodeInit;
@@ -54,6 +55,9 @@ public:
   // Returns the TableGen definition name split around '_'.
   const SmallVectorImpl<StringRef> &getSplitDefName() const;
 
+  // Returns dialect name of the op.
+  StringRef getDialectName() const;
+
   // Returns the C++ class name of the op.
   StringRef getCppClassName() const;
 
@@ -69,15 +73,16 @@ public:
   StringRef getResultName(int index) const;
 
   // Op attribute interators.
-  using attribute_iterator = NamedAttribute *;
-  attribute_iterator attribute_begin();
-  attribute_iterator attribute_end();
-  llvm::iterator_range<attribute_iterator> getAttributes();
+  using attribute_iterator = const NamedAttribute *;
+  attribute_iterator attribute_begin() const;
+  attribute_iterator attribute_end() const;
+  llvm::iterator_range<attribute_iterator> getAttributes() const;
 
   // Op attribute accessors.
   int getNumAttributes() const { return attributes.size(); }
   // Returns the total number of native attributes.
   int getNumNativeAttributes() const;
+  int getNumDerivedAttributes() const;
   NamedAttribute &getAttribute(int index) { return attributes[index]; }
   const NamedAttribute &getAttribute(int index) const;
 
@@ -96,7 +101,9 @@ public:
   Argument getArg(int index);
   StringRef getArgName(int index) const;
   // Returns the total number of arguments.
-  int getNumArgs() const { return operands.size() + attributes.size(); }
+  int getNumArgs() const { return getNumOperands() + getNumNativeAttributes(); }
+
+  ArrayRef<llvm::SMLoc> getLoc() const;
 
   // Query functions for the documentation of the operator.
   bool hasDescription() const;
",0,train
8b2dfadb82db849fa6f61879c994ed084612a7d6,tensorflow/tensorflow,"Log all the model's metrics with the SidecarEvaluator, not only the compiled metrics.

PiperOrigin-RevId: 353804173
Change-Id: Icf46f0063a5327fd70c1571c2f617e78ff5ffc69",sidecar_evaluator.py,"@@ -211,14 +211,16 @@ class SidecarEvaluator(object):
       # TODO(rchao): Support arbitrary callback for extensibility.
       self.model.evaluate(self.data, steps=self.steps)
 
-      logging.info('End of evaluation. Accuracy: %r', [
-          metric.result().numpy()
-          for metric in self.model.compiled_metrics.metrics
-      ])
+      logging.info(
+          'End of evaluation. Metrics: %s', ' '.join([
+              '{}={}'.format(metric.name,
+                             metric.result().numpy())
+              for metric in self.model.metrics
+          ]))
 
       if self._summary_writer:
         with summary_ops_v2.record_if(True), self._summary_writer.as_default():
-          for metric in self.model.compiled_metrics.metrics:
+          for metric in self.model.metrics:
             summary_ops_v2.scalar(
                 metric.name,
                 metric.result(),
",0,train
8b2dfadb82db849fa6f61879c994ed084612a7d6,tensorflow/tensorflow,"Log all the model's metrics with the SidecarEvaluator, not only the compiled metrics.

PiperOrigin-RevId: 353804173
Change-Id: Icf46f0063a5327fd70c1571c2f617e78ff5ffc69",sidecar_evaluator_test.py,"@@ -58,12 +58,14 @@ class SidecarEvaluatorTest(test.TestCase):
 
     # Asserts the content of the summary file.
     event_pb_written = False
+    event_tags = []
     for event_pb in summary_iterator.summary_iterator(
         os.path.join(log_dir, summary_files[0])):
       if event_pb.step > 0:
         self.assertEqual(event_pb.step, 32)
-        self.assertEqual(event_pb.summary.value[0].tag, 'categorical_accuracy')
+        event_tags.append(event_pb.summary.value[0].tag)
         event_pb_written = True
+    self.assertCountEqual(event_tags, ['categorical_accuracy', 'loss'])
 
     # Verifying at least one non-zeroth step is written to summary.
     self.assertTrue(event_pb_written)
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependency.
Add has_side_effect to TfLiteNode which will be set during InterpreterBuilder.
Currently this adds control dependency between stateful ops.
stateful ops are determined to be any op that have input/output of type resource and selected subset for some ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",common.h,"@@ -456,8 +456,8 @@ typedef struct TfLiteTensor {
 } TfLiteTensor;
 
 // A structure representing an instance of a node.
-// This structure only exhibits the inputs, outputs and user defined data, not
-// other features like the type.
+// This structure only exhibits the inputs, outputs, user defined data and some
+// node properties (like statefulness), not other features like the type.
 typedef struct TfLiteNode {
   // Inputs to this node expressed as indices into the simulator's tensors.
   TfLiteIntArray* inputs;
@@ -490,6 +490,9 @@ typedef struct TfLiteNode {
   // created by calling `interpreter.ModifyGraphWithDelegate`.
   // WARNING: This is an experimental interface that is subject to change.
   struct TfLiteDelegate* delegate;
+
+  // Whether this op might have side effect (e.g. stateful op).
+  bool might_have_side_effect;
 } TfLiteNode;
 #else   // defined(TF_LITE_STATIC_MEMORY)?
 // NOTE: This flag is opt-in only at compile time.
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependencies.
Add has_side_effect to TfLiteNode, which will be set by the InterpreterBuilder.
Currently this adds control dependencies between stateful ops.
Stateful ops are determined to be any op that has an input/output of type resource, plus a selected subset of other ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",subgraph.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/lite/allocation.h""
 #include ""tensorflow/lite/arena_planner.h""
 #include ""tensorflow/lite/builtin_ops.h""
+#include ""tensorflow/lite/c/c_api_types.h""
 #include ""tensorflow/lite/c/common.h""
 #include ""tensorflow/lite/context_util.h""
 #include ""tensorflow/lite/core/api/error_reporter.h""
@@ -786,6 +787,7 @@ TfLiteStatus Subgraph::AddNodeWithParameters(
     node.custom_initial_data = nullptr;
     node.custom_initial_data_size = 0;
   }
+  node.might_have_side_effect = OpMightHaveSideEffect(&node, registration);
 
   node.delegate = nullptr;
   // Copying of registration is required to support unresolved custom ops.
@@ -794,6 +796,37 @@ TfLiteStatus Subgraph::AddNodeWithParameters(
   return kTfLiteOk;
 }
 
+namespace {
+// Returns true if any tensor identified by indexes in 'tensor_indexes' is
+// of type 'kTfLiteResource'. False otherwise.
+bool AnyTensorOfTypeResource(const std::vector<TfLiteTensor>& tensors,
+                             const TfLiteIntArray* tensor_indexes) {
+  for (int i = 0; i < tensor_indexes->size; ++i) {
+    int tensor_index = tensor_indexes->data[i];
+    if (tensor_index >= 0 && tensor_index < tensors.size() &&
+        tensors[tensor_index].type == kTfLiteResource)
+      return true;
+  }
+  return false;
+}
+
+}  // namespace
+
+bool Subgraph::OpMightHaveSideEffect(
+    const TfLiteNode* node, const TfLiteRegistration* registration) const {
+  // Check if any of the input tensors are of type resource.
+  if (AnyTensorOfTypeResource(tensors_, node->inputs)) return true;
+  // Check if any of the output tensors are of type resource.
+  if (AnyTensorOfTypeResource(tensors_, node->outputs)) return true;
+  // Consider control flow ops has side effect, some ops in the control flow
+  // subgraph can have side effect.
+  if (registration->builtin_code == kTfLiteBuiltinIf ||
+      registration->builtin_code == kTfLiteBuiltinWhile ||
+      registration->builtin_code == kTfLiteBuiltinCallOnce)
+    return true;
+  return false;
+}
+
 TfLiteStatus Subgraph::ResizeInputTensor(int tensor_index,
                                          const std::vector<int>& dims) {
   const bool delegates_applied = !pre_delegation_execution_plan_.empty();
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependencies.
Add has_side_effect to TfLiteNode, which will be set by the InterpreterBuilder.
Currently this adds control dependencies between stateful ops.
Stateful ops are determined to be any op that has an input/output of type resource, plus a selected subset of other ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",subgraph.h,"@@ -615,6 +615,15 @@ class Subgraph {
   // Enables preserving intermediates for debugging.
   TfLiteStatus PreserveAllTensorsExperimental();
 
+  // Returns true if 'node' could have side effect (e.g. stateful op).
+  // Note that any node that might update other tensors beside op's output
+  // are considered to have side effect.
+  // So control flow ops like 'If' and 'While' are considered to have
+  // side effect because they can have ops that have side effect in the
+  // condition and body subgraphs.
+  bool OpMightHaveSideEffect(const TfLiteNode* node,
+                             const TfLiteRegistration* registration) const;
+
   // The state of the Interpreter.
   enum State {
     // The interpreter isn't ready to be invoked.
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependencies.
Add has_side_effect to TfLiteNode, which will be set by the InterpreterBuilder.
Currently this adds control dependencies between stateful ops.
Stateful ops are determined to be any op that has an input/output of type resource, plus a selected subset of other ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",graph_info.cc,"@@ -56,6 +56,22 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
     tensor_epochs_.resize(info_->num_tensors(), kEpochAlwaysReady);
     node_epochs_.clear();
     node_epochs_.resize(info_->num_execution_nodes(), kEpochNotReady);
+    control_deps_.clear();
+    control_deps_.resize(info_->num_execution_nodes());
+    // Add control dependency between stateful ops.
+    // TODO(b/149099381): Revisit better way for adding control dependency.
+    int last_op_with_side_effect = -1;
+    for (int i = 0; i < info_->num_execution_nodes(); ++i) {
+      const auto& node = info_->node(i);
+      // Set default value.
+      control_deps_[i] = -1;
+      if (node.might_have_side_effect) {
+        if (last_op_with_side_effect != -1) {
+          control_deps_[i] = last_op_with_side_effect;
+        }
+        last_op_with_side_effect = i;
+      }
+    }
     // Set computed tensors to be kEpochNotReady (initializer set everything to
     // AlwaysReady).
     for (int node_index = 0; node_index < info_->num_execution_nodes();
@@ -134,6 +150,12 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
         return false;
       }
     }
+    // If any of the nodes that current node depend on is not assigned
+    // any epochs then don't process this node.
+    if (control_deps_[node_index] != -1 &&
+        node_epochs_[control_deps_[node_index]] == kEpochNotReady) {
+      return false;
+    }
 
     int original_node_idx = info_->node_index(node_index);
     // When we are starting a new epoch, the first ready node defines
@@ -209,6 +231,10 @@ class PartitionGraphIntoIndependentNodeSubsetsImpl {
   // Maps from tensor index to the epoch in which it is assigned. Also special
   // negative values of kEpochNotReady if not assigned.
   std::vector<int> node_epochs_;
+  // For each node the node id that this op depends on.
+  // TODO(b/149099381): This should be a list, but we are now chaining
+  // dependency between previous ops.
+  std::vector<int> control_deps_;
 };
 // LINT.ThenChange(//tensorflow/lite/delegates/utils.h)
 
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependencies.
Add has_side_effect to TfLiteNode, which will be set by the InterpreterBuilder.
Currently this adds control dependencies between stateful ops.
Stateful ops are determined to be any op that has an input/output of type resource, plus a selected subset of other ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",graph_info_test.cc,"@@ -66,12 +66,13 @@ class SimpleTestGraph : public GraphInfo {
   const std::vector<int>& outputs() const override { return outputs_; }
   const std::vector<int>& variables() const override { return variables_; }
 
-  void AddNode(const std::vector<int>& inputs,
-               const std::vector<int>& outputs) {
+  void AddNode(const std::vector<int>& inputs, const std::vector<int>& outputs,
+               bool might_have_side_effect = false) {
     nodes_.push_back(TfLiteNode());
     TfLiteNode& node = nodes_.back();
     node.inputs = ConvertVector(inputs);
     node.outputs = ConvertVector(outputs);
+    node.might_have_side_effect = might_have_side_effect;
   }
 
   void AddTensors(int count) { tensors_.resize(count + tensors_.size()); }
@@ -342,6 +343,50 @@ TEST(PartitionTest, Nodes3PartitionNodes2) {
       {expected_subgraph0, expected_subgraph1, expected_subgraph2});
 }
 
+// Test correct partition for graph with control dependency.
+// Graph for test is like
+// varhandleOp -> ReadVariableOp -> Add -> AssignVariableOp
+//             |_________________________^    ^^
+//             |------------------------->ReadVariableOp -> (Output)
+// ^^ is control dependency, in this case we don't want to invoke the
+// last ReadVariableOp before AssignVariableOp finishes executing.
+// '>' and '^' represents data dependency.
+TEST(PartitionTest, Nodes4PartitionNodes3_WithControlDependency) {
+  SimpleTestGraph graph;
+  // Construct graph.
+  {
+    graph.AddTensors(5);
+    graph.AddNode({0}, {1}, true);
+    graph.AddNode({1}, {2}, true);
+    graph.AddNode({2}, {3}, false);
+    graph.AddNode({1, 3}, {}, true);
+    graph.AddNode({1}, {4}, true);
+  }
+  graph.SetInputsAndOutputs({0}, {4});
+  std::vector<int> nodes_to_partition = {0, 1, 3, 4};
+  std::vector<NodeSubset> generated_subgraphs;
+  PartitionGraph(graph, nodes_to_partition, &generated_subgraphs);
+
+  NodeSubset expected_subgraph0;
+  expected_subgraph0.type = NodeSubset::kTfPartition;
+  expected_subgraph0.nodes = {0, 1};
+  expected_subgraph0.input_tensors = {0};
+  expected_subgraph0.output_tensors = {1, 2};
+  NodeSubset expected_subgraph1;
+  expected_subgraph1.type = NodeSubset::kTfNonPartition;
+  expected_subgraph1.nodes = {2};
+  expected_subgraph1.input_tensors = {2};
+  expected_subgraph1.output_tensors = {3};
+  NodeSubset expected_subgraph2;
+  expected_subgraph2.type = NodeSubset::kTfPartition;
+  expected_subgraph2.nodes = {3, 4};
+  expected_subgraph2.input_tensors = {1, 3};
+  expected_subgraph2.output_tensors = {4};
+  CheckPartitionSubgraphs(
+      generated_subgraphs,
+      {expected_subgraph0, expected_subgraph1, expected_subgraph2});
+}
+
 }  // namespace
 }  // namespace tflite
 
",0,train
e06673958eb2c37a263de5dfdf7b487439f427d4,tensorflow/tensorflow,"[lite] Update Graph partitioning to handle control dependencies.
Add has_side_effect to TfLiteNode, which will be set by the InterpreterBuilder.
Currently this adds control dependencies between stateful ops.
Stateful ops are determined to be any op that has an input/output of type resource, plus a selected subset of other ops.

PiperOrigin-RevId: 367756651
Change-Id: Ib8cc6c875dbeafd8cd8471aed1051613a9cc8914",interpreter_builder.cc,"@@ -27,6 +27,7 @@ limitations under the License.
 #include <vector>
 
 #include ""flatbuffers/flatbuffers.h""  // from @flatbuffers
+#include ""tensorflow/lite/c/c_api_types.h""
 #include ""tensorflow/lite/core/api/error_reporter.h""
 #include ""tensorflow/lite/core/api/flatbuffer_conversions.h""
 #include ""tensorflow/lite/core/api/op_resolver.h""
@@ -764,10 +765,12 @@ TfLiteStatus InterpreterBuilder::operator()(
         FlatBufferIntArrayToVector(subgraph->outputs()));
 
     // Finally setup nodes and tensors
-    if (ParseNodes(operators, modified_subgraph) != kTfLiteOk)
-      return cleanup_and_error();
+    // Parse tensors before nodes as ParseNodes checks input tensors for the
+    // nodes.
     if (ParseTensors(buffers, tensors, modified_subgraph) != kTfLiteOk)
       return cleanup_and_error();
+    if (ParseNodes(operators, modified_subgraph) != kTfLiteOk)
+      return cleanup_and_error();
 
     std::vector<int> variables;
     for (int i = 0; i < modified_subgraph->tensors_size(); ++i) {
",0,train
b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly.

Also note that neon_tensor_utils.cc is already using ruy:detect_arm.

PiperOrigin-RevId: 306682916
Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwiseconv_quantized_test.cc,"@@ -1074,13 +1074,9 @@ void TestOneDepthwiseConv3x3Filter(
 void TestOneNeonDot3x3(const TestParam& test_param) {
 #if defined(__aarch64__) && !defined(GOOGLE_L4T) && defined(__ANDROID__) && \
     defined(__clang__)
-  CpuBackendContext backend_context;
-  ruy::Context* ruy_context = backend_context.ruy_context();
-  const auto ruy_paths = ruy_context != nullptr
-                             ? ruy_context->GetRuntimeEnabledPaths()
-                             : ruy::Path::kNone;
-  const bool has_dot_product_instructions =
-      (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone;
+  CpuFlags cpu_flags;
+  GetCpuFlags(&cpu_flags);
+  const bool has_dot_product_instructions = cpu_flags.neon_dotprod;
   if (test_param.forced_invocation ==
           DepthwiseConvImplementation::kUseNeon3x3DotProduct &&
       !has_dot_product_instructions) {
",0,train
b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly.

Also note that neon_tensor_utils.cc is already using ruy:detect_arm.

PiperOrigin-RevId: 306682916
Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",cpu_check.h,"@@ -15,8 +15,7 @@ limitations under the License.
 #ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_
 #define TENSORFLOW_LITE_KERNELS_INTERNAL_OPTIMIZED_CPU_CHECK_H_
 
-#include ""tensorflow/lite/kernels/cpu_backend_context.h""
-#include ""tensorflow/lite/kernels/internal/optimized/neon_check.h""
+#include ""ruy/detect_arm.h""  // from @ruy
 
 namespace tflite {
 
@@ -24,16 +23,8 @@ struct CpuFlags {
   bool neon_dotprod = false;
 };
 
-inline void GetCpuFlags(CpuBackendContext* cpu_backend_context,
-                        CpuFlags* cpu_flags) {
-#if RUY_PLATFORM(ARM)
-  ruy::Context* ruy_context = cpu_backend_context->ruy_context();
-  cpu_flags->neon_dotprod =
-      ruy_context != nullptr && (ruy_context->GetRuntimeEnabledPaths() &
-                                 ruy::Path::kNeonDotprod) != ruy::Path::kNone;
-#else
-  cpu_flags->neon_dotprod = false;
-#endif
+inline void GetCpuFlags(CpuFlags* cpu_flags) {
+  cpu_flags->neon_dotprod = ruy::DetectDotprod();
 }
 
 }  // namespace tflite
",0,train
b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly.

Also note that neon_tensor_utils.cc is already using ruy:detect_arm.

PiperOrigin-RevId: 306682916
Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwiseconv_multithread.h,"@@ -144,7 +144,7 @@ inline void DepthwiseConv(const DepthwiseParams& params,
   const int output_height = output_shape.Dims(1);
 
   CpuFlags cpu_flags;
-  GetCpuFlags(cpu_backend_context, &cpu_flags);
+  GetCpuFlags(&cpu_flags);
 
   if (thread_count == 1) {
     DepthwiseConvImpl(params, input_shape, input_data, filter_shape,
",0,train
b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly.

Also note that neon_tensor_utils.cc is already using ruy:detect_arm.

PiperOrigin-RevId: 306682916
Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",depthwise_conv.h,"@@ -1810,13 +1810,10 @@ inline void DepthwiseConvWithRounding(
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
 #if defined(__ANDROID__) && defined(__clang__)
-  ruy::Context* ruy_context = cpu_backend_context.ruy_context();
-  const auto ruy_paths = ruy_context != nullptr
-                             ? ruy_context->GetRuntimeEnabledPaths()
-                             : ruy::Path::kNone;
+  CpuFlags cpu_flags;
+  GetCpuFlags(&cpu_flags);
   // TODO(b/150208140): Re-enable once erroneous activation in test is resolved.
-  const bool has_dot_product_instructions =
-      false && (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone;
+  const bool has_dot_product_instructions = false && cpu_flags.neon_dotprod;
 
   // Dispatch to dot-product 3x3 kernels when supported.
   if (has_dot_product_instructions) {
",0,train
b36f214ee82bb51d1f1c1fe9c74cc5ce81df400f,tensorflow/tensorflow,"Simplify GetCpuFlags. Now that the dotprod detection is just a getauxval, it's cheap, not worth caching in a context object, so use ruy:detect_arm directly.

Also note that neon_tensor_utils.cc is already using ruy:detect_arm.

PiperOrigin-RevId: 306682916
Change-Id: I5b81be71035c5d0cfb22006a5edb2dcafa335bcd",neon_tensor_utils.h,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""tensorflow/lite/c/builtin_op_data.h""
 #include ""tensorflow/lite/kernels/cpu_backend_context.h""
 #include ""tensorflow/lite/kernels/internal/optimized/cpu_check.h""
+#include ""tensorflow/lite/kernels/internal/optimized/neon_check.h""
 #include ""tensorflow/lite/kernels/internal/optimized/neon_tensor_utils_impl.h""
 #include ""tensorflow/lite/kernels/internal/reference/portable_tensor_utils_impl.h""
 
",0,train
aabcdcbdff5e68cdbf734ae3ed297242c5b6dfdf,tensorflow/tensorflow,"Preserve the argument order when inserting the fake quant ops

Previously, the pass relied on pointer values to determine the insertion order, which introduced test flakiness.
This CL makes the order deterministic by using the order in which ops are visited.

PiperOrigin-RevId: 256266368",quantization_driver.cc,"@@ -267,7 +267,9 @@ class QuantizationDriver {
     }
     cached.first->second = InitializeState(op, index, in, /*as_result=*/false);
     if (is_argument) {
-      arg_states_[llvm::cast<BlockArgument>(in)] = cached.first->second;
+      auto *arg = llvm::cast<BlockArgument>(in);
+      arg_states_[arg] = cached.first->second;
+      args_.push_back(arg);
     }
   }
 
@@ -299,11 +301,15 @@ class QuantizationDriver {
   // the values from `operand_states_` and `result_state_`.
   std::unordered_map<int, RequantizeState> rescale_states_;
 
-  // Maps of indexes to the propagation state vector from the ops results and
-  // op operands. Both maps are unmodified after initialization.
+  // Maps of indexes to the propagation state vector from the ops operands,
+  // results and arguments.
   llvm::DenseMap<OpValue, int> operand_states_;
   llvm::DenseMap<OpValue, int> result_states_;
   llvm::DenseMap<BlockArgument *, int> arg_states_;
+
+  // This vector is to preserve the arguments order, so the newly inserted
+  // quantized ops for the arguments are deterministically ordered.
+  llvm::SmallVector<BlockArgument *, 4> args_;
 };
 
 #include ""tensorflow/compiler/mlir/lite/utils/generated_op_quant_spec_getters.inc""
@@ -656,10 +662,7 @@ bool QuantizationDriver::PropagateParams() {
 }
 
 void QuantizationDriver::Finalize() {
-  std::map<BlockArgument *, int> sorted_states(arg_states_.begin(),
-                                               arg_states_.end());
-  for (auto it : sorted_states) {
-    BlockArgument *arg = it.first;
+  for (auto *arg : args_) {
     auto &state = GetArgQuantState(arg);
     auto &requantize = GetArgRequantizeState(arg);
     if (state.IsEmpty() ||
",0,train
22bf3df6dc91fbb80a581c1fbce30cb9d2d411e9,tensorflow/tensorflow,"[MLIR][CHLO] Use CHLO lowering for `is_inf` op

PiperOrigin-RevId: 355189054
Change-Id: I28304ff8ed9f564a9698fb5609c19d5d19956e86",transform_unranked_hlo.cc,"@@ -54,8 +54,8 @@ namespace {
 #define MAP_CHLO_OPERATION_CWISE_UNARY(fn, sep)                            \
   fn(AcosOp) sep fn(AcoshOp) sep fn(AsinOp) sep fn(AsinhOp) sep fn(AtanOp) \
       sep fn(AtanhOp) sep fn(ConjOp) sep fn(CoshOp) sep fn(DigammaOp)      \
-          sep fn(ErfOp) sep fn(ErfcOp) sep fn(LgammaOp) sep fn(SinhOp)     \
-              sep fn(TanOp)
+          sep fn(ErfOp) sep fn(ErfcOp) sep fn(IsInfOp) sep fn(LgammaOp)    \
+              sep fn(SinhOp) sep fn(TanOp)
 
 template <typename OpTy>
 inline void AddLegalOpOnRankedTensor(ConversionTarget *target) {
",0,train
22bf3df6dc91fbb80a581c1fbce30cb9d2d411e9,tensorflow/tensorflow,"[MLIR][CHLO] Use CHLO lowering for `is_inf` op

PiperOrigin-RevId: 355189054
Change-Id: I28304ff8ed9f564a9698fb5609c19d5d19956e86",lower_tf.cc,"@@ -1552,7 +1552,6 @@ void PopulateTFLoweringBeforeHLOPatterns(MLIRContext *context,
       LowerExpm1Op,
       LowerFakeQuantWithMinMaxArgs,
       LowerFillOp,
-      LowerIsInfOp,
       LowerIsNanOp,
       LowerL2LossOp,
       LowerMulNoNanOp,
",0,train
d86e84eff8640f7f30818c52c3345a67cf5acb38,tensorflow/tensorflow,Revert last part.,core.py,"@@ -659,8 +659,8 @@ class Lambda(Layer):
   `Lambda` layer is saving and inspecting a Model. `Lambda` layers 
   are saved by serializing the Python bytecode, whereas subclassed 
   Layers can be saved via overriding their `get_config` method. Overriding 
-  `get_config` improves the portability of and the ability to inspect,
-  visualize and reason about them.
+  `get_config` improves the portability of Models. Models that rely on 
+  subclassed Layers are also often easier to visualize and reason about.
 
   Examples:
 
",0,train
4bfe1dce6437e5883c2fdf232b859f8b88471083,tensorflow/tensorflow,"Small additions to DistributedStrategy's API docs

PiperOrigin-RevId: 308949260
Change-Id: Ib77b03bbcc38083ce64504e29f84c2cfc8073f85",distribute_lib.py,"@@ -520,7 +520,10 @@ class StrategyBase(object):
   """"""A state & compute distribution policy on a list of devices.
 
   See [the guide](https://www.tensorflow.org/guide/distributed_training)
-  for overview and examples.
+  for overview and examples. See `tf.distribute.StrategyExtended` and
+  [`tf.distribute`](https://www.tensorflow.org/api_docs/python/tf/distribute)
+  for a glossory of concepts mentioned on this page such as ""per-replica"",
+  _replica_, and _reduce_.
 
   In short:
 
@@ -736,12 +739,16 @@ class StrategyBase(object):
     # Iterate over the distributed dataset
     for x in dist_dataset:
       # process dataset elements
-      strategy.run(train_step, args=(x,))
+      strategy.run(replica_fn, args=(x,))
     ```
 
-    We will assume that the input dataset is batched by the
-    global batch size. With this assumption, we will make a best effort to
-    divide each batch across all the replicas (one or more workers).
+    In the code snippet above, the dataset `dist_dataset` is batched by
+    GLOBAL_BATCH_SIZE, and we iterate through it using `for x in dist_dataset`,
+    where x is one batch of data of GLOBAL_BATCH_SIZE containing N batches of
+    data of per-replica batch size, corresponding to N replicas.
+    `tf.distribute.Strategy.run` will take care of feeding
+    the right per-replica batch to the right `replica_fn` execution on each
+    replica.
 
     In a multi-worker setting, we will first attempt to distribute the dataset
     by attempting to detect whether the dataset is being created out of
@@ -892,8 +899,13 @@ class StrategyBase(object):
     `tf.distribute.DistributedValues` containing tensors or composite tensors.
 
     IMPORTANT: Depending on the implementation of `tf.distribute.Strategy` and
-    whether eager execution is enabled, `fn` may be called one or more times (
-    once for each replica).
+    whether eager execution is enabled, `fn` may be called one or more times. If
+    `fn` is annotated with `tf.function` or `tf.distribute.Strategy.run` is
+    called inside a `tf.function`, eager execution is disabled and `fn` is
+    called once (or once per replica, if you are using MirroredStrategy) to
+    generate a Tensorflow graph, which will then be reused for execution with
+    new inputs. Otherwise, if eager execution is enabled, `fn` will be called
+    every step just like regular python code.
 
     Example usage:
 
",0,train
c2830904c770c4343b3581d91f0f14c08c2a727b,tensorflow/tensorflow,use `delete[]` instead of `delete`,gradient_checker_test.cc,"@@ -52,7 +52,7 @@ void CompareNumericalAndManualGradients(
   for (int j = 0; j < num_grad; j++) {
     ASSERT_NEAR(dnumerical[j], expected_grad[j], abs_error);
   }
-  delete dnumerical;
+  delete[] dnumerical;
   TF_DeleteTensor(numerical_tensor);
 }
 
",0,train
913d597ef1d3f278084c2217d2ec82826f475c0d,tensorflow/tensorflow,"Deprecate Network `state_updates` property.

PiperOrigin-RevId: 310024527
Change-Id: Ic87294583db713b4d1799e51b13689f9dbd3be25",network.py,"@@ -59,9 +59,11 @@ from tensorflow.python.training.tracking import data_structures
 from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils
 from tensorflow.python.training.tracking import tracking
 from tensorflow.python.training.tracking import util as trackable_utils
+from tensorflow.python.util import deprecation
 from tensorflow.python.util import nest
 from tensorflow.python.util import serialization
 from tensorflow.python.util import tf_inspect
+from tensorflow.tools.docs import doc_controls
 
 
 # pylint: disable=g-import-not-at-top
@@ -524,8 +526,15 @@ class Network(base_layer.Layer):
         layer.reset_states()
 
   @property
+  @deprecation.deprecated(
+      date=None,
+      instructions='This property should not be used in TensorFlow 2.0, '
+      'as updates are applied automatically.')
+  @doc_controls.do_not_generate_docs
   def state_updates(self):
-    """"""Returns the `updates` from all layers that are stateful.
+    """"""Deprecated, do NOT use!
+
+    Returns the `updates` from all layers that are stateful.
 
     This is useful for separating training updates and
     state updates, e.g. when we need to update a layer's internal state
",0,train
3b5b75e304e2801b3a374f9a328bb3ebba23083e,tensorflow/tensorflow,"Add strides attribute to HLO Slice Op

PiperOrigin-RevId: 268957427",hlo_function_importer.cc,"@@ -327,7 +327,8 @@ StatusOr<mlir::Operation*> HloFunctionImporter::ImportInstruction(
           ->create<mlir::xla_hlo::SliceOp>(
               loc, result_type, operands[0],
               ConvertDimensions(instruction->slice_starts()),
-              ConvertDimensions(instruction->slice_limits()))
+              ConvertDimensions(instruction->slice_limits()),
+              ConvertDimensions(instruction->slice_strides()))
           .getOperation();
     }
     case HloOpcode::kConcatenate: {
",0,train
2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup

PiperOrigin-RevId: 166070170",multioutput_fusion_test.cc,"@@ -42,17 +42,15 @@ limitations under the License.
 #include ""tensorflow/core/platform/test_benchmark.h""
 #include ""tensorflow/core/platform/types.h""
 
-using tensorflow::gtl::ArraySlice;
-
 namespace xla {
 namespace {
 
-class MultiOutputFusionTest : public HloTestBase {
- public:
-  ErrorSpec error_spec_{0.0001, 1e-2};
+using ::tensorflow::gtl::ArraySlice;
 
+class MultiOutputFusionTest : public HloTestBase {
  protected:
-  MultiOutputFusionTest() {}
+  MultiOutputFusionTest() { error_spec_ = ErrorSpec{0.0001, 1e-2}; }
+
   void RunTest2D(bool manual_fusion, int64 size) {
     auto builder = HloComputation::Builder(TestName());
     auto hlo_module = CreateNewModule();
",0,train
2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup

PiperOrigin-RevId: 166070170",heap_test.cc,"@@ -15,15 +15,10 @@ limitations under the License.
 
 #include ""tensorflow/contrib/nearest_neighbor/kernels/heap.h""
 
-#include <vector>
-
 #include ""tensorflow/core/kernels/ops_testutil.h""
 
-using std::vector;
-
-using tensorflow::nearest_neighbor::SimpleHeap;
-using tensorflow::nearest_neighbor::AugmentedHeap;
-
+namespace tensorflow {
+namespace nearest_neighbor {
 namespace {
 
 TEST(HeapTest, SimpleHeapTest1) {
@@ -189,3 +184,5 @@ TEST(HeapTest, AugmentedHeapTest1) {
 }
 
 }  // namespace
+}  // namespace nearest_neighbor
+}  // namespace tensorflow
",0,train
2109a2b3d9fb5bf34ca09e06ff9ca990e9b8fbc7,tensorflow/tensorflow,"Minor cleanup

PiperOrigin-RevId: 166070170",hyperplane_lsh_probes.cc,"@@ -101,8 +101,8 @@ class HyperplaneLSHProbesOp : public OpKernel {
 
     int batch_size = products_tensor.dim_size(0);
 
-    Tensor* probes_tensor = NULL;
-    Tensor* tables_tensor = NULL;
+    Tensor* probes_tensor = nullptr;
+    Tensor* tables_tensor = nullptr;
     TensorShape output_shape({batch_size, num_probes});
     OP_REQUIRES_OK(context,
                    context->allocate_output(0, output_shape, &probes_tensor));
",0,train
61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,setup.py,"@@ -20,7 +20,7 @@ from __future__ import print_function
 
 from setuptools import setup
 
-_VERSION = '1.6.0-rc0'
+_VERSION = '1.6.0-rc1'
 
 CONSOLE_SCRIPTS = [
     'capture_tpu_profile=cloud_tpu_profiler.main:run_main',
",0,train
61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,version.h,"@@ -24,7 +24,7 @@ limitations under the License.
 
 // TF_VERSION_SUFFIX is non-empty for pre-releases (e.g. ""-alpha"", ""-alpha.1"",
 // ""-beta"", ""-rc"", ""-rc.1"")
-#define TF_VERSION_SUFFIX ""-rc0""
+#define TF_VERSION_SUFFIX ""-rc1""
 
 #define TF_STR_HELPER(x) #x
 #define TF_STR(x) TF_STR_HELPER(x)
",0,train
61df29fa97cf82f3d1ef129a70bb5fa3ed99fe3a,tensorflow/tensorflow,Update version string to 1.6.0-rc1,setup.py,"@@ -29,7 +29,7 @@ from setuptools.dist import Distribution
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.6.0-rc0'
+_VERSION = '1.6.0-rc1'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
",0,train
a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme

PiperOrigin-RevId: 265975853",ir_emitter_unnested.cc,"@@ -2627,9 +2627,9 @@ void IrEmitterUnnested::EmitHlo021Tile(
   constexpr int kNumRows = 4;
   KernelMappingScheme mapping_scheme(
       reduced_output_dims, /*tile_size_y=*/kWarpSize,
-      /*tile_size_x=*/kWarpSize, /*req_block_sizes=*/{1, 1, 1},
+      /*tile_size_x=*/kWarpSize, /*block_size_z=*/1,
       /*num_threads_y=*/kNumRows,
-      /*num_threads_x=*/kWarpSize, &b_);
+      /*num_threads_x=*/kWarpSize, /*is_dilated_x=*/false, &b_);
   KernelCodegenInfo kernel_info(&mapping_scheme);
 
   std::vector<IrArray> param_arrays;
@@ -3062,7 +3062,7 @@ bool IsUnrollingColumnReductionBeneficial(const HloInstruction* unnested_hlo,
 
 }  // namespace
 
-std::tuple<KernelMappingScheme, bool>
+std::pair<KernelMappingScheme, bool>
 IrEmitterUnnested::ComputeMappingSchemeAndReductionKind(
     const HloInstruction* unnested_hlo, const HloInstruction* first_reduce) {
   const Shape& input_shape = first_reduce->operand(0)->shape();
@@ -3121,12 +3121,10 @@ IrEmitterUnnested::ComputeMappingSchemeAndReductionKind(
     tile_size_y = kNumElementsPerPartialSum;
   }
 
-  DimensionVector req_block_sizes{block_size_z, 1, 1};
   llvm_ir::KernelMappingScheme mapping_scheme(
-      dims_in_elem, tile_size_y, tile_size_x, req_block_sizes, num_threads_y,
-      num_threads_x, &b_);
-  mapping_scheme.SetDilatedX(dilated_x);
-  return std::make_tuple(mapping_scheme, is_row_reduction);
+      dims_in_elem, tile_size_y, tile_size_x, block_size_z, num_threads_y,
+      num_threads_x, dilated_x, &b_);
+  return std::make_pair(mapping_scheme, is_row_reduction);
 }
 
 Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
@@ -3197,11 +3195,11 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
                                      ""doesn't set the input layout of ""
                                   << first_reduce->ToString();
 
-  bool is_row_reduction;
-  llvm_ir::KernelMappingScheme mapping_scheme;
-  std::tie(mapping_scheme, is_row_reduction) =
+  auto mapping_scheme_pair =
       ComputeMappingSchemeAndReductionKind(unnested_hlo, first_reduce);
-  ReductionCodegenInfo reduction_info(&mapping_scheme, is_row_reduction);
+  bool is_row_reduction = mapping_scheme_pair.second;
+  ReductionCodegenInfo reduction_info(&mapping_scheme_pair.first,
+                                      is_row_reduction);
   EmitElementFunction emit_reduction_tile =
       [&](const llvm_ir::IrArray::Index& index, llvm::Value* y_loc,
           llvm::Value* x_loc, int64 x_iter_num) {
@@ -3216,9 +3214,9 @@ Status IrEmitterUnnested::EmitReductionFromOrToContiguousDimensions(
       [&](llvm::Value* y, llvm::Value* x, const IrArray::Index& index,
           const string& loop_name, llvm::Value* tile_height,
           llvm::Value* tile_width, KernelSupportLibrary* ksl) {
-        EmitTiledElementalCodeWithBoundsCheck(&mapping_scheme, index, loop_name,
-                                              ksl, &b_, y, x, tile_height,
-                                              tile_width, emit_reduction_tile);
+        EmitTiledElementalCodeWithBoundsCheck(
+            &mapping_scheme_pair.first, index, loop_name, ksl, &b_, y, x,
+            tile_height, tile_width, emit_reduction_tile);
       },
       /*block_prologue_generator=*/
       [&](HloInstruction* hlo, KernelCodegenInfo* kernel_info) {
",0,test
a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme

PiperOrigin-RevId: 265975853",ir_emitter_unnested.h,"@@ -212,7 +212,7 @@ class IrEmitterUnnested : public IrEmitter,
   // and first_reduce are the same instruction. For a kInput fusion,
   // unnested_hlo is the fusion instruction while first_reduce is the first
   // reduce op.
-  std::tuple<llvm_ir::KernelMappingScheme, bool>
+  std::pair<llvm_ir::KernelMappingScheme, bool>
   ComputeMappingSchemeAndReductionKind(const HloInstruction* unnested_hlo,
                                        const HloInstruction* first_reduce);
 
",0,test
a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme

PiperOrigin-RevId: 265975853",kernel_tiling.cc,"@@ -103,29 +103,36 @@ absl::optional<std::vector<int64> > FindTranspose021(const Shape& a,
   return absl::nullopt;
 }
 
-KernelMappingScheme::KernelMappingScheme(
-    absl::Span<const int64> dims_in_elems, int64 tile_size_y, int64 tile_size_x,
-    absl::Span<const int64> req_block_sizes, int64 num_threads_y,
-    int64 num_threads_x, llvm::IRBuilder<>* b)
+KernelMappingScheme::KernelMappingScheme(absl::Span<const int64> dims_in_elems,
+                                         int64 tile_size_y, int64 tile_size_x,
+                                         int64 block_size_z,
+                                         int64 num_threads_y,
+                                         int64 num_threads_x, bool is_dilated_x,
+                                         llvm::IRBuilder<>* b)
     : b_(b),
-      dims_in_elems_{dims_in_elems.at(0), dims_in_elems.at(1),
-                     dims_in_elems.at(2)},
+      dims_in_elems_{dims_in_elems[0], dims_in_elems[1], dims_in_elems[2]},
       tile_sizes_{1, tile_size_y, tile_size_x},
-      dims_in_tiles_(ElementWiseCeilOfRatio(dims_in_elems_, tile_sizes_)),
-      block_sizes_{std::min(req_block_sizes.at(0), dims_in_tiles_.at(0)),
-                   std::min(req_block_sizes.at(1), dims_in_tiles_.at(1)),
-                   std::min(req_block_sizes.at(2), dims_in_tiles_.at(2))},
-      dims_in_blocks_(ElementWiseCeilOfRatio(dims_in_tiles_, block_sizes_)),
+      dims_in_tiles_{dims_in_elems[0],
+                     CeilOfRatio<int64>(dims_in_elems[1], tile_size_y),
+                     CeilOfRatio<int64>(dims_in_elems[2], tile_size_x)},
+      block_sizes_{block_size_z, 1, 1},
+      dims_in_blocks_{CeilOfRatio<int64>(dims_in_elems[0], block_sizes_[0]),
+                      dims_in_tiles_[1], dims_in_tiles_[2]},
       num_threads_x_(num_threads_x),
       num_threads_y_(num_threads_y),
-      dilated_x_(true) {
-  DCHECK_EQ(req_block_sizes.size(), 3);
+      dilated_x_(is_dilated_x) {
   DCHECK_EQ(tile_size_y % num_threads_y_, 0);
   DCHECK_EQ(tile_size_x % num_threads_x_, 0);
+  CHECK_EQ((dims_in_elems[0] % block_size_z), 0);
   VLOG(10) << ""dims_in_elems_ = ["" << absl::StrJoin(dims_in_elems_, "","") << ""]"";
   VLOG(10) << ""dims_in_tiles_ = ["" << absl::StrJoin(dims_in_tiles_, "","") << ""]"";
   VLOG(10) << ""dims_in_blocks_ = ["" << absl::StrJoin(dims_in_blocks_, "","")
            << ""]"";
+  if (!dilated_x_) {
+    // dilated_x_=false is for the purpose of vectorization, which requires
+    // GetTileSizeForDimension(DimX) to be a multiplier of num_threads_x_.
+    CHECK_EQ(GetTileSizeForDimension(DimX) % num_threads_x_, 0);
+  }
 }
 
 IrArray::Index KernelMappingScheme::GetUnnormalizedIndex(
",0,test
a1e51910493309d90819fc23351d8103112cf501,tensorflow/tensorflow,"[XLA GPU] [NFC] Simplify and document getters of KernelMappingScheme

PiperOrigin-RevId: 265975853",kernel_tiling.h,"@@ -90,23 +90,24 @@ class KernelMappingScheme {
   enum { DimZ = 0, DimY, DimX, DimTot };
 
  public:
-  KernelMappingScheme() {}
   // dims_in_elems: the normalized tensor dimensions.
-  // req_block_sizes: the requested block size in number of tiles for each
-  //   dimension. The actual block size is set to min(req_block_size,
-  //   dims_in_number_of_blocks).
   KernelMappingScheme(absl::Span<const int64> dims_in_elems, int64 tile_size_y,
-                      int64 tile_size_x,
-                      absl::Span<const int64> req_block_sizes,
+                      int64 tile_size_x, int64 block_size_z,
                       int64 num_threads_y, int64 num_threads_x,
-                      llvm::IRBuilder<>* b);
+                      bool is_dilated_x, llvm::IRBuilder<>* b);
 
+  // Number of elements in each dimension (Z/Y/X respectively).
   absl::Span<const int64> GetDimensionsInElements() const {
     return dims_in_elems_;
   }
+
+  // Ratio of elements in each dimension over tile sizes for Z/Y/X
+  // respectively.
   absl::Span<const int64> GetDimensionsInTiles() const {
     return dims_in_tiles_;
   }
+
+  // Ratio of dimensions per tile over block sizes.
   absl::Span<const int64> GetDimensionsInBlocks() const {
     return dims_in_blocks_;
   }
@@ -147,14 +148,6 @@ class KernelMappingScheme {
   }
 
   bool DilatedX() const { return dilated_x_; }
-  void SetDilatedX(bool v) {
-    dilated_x_ = v;
-    if (!dilated_x_) {
-      // dilated_x_=false is for the purpose of vectorization, which requires
-      // GetTileSizeForDimension(DimX) to be a multiplier of num_threads_x_.
-      CHECK_EQ(GetTileSizeForDimension(DimX) % num_threads_x_, 0);
-    }
-  }
 
   IrArray::Index EmitBlockIndex(llvm::Type* index_ty);
   // Returns the index for the first tile in the block with the given block
",0,test
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,gpu_prim.h,"@@ -31,17 +31,27 @@ limitations under the license, the license you must see.
 #include ""third_party/gpus/cuda/include/cusparse.h""
 
 namespace gpuprim = ::cub;
+
+// Required for sorting Eigen::half
+namespace cub {
+template <>
+struct NumericTraits<Eigen::half>
+    : BaseTraits<FLOATING_POINT, true, false, uint16_t, Eigen::half> {};
+}  // namespace cub
+
 #elif TENSORFLOW_USE_ROCM
 #include ""rocm/include/hipcub/hipcub.hpp""
 namespace gpuprim = ::hipcub;
 
+// Required for sorting Eigen::half
 namespace rocprim {
 namespace detail {
 template <>
 struct radix_key_codec_base<Eigen::half>
-    : radix_key_codec_floating<Eigen::half, unsigned short> {};
+    : radix_key_codec_floating<Eigen::half, uint16_t> {};
 };  // namespace detail
 };  // namespace rocprim
-#endif  // GOOGLE_CUDA
+
+#endif  // TENSORFLOW_USE_ROCM
 
 #endif  // TENSORFLOW_CORE_KERNELS_GPU_PRIM_H_
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,in_topk_op_test.cc,"@@ -76,9 +76,9 @@ static Graph* InTopK(int num_targets, int num_classes, T top_k) {
 BM_InTopK(int64, 64, 1000, 10, cpu);
 BM_InTopK(int64, 64, 10000, 10, cpu);
 
-#ifdef GOOGLE_CUDA
+#ifdef GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 BM_InTopK(int64, 64, 1000, 10, gpu);
 BM_InTopK(int64, 64, 10000, 10, gpu);
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // namespace tensorflow
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op.cc,"@@ -244,7 +244,7 @@ TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS_NAME
 #undef REGISTER_KERNELS
 
-#ifdef GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 namespace functor {
 #define DECLARE_GPU_SPEC(T)                                                  \
@@ -277,6 +277,6 @@ TF_CALL_GPU_NUMBER_TYPES(REGISTER_KERNELS);
 TF_CALL_INTEGRAL_TYPES(REGISTER_KERNELS);
 #undef REGISTER_KERNELS
 
-#endif  // end GOOGLE_CUDA
+#endif  // end GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // end namespace tensorflow
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu.h,"@@ -15,11 +15,12 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_
 #define TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #define EIGEN_USE_GPU
 
 #include <cmath>
+#include <string>
 #include <vector>
 
 #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
@@ -34,15 +35,6 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/util/gpu_kernel_helper.h""
 
-#if GOOGLE_CUDA
-// Required for sorting Eigen::half
-namespace cub {
-template <>
-struct NumericTraits<Eigen::half>
-    : BaseTraits<FLOATING_POINT, true, false, unsigned short, Eigen::half> {};
-}  // namespace cub
-#endif  // GOOGLE_CUDA
-
 namespace tensorflow {
 
 typedef Eigen::GpuDevice GPUDevice;
@@ -93,7 +85,7 @@ struct IndirectLinearData {
   Entry* const backing_data;
 };
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 template <typename T>
 struct StridedData {
   typedef impl::Entry<T> Entry;
@@ -115,6 +107,7 @@ template <HeapType heapType, PreferIndices preferIndices,
 struct IndexedHeap {
   typedef typename Data<T>::Entry Entry;
   const Data<T> data;
+  __device__ IndexedHeap(const Data<T>& d) : data(d) {}
 
   __device__ bool is_above(int left, int right) {
     T left_value = data.get_value(left);
@@ -337,12 +330,21 @@ __device__ void mergeShards(int num_shards, int k,
   }
 }
 
+#if GOOGLE_CUDA
 extern __shared__ char shared_memory[];
+#endif
 
 template <typename T>
-__global__ void TopKKernel(const T* __restrict__ input, int length, int k,
-                           bool sorted, T* __restrict__ output,
-                           int* __restrict__ indices) {
+#if TENSORFLOW_USE_ROCM
+__attribute__((amdgpu_flat_work_group_size(1, 256))) 
+#endif
+__global__ void TopKKernel(
+    const T* __restrict__ input, int length, int k, bool sorted,
+    T* __restrict__ output, int* __restrict__ indices) {
+#if TENSORFLOW_USE_ROCM
+  HIP_DYNAMIC_SHARED(char, shared_memory);
+#endif
+
   const int batch_index = blockIdx.x;
   const T* batch_input = input + batch_index * length;
 
@@ -370,7 +372,7 @@ __global__ void TopKKernel(const T* __restrict__ input, int length, int k,
 }
 
 template <typename T>
-cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards,
+cudaError LaunchTopKKernel(const gpuStream_t& stream, int num_shards,
                            const T* input, int batch_size, int length, int k,
                            bool sorted, T* output, int* indices) {
   // This code assumes that k is small enough that the computation
@@ -395,9 +397,17 @@ cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards,
     }
     if (num_shards <= 0) {
       num_shards = 1;
+#if GOOGLE_CUDA
     } else if (num_shards > 1024) {
       num_shards = 1024;
     }
+#else
+      // ROCm can't execute with 1024 and requires an explicit
+      // amdgpu_flat_work_group_size attribute with >256
+    } else if (num_shards > 256) {
+      num_shards = 256;
+    }
+#endif
   }
   // We are limited by the amount of shared memory we have per block.
   auto shared_memory_size = (num_shards + 1) * k * sizeof(Entry<T>);
@@ -448,9 +458,9 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
   input_indices_t.device(d) =
       input_indices_t.generate(ColumnIndexCreator(num_cols));
 
-  cub::CountingInputIterator<int> counting_iter(0);
-  cub::TransformInputIterator<int, SegmentOffsetCreator,
-                              cub::CountingInputIterator<int>>
+  gpuprim::CountingInputIterator<int> counting_iter(0);
+  gpuprim::TransformInputIterator<int, SegmentOffsetCreator,
+                                  gpuprim::CountingInputIterator<int>>
       segment_offsets_t(counting_iter, SegmentOffsetCreator(num_cols));
 
   Tensor temp_values;
@@ -472,7 +482,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
     sorted_values_ptr = temp_values.flat<T>().data();
   }
 
-  auto err = cub::DeviceSegmentedRadixSort::SortPairsDescending(
+  auto err = gpuprim::DeviceSegmentedRadixSort::SortPairsDescending(
       /* d_temp_storage */ nullptr,
       /* temp_storage_bytes */ temp_storage_bytes,
       /* d_keys_in */ input,
@@ -489,7 +499,8 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
   if (err != cudaSuccess) {
     return errors::Internal(
         ""TopKOp: Could not launch ""
-        ""cub::DeviceSegmentedRadixSort::SortPairsDescending to calculate ""
+        ""cub::gpuprim::DeviceSegmentedRadixSort::SortPairsDescending to ""
+        ""calculate ""
         ""temp_storage_bytes, status: "",
         cudaGetErrorString(err));
   }
@@ -497,7 +508,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
   TF_RETURN_IF_ERROR(ctx->allocate_temp(
       DT_INT8, TensorShape({static_cast<int64>(temp_storage_bytes)}),
       &temp_storage));
-  err = cub::DeviceSegmentedRadixSort::SortPairsDescending(
+  err = gpuprim::DeviceSegmentedRadixSort::SortPairsDescending(
       /* d_temp_storage */ temp_storage.flat<int8>().data(),
       /* temp_storage_bytes */ temp_storage_bytes,
       /* d_keys_in */ input,
@@ -514,7 +525,8 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
   if (err != cudaSuccess) {
     return errors::Internal(
         ""TopKOp: Could not launch ""
-        ""cub::DeviceSegmentedRadixSort::SortPairsDescending to sort input, ""
+        ""cub::gpuprim::DeviceSegmentedRadixSort::SortPairsDescending to sort ""
+        ""input, ""
         ""temp_storage_bytes: "",
         temp_storage_bytes, "", status: "", cudaGetErrorString(err));
   }
@@ -567,6 +579,6 @@ struct TopKFunctor<GPUDevice, T> {
 }  // end namespace functor
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 #endif  // TENSORFLOW_CORE_KERNELS_TOPK_OP_GPU_H_
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_double.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, double>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_float.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, float>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_half.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, Eigen::half>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int16.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, int16>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int32.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, int32>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int64.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, int64>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_int8.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, int8>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_uint16.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -27,4 +27,4 @@ template struct functor::TopKFunctor<GPUDevice, uint32>;
 
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_gpu_uint8.cu.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/topk_op.h""
@@ -25,4 +25,4 @@ using Eigen::GpuDevice;
 template struct functor::TopKFunctor<GPUDevice, uint8>;
 }  // namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
d8ec97466cbae03171944638af1ab9ce7ced36ca,tensorflow/tensorflow,Fixing and enabling TopK on ROCm,topk_op_test.py,"@@ -102,11 +102,13 @@ class TopKTest(test.TestCase):
     self._validateTopK(inputs, 2, [[0.4, 0.3], [0.4, 0.3]], [[3, 1], [2, 1]])
 
   def testTop3(self):
-    k = 5
-    inputs = np.random.permutation(np.linspace(0, 100, 6140, dtype=np.float64))
-    indices = np.argsort(-inputs)[:k]
-    values = -np.sort(-inputs)[:k]
-    self._validateTopK(inputs, k, values, indices)
+    for k in range(3, 11, 2):
+      for dim in range(512, 12288, 512):
+        inputs = np.random.permutation(np.linspace(0, 100, dim, 
+                                                   dtype=np.float64))
+        indices = np.argsort(-inputs)[:k]
+        values = -np.sort(-inputs)[:k]
+        self._validateTopK(inputs, k, values, indices)
 
   def testTop1AllNan(self):
     inputs = [[np.NaN, np.NaN], [np.NaN, np.NaN]]
",0,train
b758593dc6e0b88d704233a3ab8ae6c28d54575d,tensorflow/tensorflow,"Rollback of #37837

PiperOrigin-RevId: 304698537
Change-Id: Ib68e9dcc719add6091e2a7af1b6d15d8c6aadf03",def_function.py,"@@ -422,19 +422,6 @@ class Function(object):
     self._input_signature = input_signature
     self._call_counter = _CallCounter(FREQUENT_TRACING_WARNING_MAX_CALL_HISTORY)
 
-  def __getstate__(self):
-    """"""Custom pickling, to omit unpickleable objects.""""""
-    result = self.__dict__.copy()
-    del result[""_lock""]
-    del result[""_descriptor_cache""]
-    return result
-
-  def __setstate__(self, state):
-    """"""Restore from pickled state.""""""
-    self.__dict__ = state
-    self._lock = threading.Lock()
-    self._descriptor_cache = weakref.WeakKeyDictionary()
-
   def _defun_with_scope(self, scope):
     """"""Creates a defun wrapped inside a variable creator scope.""""""
 
",0,test
b758593dc6e0b88d704233a3ab8ae6c28d54575d,tensorflow/tensorflow,"Rollback of #37837

PiperOrigin-RevId: 304698537
Change-Id: Ib68e9dcc719add6091e2a7af1b6d15d8c6aadf03",def_function_test.py,"@@ -19,7 +19,6 @@ from __future__ import print_function
 
 import functools
 import itertools
-import pickle
 import re
 import weakref
 
@@ -69,10 +68,6 @@ class _ModelWithOptimizer(training.Model):
     return {'loss': loss}
 
 
-def undecorated_function(x):
-  return x * 3.
-
-
 class _HasDecoratedMethod(object):
 
   @def_function.function
@@ -752,41 +747,6 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase):
     # If the graph is deleted, then an exception is raised on reading `captures`
     self.assertEmpty(graph.captures)
 
-  @parameterized.parameters(*itertools.product(
-      (None, (tensor_spec.TensorSpec([]),)),  # input_signature
-      (True, False),  # autograph
-      (None, converter.Feature.ALL),  # autograph_options
-      (None, 'foo.bar'),  # implements
-      (None, True, False),  # relax_shapes
-  ))
-  def test_pickle(self, input_signature, autograph, autograph_options,
-                  implements, relax_shapes):
-    """"""@function objects can be pickled and unpickled.""""""
-    # Can't pickle functions in __main__:
-    from tensorflow.python.eager.def_function_test import undecorated_function
-    original_py_function = undecorated_function
-
-    func = def_function.function(
-        func=original_py_function,
-        input_signature=input_signature,
-        autograph=autograph,
-        experimental_implements=implements,
-        experimental_autograph_options=autograph_options,
-        experimental_relax_shapes=relax_shapes,
-    )
-
-    cloned = pickle.loads(pickle.dumps(func))
-
-    self.assertEqual(func._name, cloned._name)
-    self.assertEqual(input_signature, cloned._input_signature)
-    self.assertEqual(autograph, cloned._autograph)
-    self.assertEqual(implements, cloned._implements)
-    self.assertEqual(autograph_options, cloned._experimental_autograph_options)
-    self.assertEqual(relax_shapes, cloned._experimental_relax_shapes)
-
-    x = array_ops.ones([])
-    self.assertEqual(self.evaluate(cloned(x)), self.evaluate(func(x)))
-
 
 if __name__ == '__main__':
   ops.enable_eager_execution()
",0,test
24b2d0252bba21953e1921d8bccf850cbdfbcb09,tensorflow/tensorflow,"Add unidirectional sequence rnn op_def to graphdef_to_flatbuffer and also add an e2e test.

PiperOrigin-RevId: 297050448
Change-Id: Ifa7249a5e4585f61ea9833f11ea28a9f2f9e0363",graphdef_to_tfl_flatbuffer.cc,"@@ -87,6 +87,17 @@ const char kUnidirectionalSequenceLstmOp[] =
     ""'LastState' type: DT_FLOAT } output_arg: { name: 'Output' type: DT_FLOAT} ""
     ""attr : { name: '_tflite_input_indices' type: 'list(int)'}"";
 
+const char kUnidirectionalSequenceRnnOp[] =
+    ""name: 'UnidirectionalSequenceRnn' input_arg: {name: 'Input' type: ""
+    ""DT_FLOAT} input_arg: { name: 'Weights' type: DT_FLOAT } ""
+    ""input_arg: { name: 'RecurrentWeights' type: DT_FLOAT } input_arg: { ""
+    ""name: 'Bias' type: DT_FLOAT} ""
+    ""input_arg: { name: 'HiddenState' type: DT_FLOAT} ""
+    ""output_arg: { name: ""
+    ""'LastState' type: DT_FLOAT } output_arg: { name: 'Output' type: ""
+    ""DT_FLOAT} ""
+    ""attr : { name: '_tflite_input_indices' type: 'list(int)'}"";
+
 // Converts the toco::IODataType to tensorflow::DataType. Only contains the
 // conversion mapping for constants defined in TFLite Python API.
 DataType ConvertIODataTypeToDataType(toco::IODataType dtype) {
@@ -285,6 +296,7 @@ Status ConvertGraphDefToTFLiteFlatBuffer(const toco::ModelFlags& model_flags,
                                       toco_flags.custom_opdefs().end());
   extra_tf_opdefs.push_back(kDetectionPostProcessOp);
   extra_tf_opdefs.push_back(kUnidirectionalSequenceLstmOp);
+  extra_tf_opdefs.push_back(kUnidirectionalSequenceRnnOp);
   TF_RETURN_IF_ERROR(RegisterCustomBuiltinOps(extra_tf_opdefs));
 
   TF_ASSIGN_OR_RETURN(
",0,test
24b2d0252bba21953e1921d8bccf850cbdfbcb09,tensorflow/tensorflow,"Add unidirectional sequence rnn op_def to graphdef_to_flatbuffer and also add an e2e test.

PiperOrigin-RevId: 297050448
Change-Id: Ifa7249a5e4585f61ea9833f11ea28a9f2f9e0363",unidirectional_sequence_rnn_test.py,"@@ -249,6 +249,10 @@ class UnidirectionalSequenceRnnTest(test_util.TensorFlowTestCase):
     result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, False)
     self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2))
 
+    # Test MLIR-converted model.
+    result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, True)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2))
+
   @test_util.enable_control_flow_v2
   def testDynamicRnnMultiRnnCell(self):
     sess = tf.compat.v1.Session(config=CONFIG)
@@ -269,6 +273,10 @@ class UnidirectionalSequenceRnnTest(test_util.TensorFlowTestCase):
     result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, False)
     self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2))
 
+    # Test MLIR-converted model.
+    result = self.tfliteInvoke(new_sess, test_inputs, x, output_class, True)
+    self.assertTrue(np.allclose(expected_output, result, rtol=1e-6, atol=1e-2))
+
 
 if __name__ == ""__main__"":
   test.main()
",0,test
3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release.

1. The cifar10_train.py example model was emitting warnings, because of
   non-Variable objects in the `tf.moving_average_variables()`
   collection. This change fixes that by only adding `Variable`-typed
   objects to that collection in `moving_averages.py` (which better
   agrees with the definition in `tf.GraphKeys.MOVING_AVERAGE_VARIABLES`).

2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`,
   which fails if `save_path` does not contain a directory component.
   This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a
   no-op (which better matches the internal library that it is shadowing).

Fixes #1123. Fixes #1135.
Change: 114895020",_gfile.py,"@@ -282,17 +282,16 @@ def MakeDirs(path, mode=0o755):  # pylint: disable=invalid-name
   """"""Recursively create the directory ""path"" with the given mode.
 
   Args:
-    path: The directory path
+    path: The directory path.
     mode: The file mode for the created directories
 
-  Returns:
-    None
-
-
   Raises:
     OSError: if the path already exists
   """"""
-  os.makedirs(path, mode)
+  # NOTE(mrry): MakeDirs("""") should be a no-op to match other
+  # implementations of tf.gfile.
+  if path:
+    os.makedirs(path, mode)
 
 
 def RmDir(directory):   # pylint: disable=invalid-name
",0,train
3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release.

1. The cifar10_train.py example model was emitting warnings, because of
   non-Variable objects in the `tf.moving_average_variables()`
   collection. This change fixes that by only adding `Variable`-typed
   objects to that collection in `moving_averages.py` (which better
   agrees with the definition in `tf.GraphKeys.MOVING_AVERAGE_VARIABLES`).

2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`,
   which fails if `save_path` does not contain a directory component.
   This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a
   no-op (which better matches the internal library that it is shadowing).

Fixes #1123. Fixes #1135.
Change: 114895020",gfile_test.py,"@@ -17,6 +17,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import contextlib
 import os
 import shutil
 import time
@@ -148,6 +149,22 @@ class FunctionTests(_BaseTest, googletest.TestCase):
     gfile.DeleteRecursively(self.tmp + ""test_dir"")
     self.assertFalse(gfile.Exists(self.tmp + ""test_dir""))
 
+  @contextlib.contextmanager
+  def _working_directory(self, wd):
+    original_cwd = os.getcwd()
+    os.chdir(wd)
+    try:
+      yield
+    finally:
+      os.chdir(original_cwd)
+
+  def testMakeDirsWithEmptyString(self):
+    gfile.MakeDirs(self.tmp + ""test_dir"")
+    with self._working_directory(self.tmp + ""test_dir""):
+      gfile.MakeDirs("""")
+    # Should succeed because MakeDirs("""") is a no-op.
+    gfile.RmDir(self.tmp + ""test_dir"")
+
   def testErrors(self):
     self.assertRaises(
         OSError, lambda: gfile.RmDir(self.tmp + ""dir_doesnt_exist""))
",0,train
3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release.

1. The cifar10_train.py example model was emitting warnings, because of
   non-Variable objects in the `tf.moving_average_variables()`
   collection. This change fixes that by only adding `Variable`-typed
   objects to that collection in `moving_averages.py` (which better
   agrees with the definition in `tf.GraphKeys.MOVING_AVERAGE_VARIABLES`).

2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`,
   which fails if `save_path` does not contain a directory component.
   This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a
   no-op (which better matches the internal library that it is shadowing).

Fixes #1123. Fixes #1135.
Change: 114895020",moving_averages.py,"@@ -269,12 +269,14 @@ class ExponentialMovingAverage(object):
           avg = slot_creator.create_slot(
               var, var.initialized_value(), self._name,
               colocate_with_primary=True)
+          # NOTE(mrry): We only add `tf.Variable` objects to the
+          # `MOVING_AVERAGE_VARIABLES` collection.
+          ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
         else:
           avg = slot_creator.create_zeros_slot(
               var, self._name,
               colocate_with_primary=(var.op.type == ""Variable""))
       self._averages[var] = avg
-      ops.add_to_collection(ops.GraphKeys.MOVING_AVERAGE_VARIABLES, var)
 
     with ops.name_scope(self._name) as scope:
       decay = ops.convert_to_tensor(self._decay, name=""decay"")
",0,train
3f1d5d5670e42ea4b01795c787e847dc799b9e54,tensorflow/tensorflow,"Fixes issues with the `tf.train.Saver` in the 0.7.0 release.

1. The cifar10_train.py example model was emitting warnings, because of
   non-Variable objects in the `tf.moving_average_variables()`
   collection. This change fixes that by only adding `Variable`-typed
   objects to that collection in `moving_averages.py` (which better
   agrees with the definition in `tf.GraphKeys.MOVING_AVERAGE_VARIABLES`).

2. Saver.save() now calls `tf.gfile.MakeDirs(os.path.dirname(save_path))`,
   which fails if `save_path` does not contain a directory component.
   This change fixes the implementation of `tf.gfile.MakeDirs('')` to be a
   no-op (which better matches the internal library that it is shadowing).

Fixes #1123. Fixes #1135.
Change: 114895020",moving_averages_test.py,"@@ -87,6 +87,8 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
     avg1 = ema.average(var1)
     avg2 = ema.average(tensor2)
 
+    self.assertItemsEqual([var0, var1], tf.moving_average_variables())
+
     self.assertFalse(avg0 in tf.trainable_variables())
     self.assertFalse(avg1 in tf.trainable_variables())
     self.assertFalse(avg2 in tf.trainable_variables())
",0,train
09b8ed34f47dbd6921304f2d4ceb3669c1e089e6,tensorflow/tensorflow,"Add @ebrevdo's temporary fix for int32 overflow issue, and add a test case for it

Fix imports",core.py,"@@ -26,6 +26,7 @@ import warnings
 import numpy as np
 
 from tensorflow.python.eager import context
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
@@ -580,9 +581,21 @@ class Flatten(Layer):
       permutation.append(1)
       inputs = array_ops.transpose(inputs, perm=permutation)
 
-    outputs = array_ops.reshape(
-        inputs, (tensor_shape.dimension_value(inputs.shape[0]) or
-                 array_ops.shape(inputs)[0], -1))
+    input_shape = inputs.shape
+    if input_shape[1:].is_fully_defined():
+      flattened_dim = tensor_shape.dimension_value(
+          np.prod(input_shape[1:], dtype=int))
+      # Temporary fix for integer overflow issue.
+      if flattened_dim > np.iinfo(np.int32).max:
+        shape_dtype = dtypes.int64
+      else:
+        shape_dtype = dtypes.int32
+      outputs = array_ops.reshape(
+          inputs, constant_op.constant((-1, flattened_dim), shape_dtype))
+    else:
+      outputs = array_ops.reshape(
+          inputs, (tensor_shape.dimension_value(inputs.shape[0]) or
+                   array_ops.shape(inputs)[0], -1))
     if not context.executing_eagerly():
       outputs.set_shape(self.compute_output_shape(inputs.shape))
     return outputs
",0,train
09b8ed34f47dbd6921304f2d4ceb3669c1e089e6,tensorflow/tensorflow,"Add @ebrevdo's temporary fix for int32 overflow issue, and add a test case for it

Fix imports",core_test.py,"@@ -556,6 +556,12 @@ class FlattenTest(test.TestCase):
       self.assertEqual(list(np_output.shape), [5, 6])
       self.assertEqual(y.get_shape().as_list(), [5, None])
 
+  @test_util.run_deprecated_v1
+  def testFlattenLargeDim(self):
+    x = array_ops.placeholder(shape=(None, 21316, 21316, 80), dtype='float32')
+    y = core_layers.Flatten()(x)
+    self.assertEqual(y.shape.as_list(), [None, 21316 * 21316 * 80])
+
 
 if __name__ == '__main__':
   test.main()
",0,train
5220e565b7cc32a5f757896c76c7d57c33bcd323,tensorflow/tensorflow,"Don't use tensorflow::Edge after freeing it

Even with this bug we were accidentally doing the right thing (so the test case
doesn't actually fail without the fix): deleting an Edge sets its input and
output indices to kControlSlot-1 so we'd normally expect to fail when there is a
control edge out of the TF cluster (because a control edge would be recognized
as a data edge).  But AddEdge(x, -1, y, -1) seems to do the right thing for both
control and data edges.

PiperOrigin-RevId: 214831204",build_xla_ops_pass.cc,"@@ -112,16 +112,9 @@ static void MoveOutgoingEdges(Graph* g, Node* old_node, Node* new_node) {
   std::vector<const Edge*> out_edges(old_node->out_edges().begin(),
                                      old_node->out_edges().end());
   for (const Edge* edge : out_edges) {
-    Node* dst = edge->dst();
-    int src_output = edge->src_output();
-    int dst_input = edge->dst_input();
+    // TODO(sanjoy): This does not update NodeDef inputs.
+    g->AddEdge(new_node, edge->src_output(), edge->dst(), edge->dst_input());
     g->RemoveEdge(edge);
-
-    if (edge->IsControlEdge()) {
-      g->AddControlEdge(new_node, dst);
-    } else {
-      g->AddEdge(new_node, src_output, dst, dst_input);
-    }
   }
 }
 
",0,train
5220e565b7cc32a5f757896c76c7d57c33bcd323,tensorflow/tensorflow,"Don't use tensorflow::Edge after freeing it

Even with this bug we were accidentally doing the right thing (so the test case
doesn't actually fail without the fix): deleting an Edge sets its input and
output indices to kControlSlot-1 so we'd normally expect to fail when there is a
control edge out of the TF cluster (because a control edge would be recognized
as a data edge).  But AddEdge(x, -1, y, -1) seems to do the right thing for both
control and data edges.

PiperOrigin-RevId: 214831204",build_xla_ops_pass_test.cc,"@@ -0,0 +1,112 @@
+/* Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/compiler/jit/build_xla_ops_pass.h""
+
+#include ""tensorflow/cc/framework/ops.h""
+#include ""tensorflow/cc/ops/array_ops.h""
+#include ""tensorflow/cc/ops/resource_variable_ops.h""
+#include ""tensorflow/cc/ops/standard_ops.h""
+#include ""tensorflow/compiler/jit/defs.h""
+#include ""tensorflow/compiler/jit/encapsulate_subgraphs_pass.h""
+#include ""tensorflow/compiler/jit/node_matchers.h""
+#include ""tensorflow/core/graph/algorithm.h""
+#include ""tensorflow/core/grappler/optimizers/data/graph_utils.h""
+#include ""tensorflow/core/lib/core/status_test_util.h""
+#include ""tensorflow/core/platform/test.h""
+
+namespace tensorflow {
+namespace {
+
+using ::tensorflow::testing::FindNodeByName;
+using ::tensorflow::testing::matchers::CtrlDeps;
+using ::tensorflow::testing::matchers::NodeWith;
+using ::tensorflow::testing::matchers::Op;
+
+Status BuildXlaOps(const Scope& s, std::unique_ptr<Graph>* result) {
+  auto graph = absl::make_unique<Graph>(OpRegistry::Global());
+  TF_RETURN_IF_ERROR(s.ToGraph(graph.get()));
+
+  // Assign all nodes to the CPU device.
+  static const char* kCpuDevice = ""/job:localhost/replica:0/task:0/cpu:0"";
+  for (Node* n : graph->nodes()) {
+    if (n->assigned_device_name().empty()) {
+      n->set_assigned_device_name(kCpuDevice);
+    }
+  }
+
+  GraphOptimizationPassOptions opt_options;
+  opt_options.graph = &graph;
+  BuildXlaOpsPass pass;
+  TF_RETURN_IF_ERROR(pass.Run(opt_options));
+  *result = std::move(graph);
+  return Status::OK();
+}
+
+Status MakeXlaCompiledKernel(Graph* graph, const string& callee_name,
+                             const string& node_name, Node** result) {
+  NodeDef call_node;
+  call_node.set_name(node_name);
+  call_node.set_op(callee_name);
+  AddNodeAttr(kXlaCompiledKernelAttr, true, &call_node);
+  AddNodeAttr(kXlaNumConstantArgsAttr, 0, &call_node);
+  AddNodeAttr(kXlaNumResourceArgsAttr, 0, &call_node);
+  Status s;
+  *result = graph->AddNode(call_node, &s);
+  return s;
+}
+
+Node* MakeWrite(const Scope& scope, const string& id) {
+  Output var_handle =
+      ops::VarHandleOp(scope.WithOpName(""Var"" + id), DT_FLOAT, TensorShape({}));
+  Output value_to_write =
+      ops::Const(scope.WithOpName(""ValueToAssign"" + id), 1.0f);
+  ops::AssignVariableOp assign_op(scope.WithOpName(""Assignee"" + id), var_handle,
+                                  value_to_write);
+  return assign_op.operation.node();
+}
+
+FunctionDefLibrary CreateFunctionDefLibWithConstFunction(const string& name) {
+  FunctionDefLibrary flib_def;
+  FunctionDef func = FunctionDefHelper::Create(
+      /*function_name=*/name, /*in_def=*/{}, /*out_def=*/{""out: float""},
+      /*attr_def*/
+      {}, /*node_def=*/{FunctionDefHelper::Const(""one"", 1.0f)},
+      /*ret_def=*/{{""out"", ""out:output:0""}});
+  *flib_def.add_function() = std::move(func);
+  return flib_def;
+}
+
+TEST(BuildXlaOps, ControlDepsPreserved) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+
+  FunctionDefLibrary flib_def =
+      CreateFunctionDefLibWithConstFunction(""cluster_0"");
+  TF_ASSERT_OK(root.graph()->AddFunctionLibrary(flib_def));
+  Node* call;
+  TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call));
+  Node* write_op = MakeWrite(root, ""write"");
+  root.graph()->AddControlEdge(call, write_op);
+
+  std::unique_ptr<Graph> graph;
+  TF_ASSERT_OK(BuildXlaOps(root, &graph));
+
+  Node* write_op_new = FindNodeByName(graph.get(), write_op->name());
+  ASSERT_NE(write_op_new, nullptr);
+  EXPECT_THAT(write_op_new, NodeWith(CtrlDeps(NodeWith(Op(""_XlaRun"")))));
+}
+
+}  // namespace
+}  // namespace tensorflow
",0,train
2248a3488c53f8b858e2a0b8be93d62c3056df36,tensorflow/tensorflow,"[XLA] Don't call Literal::Get in HloEvaluator's convolution loop.

This speeds up the implementation of conv because Literal::Get calls
Literal::Piece::data, which is relatively slow.

Instead, we call Literal::Data() once and cache the result.

Before: ConvolutionTest/0.StridedFilter (59094 ms)
After:  ConvolutionTest/0.StridedFilter (41812 ms)

Speedup: 59/42 = 1.4x
PiperOrigin-RevId: 191830741",hlo_evaluator.cc,"@@ -1003,6 +1003,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
     DimensionVector rhs_index(rhs_rank);
     DimensionVector rhs_spatial_index(dnums.kernel_spatial_dimensions_size());
 
+    auto lhs_literal_data = lhs_literal.data<ReturnT>();
+    auto rhs_literal_data = rhs_literal.data<ReturnT>();
+
     auto func = [&](ArraySlice<int64> out_index) {
       ElementwiseT result_val = static_cast<ElementwiseT>(0);
 
@@ -1062,9 +1065,13 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
                     : rhs_spatial_index[ki];
           }
 
-          result_val +=
-              static_cast<ElementwiseT>(lhs_literal.Get<ReturnT>(lhs_index)) *
-              static_cast<ElementwiseT>(rhs_literal.Get<ReturnT>(rhs_index));
+          auto lhs_elem = static_cast<ElementwiseT>(
+              lhs_literal_data[IndexUtil::MultidimensionalIndexToLinearIndex(
+                  lhs_shape, lhs_index)]);
+          auto rhs_elem = static_cast<ElementwiseT>(
+              rhs_literal_data[IndexUtil::MultidimensionalIndexToLinearIndex(
+                  rhs_shape, rhs_index)]);
+          result_val += lhs_elem * rhs_elem;
         }
       } while (IndexUtil::BumpIndices(window_shape, &rhs_spatial_index));
",0,test
9707af5cb390eea0dd8f14a270911de8ad819bfd,tensorflow/tensorflow,"Int32 support for Empty op on GPU.

PiperOrigin-RevId: 236790821",inplace_ops.cc,"@@ -543,6 +543,7 @@ REGISTER_EMPTY(float, GPU);
 REGISTER_EMPTY(double, GPU);
 REGISTER_EMPTY(Eigen::half, GPU);
 REGISTER_EMPTY(int64, GPU);
+REGISTER_EMPTY(int32, GPU);
 
 #endif  // GOOGLE_CUDA
 
",0,train
f1e0098f2a702c8cbce80ece16f2c0aa23942fd5,tensorflow/tensorflow,"Dropping tests for non-EagerService code path in remote_cluster_test.

PiperOrigin-RevId: 302922575
Change-Id: I8b335e1f0ff8ed5a3f47b1fbc77302e500d7de37",remote_cluster_test.py,"@@ -495,28 +495,6 @@ class DynamicClusterTest(test.TestCase, parameterized.TestCase):
       context.check_alive(""/job:remote_device/replica:0/task:10"")
 
 
-class DynamicClusterWithoutLazyRemoteInputsCopyTest(DynamicClusterTest):
-
-  @classmethod
-  def setUpClass(cls):
-    super(DynamicClusterWithoutLazyRemoteInputsCopyTest, cls).setUpClass()
-    context._reset_context()
-    context.context().lazy_remote_inputs_copy = False
-
-  @classmethod
-  def tearDownClass(cls):
-    super(DynamicClusterWithoutLazyRemoteInputsCopyTest, cls).tearDownClass()
-    context._reset_context()
-    context.context().lazy_remote_inputs_copy = True
-
-  # TODO(haoyuzhang): When lazyh remote inputs copy is disabled, we use the
-  # WorkerService RunGraph request to execute component functions in distributed
-  # function execution. We currently do not have access control in WorkerService
-  # to allow concurrent cluster update and function execution.
-  def testMultiThreadPendingNodesLockFree(self):
-    self.skipTest(""Unsupported case"")
-
-
 if __name__ == ""__main__"":
   ops.enable_eager_execution()
   test.main()
",0,train
3258ebf5e18e898a11f9d2bde25efd3224738e43,tensorflow/tensorflow,"Reuse the rendezvous provided by the OpKernelContext for PartitionedCallOp.
This will allow send/recv across different tf.functions.

PiperOrigin-RevId: 313267770
Change-Id: I28fb8e43cb7b3374feeca9b0f203a968a338ec9e",partitioned_function_ops.cc,"@@ -245,7 +245,6 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle,
   run_opts.source_device =
       lib->device() == nullptr ? """" : lib->device()->name();
   run_opts.allow_dead_tensors = true;
-  run_opts.rendezvous = ctx->rendezvous();
 
   std::vector<Tensor>* rets = new std::vector<Tensor>;
   const string& func_name = func_->name();
",0,train
e232013764faa5c8926b562c2b1c61594d705ebe,tensorflow/tensorflow,Forgot to mark before setting type,convert_nodes.cc,"@@ -977,12 +977,12 @@ Status Converter::RenameAndMarkOutputTensors(
       tensor = layer->getOutput(0);
     }
     tensor->setName(output.dest_node_name.c_str());
+    network()->markOutput(*tensor);
     // Set type after marking as output. TRT only supports setType for engine
     // outputs and inputs (type is inferred otherwise).
     tensor->setType(output.trt_dtype);
     VLOG(1) << ""Marking output TRT tensor "" << output.source_tensor_name
             << "", which feeds TF node "" << output.dest_node_name;
-    network()->markOutput(*tensor);
   }
   return Status::OK();
 }
",0,train
2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id.

PiperOrigin-RevId: 317353238
Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile.cc,"@@ -42,6 +42,8 @@ namespace profiler {
 
 namespace {
 
+constexpr int64 kInvalidStepId = -1;
+
 // Index of the time-sorted memory_profile_snapshots list, and the
 // MemoryActivityMetadata proto it contains.
 using IndexMetaPair = std::pair<int64 /*index*/, const MemoryActivityMetadata*>;
@@ -63,7 +65,7 @@ struct ActivityMetadata {
   int64 allocation_bytes = 0;
   uint64 address = 0;
   absl::string_view tf_op_name;
-  int64 step_id = -1;
+  int64 step_id = kInvalidStepId;
   absl::string_view region_type;
   int64 data_type = 0;
   absl::string_view tensor_shape;
@@ -129,7 +131,6 @@ void UpdateProfileSummary(const AggregationStats& stats, int64 time_offset_ps,
 MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
   XPlaneVisitor plane = CreateTfXPlaneVisitor(host_trace);
   MemoryProfile memory_profile;
-  auto* step_count = memory_profile.mutable_step_count();
   // Iterate over all XEvents in the XPlane, and add the XStats to a new
   // MemoryProfileSnapshot if the EventType is kMemoryAllocation or
   // kMemoryDeallocation.
@@ -181,9 +182,8 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
           case StatType::kTfOp:
             metadata.tf_op_name = stat.StrOrRefValue();
             break;
-          case StatType::kStepId:
+          case StatType::kGroupId:
             metadata.step_id = stat.IntValue();
-            if (metadata.step_id != 0) (*step_count)[metadata.step_id]++;
             break;
           case StatType::kRegionType:
             metadata.region_type = stat.StrOrRefValue();
@@ -214,40 +214,21 @@ MemoryProfile GenerateMemoryProfile(const XPlane* host_trace) {
   return memory_profile;
 }
 
-// Sequentialize step ids for the memory profile.
-void UpdateStepId(const tensorflow::protobuf::Map<
-                      tensorflow::protobuf_int64 /*orig_step_id*/,
-                      tensorflow::protobuf_int64 /*count*/>& step_count,
-                  PerAllocatorMemoryProfile* memory_profile) {
-  // Map from original random step id to sequential step id.
-  absl::flat_hash_map<int64 /*orig_step_id*/, int64 /*step_id*/> step_map;
-  constexpr int kUnknownStep = -2;
-  constexpr double kStepFilterRatio = 0.1;  // Magic number for filtering.
-  tensorflow::protobuf_int64 max_step_count = 0;
-  for (const auto& step_and_count : step_count) {
-    max_step_count = std::max(max_step_count, step_and_count.second);
-  }
-  // Filter out noisy and incomplete original step ids.
-  for (const auto& step_and_count : step_count) {
-    if (static_cast<double>(step_and_count.second) / max_step_count >
-        kStepFilterRatio) {
-      step_map[step_and_count.first] = kUnknownStep;
-    }
-  }
-
-  // Update the step ids in memory_profile for this allocator.
-  int64 step_id = -1;
+// Fix invalid step ids of snapshots at the beginning/end of the profile or at
+// the step boundaries. The snapshots with invalid step ids at the beginning get
+// 0 for their step ids. Those at the step boundaries or at the end get the
+// previous snapshot's step id + 1.
+void UpdateStepId(PerAllocatorMemoryProfile* memory_profile) {
+  int64 last_valid_step_id = -1;
+  // Snapshots are already sorted in time.
   for (auto& snapshot : *memory_profile->mutable_memory_profile_snapshots()) {
     DCHECK(snapshot.has_activity_metadata());
-    // Convert the random step id to sequential step id.
-    int64 orig_step_id = snapshot.activity_metadata().step_id();
-    if (step_map.contains(orig_step_id) &&
-        step_map[orig_step_id] == kUnknownStep) {
-      step_map[orig_step_id] = ++step_id;
+    if (snapshot.mutable_activity_metadata()->step_id() == kInvalidStepId) {
+      snapshot.mutable_activity_metadata()->set_step_id(last_valid_step_id + 1);
+    } else {
+      last_valid_step_id = snapshot.mutable_activity_metadata()->step_id();
     }
-    snapshot.mutable_activity_metadata()->set_step_id(step_id);
   }
-  VLOG(2) << ""Max sequential step id in profile: "" << step_id;
 }
 
 // Update the MemoryActivityMetadata for each deallocation event by copying from
@@ -481,14 +462,14 @@ void ProcessMemoryProfileProto(int64 max_num_snapshots,
       return a.time_offset_ps() < b.time_offset_ps();
     });
 
-    UpdateStepId(memory_profile->step_count(), allocator_memory_profile);
+    UpdateStepId(allocator_memory_profile);
     UpdateDeallocation(allocator_memory_profile);
 
-    int64 peak_bytes_profile = allocator_memory_profile->profile_summary()
-                                   .peak_stats()
-                                   .peak_bytes_in_use();
     int64 peak_step_id =
-        GetPeakMemoryStep(peak_bytes_profile, allocator_memory_profile);
+        GetPeakMemoryStep(allocator_memory_profile->profile_summary()
+                              .peak_stats()
+                              .peak_bytes_in_use(),
+                          allocator_memory_profile);
     ProcessActiveAllocations(peak_step_id, allocator_memory_profile);
     SampleSnapshots(max_num_snapshots, snapshots);
   }
",0,train
2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id.

PiperOrigin-RevId: 317353238
Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile.h,"@@ -25,6 +25,7 @@ namespace profiler {
 
 // Process the host threads XPlane and generate MemoryProfile result; at most
 // max_num_snapshots will be displayed on the UI.
+// REQUIRED: host_plane should have been grouped by calling GroupTfEvents().
 MemoryProfile ConvertXPlaneToMemoryProfile(const XPlane& host_plane,
                                            int64 max_num_snapshots = 1000);
 
",0,train
2229ae89c927b46355a15e8af22365d24afc25bf,tensorflow/tensorflow,"Use group_id as step_id.

PiperOrigin-RevId: 317353238
Change-Id: If52b2b4872c92d3f65af8f6ce1651e8c6da7dae7",xplane_to_memory_profile_test.cc,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/profiler/protobuf/memory_profile.pb.h""
 #include ""tensorflow/core/profiler/protobuf/xplane.pb.h""
+#include ""tensorflow/core/profiler/utils/group_events.h""
 #include ""tensorflow/core/profiler/utils/xplane_builder.h""
 #include ""tensorflow/core/profiler/utils/xplane_schema.h""
 #include ""tensorflow/core/profiler/utils/xplane_test_utils.h""
@@ -84,11 +85,11 @@ TEST(ConvertXPlaneToMemoryProfile, OneAllocatorMultiActivitiesTest) {
                 {StatType::kRegionType, ""temp""},
                 {StatType::kTensorShapes, ""[1, 2]""}});
 
+  tensorflow::profiler::GroupTfEvents(&space, nullptr);
   MemoryProfile memory_profile = ConvertXPlaneToMemoryProfile(*host_plane);
   EXPECT_EQ(memory_profile.memory_profile_per_allocator().size(), 1);
   EXPECT_EQ(memory_profile.num_hosts(), 1);
   EXPECT_EQ(memory_profile.memory_ids_size(), 1);
-  EXPECT_EQ(memory_profile.step_count().size(), 1);
   EXPECT_EQ(memory_profile.memory_profile_per_allocator().begin()->first,
             ""GPU_0_bfc"");
   const auto& allocator_memory_profile =
",0,train
3b5ada30c14d35d6fbf0aeaaee898c5ff65b008c,tensorflow/tensorflow,"Add ability to disable zero-debiasing in ExponentialMovingAverage, for the purpose of backwards compatibility to support old checkpoints. For now, set this default value to avoid debiasing.
Change: 140613316",moving_averages.py,"@@ -288,7 +288,8 @@ class ExponentialMovingAverage(object):
   @@variables_to_restore
   """"""
 
-  def __init__(self, decay, num_updates=None, name=""ExponentialMovingAverage""):
+  def __init__(self, decay, num_updates=None, zero_debias=False,
+               name=""ExponentialMovingAverage""):
     """"""Creates a new ExponentialMovingAverage object.
 
     The `apply()` method has to be called to create shadow variables and add
@@ -305,11 +306,14 @@ class ExponentialMovingAverage(object):
     Args:
       decay: Float.  The decay to use.
       num_updates: Optional count of number of updates applied to variables.
+      zero_debias: If `True`, zero debias moving-averages that are initialized
+        with tensors.
       name: String. Optional prefix name to use for the name of ops added in
         `apply()`.
     """"""
     self._decay = decay
     self._num_updates = num_updates
+    self._zero_debias = zero_debias
     self._name = name
     self._averages = {}
 
@@ -373,7 +377,8 @@ class ExponentialMovingAverage(object):
               var,
               self._name,
               colocate_with_primary=(var.op.type == ""Variable""))
-          zero_debias_true.add(avg)
+          if self._zero_debias:
+            zero_debias_true.add(avg)
       self._averages[var] = avg
 
     with ops.name_scope(self._name) as scope:
",0,train
3b5ada30c14d35d6fbf0aeaaee898c5ff65b008c,tensorflow/tensorflow,"Add ability to disable zero-debiasing in ExponentialMovingAverage, for the purpose of backwards compatibility to support old checkpoints. For now, set this default value to avoid debiasing.
Change: 140613316",moving_averages_test.py,"@@ -89,6 +89,11 @@ def _Repeat(value, dim):
 class ExponentialMovingAverageTest(tf.test.TestCase):
 
   def _CheckDecay(self, ema, actual_decay, dim):
+    def _Scale(dk, steps):
+      if ema._zero_debias:
+        return 1 - dk ** (steps + 1)
+      else:
+        return 1
     tens = _Repeat(10.0, dim)
     thirties = _Repeat(30.0, dim)
     var0 = tf.Variable(tens, name=""v0"")
@@ -133,7 +138,7 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
     self.assertAllClose(expected, avg0.eval())
     expected = _Repeat(30.0 * dk + 30.0 * (1 - dk), dim)
     self.assertAllClose(expected, avg1.eval())
-    expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk) / (1 - dk ** 2), dim)
+    expected = _Repeat(0.0 * dk + (10.0 + 30.0) * (1 - dk) / _Scale(dk, 1), dim)
     self.assertAllClose(expected, avg2.eval())
 
     # Again, update the averages and check.
@@ -145,7 +150,7 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
                        dim)
     self.assertAllClose(expected, avg1.eval())
     expected = _Repeat(((0.0 * dk + (10.0 + 30.0) * (1 - dk)) * dk +
-                        (10.0 + 30.0) * (1 - dk)) / (1 - dk ** 3),
+                        (10.0 + 30.0) * (1 - dk)) / _Scale(dk, 2),
                        dim)
     self.assertAllClose(expected, avg2.eval())
 
@@ -154,23 +159,47 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
       ema = tf.train.ExponentialMovingAverage(0.25)
       self._CheckDecay(ema, actual_decay=0.25, dim=1)
 
+  def testAverageVariablesNoNumUpdates_Scalar_Debias(self):
+    with self.test_session():
+      ema = tf.train.ExponentialMovingAverage(0.25, zero_debias=True)
+      self._CheckDecay(ema, actual_decay=0.25, dim=1)
+
   def testAverageVariablesNoNumUpdates_Vector(self):
     with self.test_session():
       ema = tf.train.ExponentialMovingAverage(0.25)
       self._CheckDecay(ema, actual_decay=0.25, dim=5)
 
+  def testAverageVariablesNoNumUpdates_Vector_Debias(self):
+    with self.test_session():
+      ema = tf.train.ExponentialMovingAverage(0.25, zero_debias=True)
+      self._CheckDecay(ema, actual_decay=0.25, dim=5)
+
   def testAverageVariablesNumUpdates_Scalar(self):
     with self.test_session():
       # With num_updates 1, the decay applied is 0.1818
       ema = tf.train.ExponentialMovingAverage(0.25, num_updates=1)
       self._CheckDecay(ema, actual_decay=0.181818, dim=1)
 
+  def testAverageVariablesNumUpdates_Scalar_Debias(self):
+    with self.test_session():
+      # With num_updates 1, the decay applied is 0.1818
+      ema = tf.train.ExponentialMovingAverage(
+          0.25, num_updates=1, zero_debias=True)
+      self._CheckDecay(ema, actual_decay=0.181818, dim=1)
+
   def testAverageVariablesNumUpdates_Vector(self):
     with self.test_session():
       # With num_updates 1, the decay applied is 0.1818
       ema = tf.train.ExponentialMovingAverage(0.25, num_updates=1)
       self._CheckDecay(ema, actual_decay=0.181818, dim=5)
 
+  def testAverageVariablesNumUpdates_Vector_Debias(self):
+    with self.test_session():
+      # With num_updates 1, the decay applied is 0.1818
+      ema = tf.train.ExponentialMovingAverage(
+          0.25, num_updates=1, zero_debias=True)
+      self._CheckDecay(ema, actual_decay=0.181818, dim=5)
+
   def testAverageVariablesWithControlDeps(self):
     with self.test_session() as sess:
       v0 = tf.Variable(0, name=""v0"")
@@ -195,14 +224,15 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
       self.assertEqual(1, sess.run(v0))
       self.assertEqual([17.5], sess.run(v1_avg))
 
-  def testAverageVariablesNames(self):
+  def averageVariablesNamesHelper(self, zero_debias):
     with self.test_session():
       v0 = tf.Variable(10.0, name=""v0"")
       v1 = tf.Variable(30.0, name=""v1"")
       # Add a non-trainable variable.
       v2 = tf.Variable(20.0, name=""v2"", trainable=False)
       tensor2 = v0 + v1
-      ema = tf.train.ExponentialMovingAverage(0.25, name=""foo"")
+      ema = tf.train.ExponentialMovingAverage(
+          0.25, zero_debias=zero_debias, name=""foo"")
       self.assertEqual(""v0/foo"", ema.average_name(v0))
       self.assertEqual(""v1/foo"", ema.average_name(v1))
       self.assertEqual(""add/foo"", ema.average_name(tensor2))
@@ -212,21 +242,30 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
       # {v0/foo : v0,
       #  v1/foo : v1,
       #  add/foo : add/foo,
-      #  add/foo/biased: add/foo/biased,
-      #  add/foo/local_step: add/foo/local_step,
       #  v2 : v2}
+      expected_names = [ema.average_name(v0),
+                        ema.average_name(v1),
+                        ema.average_name(tensor2),
+                        v2.op.name]
+      if zero_debias:
+        # vars_to_restore should also contain the following:
+        #  {add/foo/biased: add/foo/biased,
+        #  add/foo/local_step: add/foo/local_step}
+        expected_names += [ema.average_name(tensor2) + ""/biased"",
+                           ema.average_name(tensor2) + ""/local_step""]
       self.assertEqual(sorted(vars_to_restore.keys()),
-                       sorted([ema.average_name(v0),
-                               ema.average_name(v1),
-                               ema.average_name(tensor2),
-                               ema.average_name(tensor2) + ""/biased"",
-                               ema.average_name(tensor2) + ""/local_step"",
-                               v2.op.name]))
+                       sorted(expected_names))
       self.assertEqual(ema.average_name(v0), ema.average(v0).op.name)
       self.assertEqual(ema.average_name(v1), ema.average(v1).op.name)
       self.assertEqual(ema.average_name(tensor2), ema.average(tensor2).op.name)
 
-  def testAverageVariablesNamesRespectScope(self):
+  def testAverageVariablesNames(self):
+    self.averageVariablesNamesHelper(zero_debias=True)
+
+  def testAverageVariablesNamesNoDebias(self):
+    self.averageVariablesNamesHelper(zero_debias=False)
+
+  def averageVariablesNamesRespectScopeHelper(self, zero_debias):
     # See discussion on #2740.
     with self.test_session():
       with tf.variable_scope(""scope1""):
@@ -236,7 +275,8 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
         v2 = tf.Variable(20.0, name=""v2"", trainable=False)
         tensor2 = v0 + v1
       with tf.variable_scope(""scope2""):
-        ema = tf.train.ExponentialMovingAverage(0.25, name=""foo"")
+        ema = tf.train.ExponentialMovingAverage(
+            0.25, zero_debias=zero_debias, name=""foo"")
         self.assertEqual(""scope2/scope1/v0/foo"", ema.average_name(v0))
         self.assertEqual(""scope2/scope1/v1/foo"", ema.average_name(v1))
         self.assertEqual(""scope2/scope1/add/foo"", ema.average_name(tensor2))
@@ -246,22 +286,32 @@ class ExponentialMovingAverageTest(tf.test.TestCase):
         # {scope2/scope1/v0/foo : v0,
         #  scope2/scope1/v1/foo : v1,
         #  scope2/scope1/add/foo : add/foo,
-        #  scope2/scope2/scope1/add/foo/biased: add/foo/biased,
-        #  scope2/scope2/scope1/add/foo/local_step: add/foo/local_step,
         #  scope1/v2 : v2}
-        sc = ""scope2/""
+        expected_names = [ema.average_name(v0),
+                          ema.average_name(v1),
+                          ema.average_name(tensor2),
+                          v2.op.name]
+        if zero_debias:
+          # vars_to_restore should also contain the following:
+          # {scope2/scope2/scope1/add/foo/biased: add/foo/biased,
+          #  scope2/scope2/scope1/add/foo/local_step: add/foo/local_step}
+          sc = ""scope2/""
+          expected_names += [sc + ema.average_name(tensor2) + ""/biased"",
+                             sc + ema.average_name(tensor2) + ""/local_step""]
+
         self.assertEqual(sorted(vars_to_restore.keys()),
-                         sorted([ema.average_name(v0),
-                                 ema.average_name(v1),
-                                 ema.average_name(tensor2),
-                                 sc + ema.average_name(tensor2) + ""/biased"",
-                                 sc + ema.average_name(tensor2) + ""/local_step"",
-                                 v2.op.name]))
+                         sorted(expected_names))
         self.assertEqual(ema.average_name(v0), ema.average(v0).op.name)
         self.assertEqual(ema.average_name(v1), ema.average(v1).op.name)
         self.assertEqual(ema.average_name(tensor2),
                          ema.average(tensor2).op.name)
 
+  def testAverageVariablesNamesRespectScope(self):
+    self.averageVariablesNamesRespectScopeHelper(zero_debias=True)
+
+  def testAverageVariablesNamesRespectScopeNoDebias(self):
+    self.averageVariablesNamesRespectScopeHelper(zero_debias=False)
+
   def testSubsetAverageVariablesNames(self):
     with self.test_session():
       v0 = tf.Variable(10.0, name=""v0"")
",0,train
5c145b837609062d5ec5f0d2ddbd90c5fadee4ff,tensorflow/tensorflow,Updated usage example following the requested changes,image_ops_impl.py,"@@ -1948,9 +1948,8 @@ def random_hue(image, max_delta, seed=None):
 
   Usage Example:
   ```python
-  >> import tensorflow as tf
-  >> x = tf.random.normal(shape=(256, 256, 3))
-  >> y = tf.image.random_hue(x, max_delta=0.1)
+  >> x = tf.constant([[[2.0, 3.0, 2.0]]])
+  >> y = tf.image.random_hue(x, max_delta=0.1, seed=1)
   ```
   
   Args:
@@ -2942,8 +2941,7 @@ def rgb_to_yiq(images):
   
   Usage Example:
     ```python
-    >> import tensorflow as tf
-    >> x = tf.random.normal(shape=(256, 256, 3))
+    >> x = tf.constant([[[1.0, 2.0, 3.0]]])
     >> y = tf.image.rgb_to_yiq(x)
     ```
 
",0,train
7a3b953e342dcf35869bece309f5ba3e9be81fd8,tensorflow/tensorflow,supporting quantized pooling op,quantized_pooling_ops.cc,"@@ -137,4 +137,14 @@ REGISTER_KERNEL_BUILDER(
     Name(""QuantizedMaxPool"").Device(DEVICE_CPU).TypeConstraint<quint8>(""T""),
     QuantizedMaxPoolingOp<CPUDevice, quint8>);
 
+#ifdef INTEL_MKL
+REGISTER_KERNEL_BUILDER(
+    Name(""QuantizedAvgPool"").Device(DEVICE_CPU).TypeConstraint<qint8>(""T""),
+    QuantizedAvgPoolingOp<CPUDevice, qint8>);
+
+REGISTER_KERNEL_BUILDER(
+    Name(""QuantizedMaxPool"").Device(DEVICE_CPU).TypeConstraint<qint8>(""T""),
+    QuantizedMaxPoolingOp<CPUDevice, qint8>);
+#endif
+
 }  // namespace tensorflow
",0,train
afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model and TFL_Interpreter lifetimes

PiperOrigin-RevId: 211988805",c_api.cc,"@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 #include ""tensorflow/contrib/lite/experimental/c/c_api.h""
 
+#include <memory>
+
 #include ""tensorflow/contrib/lite/context.h""
 #include ""tensorflow/contrib/lite/experimental/c/c_api_internal.h""
 #include ""tensorflow/contrib/lite/interpreter.h""
@@ -29,12 +31,14 @@ extern ""C"" {
 TFL_Model* TFL_NewModel(const void* model_data, size_t model_size) {
   auto model = tflite::FlatBufferModel::BuildFromBuffer(
       static_cast<const char*>(model_data), model_size);
-  return model ? new TFL_Model{std::move(model)} : nullptr;
+  std::shared_ptr<const tflite::FlatBufferModel> shared_model(model.release());
+  return shared_model ? new TFL_Model{std::move(shared_model)} : nullptr;
 }
 
 TFL_Model* TFL_NewModelFromFile(const char* model_path) {
   auto model = tflite::FlatBufferModel::BuildFromFile(model_path);
-  return model ? new TFL_Model{std::move(model)} : nullptr;
+  std::shared_ptr<const tflite::FlatBufferModel> shared_model(model.release());
+  return shared_model ? new TFL_Model{std::move(shared_model)} : nullptr;
 }
 
 void TFL_DeleteModel(TFL_Model* model) { delete model; }
@@ -72,7 +76,7 @@ TFL_Interpreter* TFL_NewInterpreter(
     }
   }
 
-  return new TFL_Interpreter{std::move(interpreter)};
+  return new TFL_Interpreter{model->impl, std::move(interpreter)};
 }
 
 void TFL_DeleteInterpreter(TFL_Interpreter* interpreter) { delete interpreter; }
",0,train
afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model and TFL_Interpreter lifetimes

PiperOrigin-RevId: 211988805",c_api.h,"@@ -93,7 +93,8 @@ typedef struct TFL_Interpreter TFL_Interpreter;
 // failure.
 //
 // * `model` must be a valid model instance. The caller retains ownership of the
-//   object, and can destroy it immediately after creating the interpreter.
+//   object, and can destroy it immediately after creating the interpreter; the
+//   interpreter will maintain its own reference to the underlying model data.
 // * `optional_options` may be null. The caller retains ownership of the object,
 //   and can safely destroy it immediately after creating the interpreter.
 //
",0,train
afb966c4316a60823b584add5cec023d88a88887,tensorflow/tensorflow,"Decouple TFL_Model and TFL_Interpreter lifetimes

PiperOrigin-RevId: 211988805",c_api_internal.h,"@@ -24,7 +24,8 @@ limitations under the License.
 // not be depended on.
 
 struct TFL_Model {
-  std::unique_ptr<tflite::FlatBufferModel> impl;
+  // Sharing is safe as FlatBufferModel is const.
+  std::shared_ptr<const tflite::FlatBufferModel> impl;
 };
 
 struct TFL_InterpreterOptions {
@@ -35,6 +36,9 @@ struct TFL_InterpreterOptions {
 };
 
 struct TFL_Interpreter {
+  // Taking a reference to the (const) model data avoids lifetime-related issues
+  // and complexity with the TFL_Model's existence.
+  std::shared_ptr<const tflite::FlatBufferModel> model;
   std::unique_ptr<tflite::Interpreter> impl;
 };
 
",0,train
b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support.

PiperOrigin-RevId: 161242660",reader_test.py,"@@ -81,16 +81,23 @@ class ReaderTest(test.TestCase):
 
     # Graph that updates the single variable. SavedModel is invoked:
     # - to add the model (weights are not updated).
-    # - multiple custom tags.
+    # - multiple predefined tags.
     with self.test_session(graph=ops.Graph()) as sess:
       self._init_and_validate_variable(sess, ""v"", 44)
+      builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU])
+
+    # Graph that updates the single variable. SavedModel is invoked:
+    # - to add the model (weights are not updated).
+    # - multiple custom tags.
+    with self.test_session(graph=ops.Graph()) as sess:
+      self._init_and_validate_variable(sess, ""v"", 45)
       builder.add_meta_graph([""foo"", ""bar""])
 
     # Save the SavedModel to disk.
     builder.save()
 
     actual_tags = reader.get_saved_model_tag_sets(saved_model_dir)
-    expected_tags = [[""train""], [""serve""], [""foo"", ""bar""]]
+    expected_tags = [[""train""], [""serve""], [""serve"", ""gpu""], [""foo"", ""bar""]]
     self.assertEqual(expected_tags, actual_tags)
 
 
",0,train
b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support.

PiperOrigin-RevId: 161242660",saved_model_test.py,"@@ -207,6 +207,13 @@ class SavedModelTest(test.TestCase):
       self._init_and_validate_variable(sess, ""v"", 43)
       builder.add_meta_graph([tag_constants.SERVING])
 
+    # Graph that updates the single variable. SavedModel invoked to:
+    # - simply add the model (weights are not updated).
+    # - multiple tags (from predefined constants).
+    with self.test_session(graph=ops.Graph()) as sess:
+      self._init_and_validate_variable(sess, ""v"", 45)
+      builder.add_meta_graph([tag_constants.SERVING, tag_constants.GPU])
+
     # Graph that updates the single variable. SavedModel is invoked:
     # - to add the model (weights are not updated).
     # - multiple custom tags.
@@ -230,6 +237,13 @@ class SavedModelTest(test.TestCase):
       self.assertEqual(
           42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
 
+    # Restore the graph with multiple predefined tags whose variables were not
+    # saved.
+    with self.test_session(graph=ops.Graph()) as sess:
+      loader.load(sess, [tag_constants.SERVING, tag_constants.GPU], export_dir)
+      self.assertEqual(
+          42, ops.get_collection(ops.GraphKeys.GLOBAL_VARIABLES)[0].eval())
+
     # Restore the graph with multiple tags. Provide duplicate tags to test set
     # semantics.
     with self.test_session(graph=ops.Graph()) as sess:
",0,train
b1b7d5930ecdc9412e7a3035bdd2be49e9cfc230,tensorflow/tensorflow,"Add a tag constant, gpu, to present graph with GPU support.

PiperOrigin-RevId: 161242660",tag_constants.py,"@@ -28,9 +28,12 @@ SERVING = ""serve""
 # Tag for the `training` graph.
 TRAINING = ""train""
 
+# Tag for the `gpu` graph.
+GPU = ""gpu""
 
 _allowed_symbols = [
     ""SERVING"",
-    ""TRAINING""
+    ""TRAINING"",
+    ""GPU""
 ]
 remove_undocumented(__name__, _allowed_symbols)
",0,train
4ecd2a70dd750b20a61033fe08301745685bf288,tensorflow/tensorflow,"Added unit test for max_to_keep being None.
Change: 115516426",saver_test.py,"@@ -37,6 +37,14 @@ from tensorflow.python.framework import function
 from tensorflow.python.platform import gfile
 
 
+def _TestDir(test_name):
+  test_dir = os.path.join(tf.test.get_temp_dir(), test_name)
+  if os.path.exists(test_dir):
+    shutil.rmtree(test_dir)
+  gfile.MakeDirs(test_dir)
+  return test_dir
+
+
 class SaverTest(tf.test.TestCase):
 
   def testBasics(self):
@@ -349,12 +357,7 @@ class SaveRestoreShardedTest(tf.test.TestCase):
 class MaxToKeepTest(tf.test.TestCase):
 
   def testNonSharded(self):
-    save_dir = os.path.join(self.get_temp_dir(), ""max_to_keep_non_sharded"")
-    try:
-      gfile.DeleteRecursively(save_dir)
-    except OSError:
-      pass                      # Ignore
-    gfile.MakeDirs(save_dir)
+    save_dir = _TestDir(""max_to_keep_non_sharded"")
 
     with self.test_session() as sess:
       v = tf.Variable(10.0, name=""v"")
@@ -456,12 +459,7 @@ class MaxToKeepTest(tf.test.TestCase):
       self.assertTrue(gfile.Exists(save._MetaGraphFilename(s1)))
 
   def testSharded(self):
-    save_dir = os.path.join(self.get_temp_dir(), ""max_to_keep_sharded"")
-    try:
-      gfile.DeleteRecursively(save_dir)
-    except OSError:
-      pass                      # Ignore
-    gfile.MakeDirs(save_dir)
+    save_dir = _TestDir(""max_to_keep_sharded"")
 
     with tf.Session(
         target="""",
@@ -495,17 +493,39 @@ class MaxToKeepTest(tf.test.TestCase):
       self.assertEqual(2, len(gfile.Glob(s3)))
       self.assertTrue(gfile.Exists(save._MetaGraphFilename(s3)))
 
+  def testNoMaxToKeep(self):
+    save_dir = _TestDir(""no_max_to_keep"")
+    save_dir2 = _TestDir(""max_to_keep_0"")
+
+    with self.test_session() as sess:
+      v = tf.Variable(10.0, name=""v"")
+      tf.initialize_all_variables().run()
+
+      # Test max_to_keep being None.
+      save = tf.train.Saver({""v"": v}, max_to_keep=None)
+      self.assertEqual([], save.last_checkpoints)
+      s1 = save.save(sess, os.path.join(save_dir, ""s1""))
+      self.assertEqual([], save.last_checkpoints)
+      self.assertTrue(gfile.Exists(s1))
+      s2 = save.save(sess, os.path.join(save_dir, ""s2""))
+      self.assertEqual([], save.last_checkpoints)
+      self.assertTrue(gfile.Exists(s2))
+
+      # Test max_to_keep being 0.
+      save2 = tf.train.Saver({""v"": v}, max_to_keep=0)
+      self.assertEqual([], save2.last_checkpoints)
+      s1 = save2.save(sess, os.path.join(save_dir2, ""s1""))
+      self.assertEqual([], save2.last_checkpoints)
+      self.assertTrue(gfile.Exists(s1))
+      s2 = save2.save(sess, os.path.join(save_dir2, ""s2""))
+      self.assertEqual([], save2.last_checkpoints)
+      self.assertTrue(gfile.Exists(s2))
+
 
 class KeepCheckpointEveryNHoursTest(tf.test.TestCase):
 
   def testNonSharded(self):
-    save_dir = os.path.join(self.get_temp_dir(),
-                            ""keep_checkpoint_every_n_hours"")
-    try:
-      gfile.DeleteRecursively(save_dir)
-    except OSError:
-      pass                      # Ignore
-    gfile.MakeDirs(save_dir)
+    save_dir = _TestDir(""keep_checkpoint_every_n_hours"")
 
     with self.test_session() as sess:
       v = tf.Variable([10.0], name=""v"")
@@ -685,15 +705,8 @@ class LatestCheckpointWithRelativePaths(tf.test.TestCase):
 
 class CheckpointStateTest(tf.test.TestCase):
 
-  def _TestDir(self, test_name):
-    test_dir = os.path.join(self.get_temp_dir(), test_name)
-    if os.path.exists(test_dir):
-      shutil.rmtree(test_dir)
-    gfile.MakeDirs(test_dir)
-    return test_dir
-
   def testAbsPath(self):
-    save_dir = self._TestDir(""abs_paths"")
+    save_dir = _TestDir(""abs_paths"")
     abs_path = os.path.join(save_dir, ""model-0"")
     ckpt = tf.train.generate_checkpoint_state_proto(save_dir, abs_path)
     self.assertEqual(ckpt.model_checkpoint_path, abs_path)
@@ -712,7 +725,7 @@ class CheckpointStateTest(tf.test.TestCase):
     self.assertEqual(ckpt.all_model_checkpoint_paths[-1], new_rel_path)
 
   def testAllModelCheckpointPaths(self):
-    save_dir = self._TestDir(""all_models_test"")
+    save_dir = _TestDir(""all_models_test"")
     abs_path = os.path.join(save_dir, ""model-0"")
     for paths in [None, [], [""model-2""]]:
       ckpt = tf.train.generate_checkpoint_state_proto(
@@ -726,7 +739,7 @@ class CheckpointStateTest(tf.test.TestCase):
       self.assertEqual(ckpt.all_model_checkpoint_paths[-1], abs_path)
 
   def testUpdateCheckpointState(self):
-    save_dir = self._TestDir(""update_checkpoint_state"")
+    save_dir = _TestDir(""update_checkpoint_state"")
     os.chdir(save_dir)
     # Make a temporary train directory.
     train_dir = ""train""
@@ -746,15 +759,8 @@ class CheckpointStateTest(tf.test.TestCase):
 
 class MetaGraphTest(tf.test.TestCase):
 
-  def _TestDir(self, test_name):
-    test_dir = os.path.join(self.get_temp_dir(), test_name)
-    if os.path.exists(test_dir):
-      shutil.rmtree(test_dir)
-    gfile.MakeDirs(test_dir)
-    return test_dir
-
   def testAddCollectionDef(self):
-    test_dir = self._TestDir(""good_collection"")
+    test_dir = _TestDir(""good_collection"")
     filename = os.path.join(test_dir, ""metafile"")
     with self.test_session():
       # Creates a graph.
@@ -819,7 +825,7 @@ class MetaGraphTest(tf.test.TestCase):
       self.assertEqual(len(meta_graph_def.collection_def), 0)
 
   def _testMultiSaverCollectionSave(self):
-    test_dir = self._TestDir(""saver_collection"")
+    test_dir = _TestDir(""saver_collection"")
     filename = os.path.join(test_dir, ""metafile"")
     saver0_ckpt = os.path.join(test_dir, ""saver0.ckpt"")
     saver1_ckpt = os.path.join(test_dir, ""saver1.ckpt"")
@@ -894,7 +900,7 @@ class MetaGraphTest(tf.test.TestCase):
     self._testMultiSaverCollectionRestore()
 
   def testBinaryAndTextFormat(self):
-    test_dir = self._TestDir(""binary_and_text"")
+    test_dir = _TestDir(""binary_and_text"")
     filename = os.path.join(test_dir, ""metafile"")
     with self.test_session(graph=tf.Graph()):
       # Creates a graph.
@@ -924,7 +930,7 @@ class MetaGraphTest(tf.test.TestCase):
         tf.train.import_meta_graph(filename)
 
   def testSliceVariable(self):
-    test_dir = self._TestDir(""slice_saver"")
+    test_dir = _TestDir(""slice_saver"")
     filename = os.path.join(test_dir, ""metafile"")
     with self.test_session():
       v1 = tf.Variable([20.0], name=""v1"")
@@ -946,7 +952,7 @@ class MetaGraphTest(tf.test.TestCase):
       self.assertProtoEquals(meta_graph_def, new_meta_graph_def)
 
   def _testGraphExtensionSave(self):
-    test_dir = self._TestDir(""graph_extension"")
+    test_dir = _TestDir(""graph_extension"")
     filename = os.path.join(test_dir, ""metafile"")
     saver0_ckpt = os.path.join(test_dir, ""saver0.ckpt"")
     with self.test_session(graph=tf.Graph()) as sess:
",0,train
7f06d633e58ba37cbf654c1371135100260f20d8,tensorflow/tensorflow,"effective_sample_size kwarg change (same default behavior).

* rename max_lags --> filter_beyond_lag
* rename max_lags_threshold --> filter_threshold
* Users can use both filters, and they combine in an ""OR"" manner
* None ==> turn off a filter.

PiperOrigin-RevId: 185666926",mcmc_diagnostics_test.py,"@@ -41,12 +41,14 @@ class _EffectiveSampleSizeTest(object):
                                                    sess,
                                                    atol=1e-2,
                                                    rtol=1e-2,
-                                                   max_lags_threshold=None,
-                                                   max_lags=None):
+                                                   filter_threshold=None,
+                                                   filter_beyond_lag=None):
     x = array_ops.placeholder_with_default(
         input=x_, shape=x_.shape if self.use_static_shape else None)
     ess = mcmc_diagnostics.effective_sample_size(
-        x, max_lags_threshold=max_lags_threshold, max_lags=max_lags)
+        x,
+        filter_threshold=filter_threshold,
+        filter_beyond_lag=filter_beyond_lag)
     if self.use_static_shape:
       self.assertAllEqual(x.shape[1:], ess.shape)
 
@@ -56,18 +58,19 @@ class _EffectiveSampleSizeTest(object):
         np.ones_like(ess_) * expected_ess, ess_, atol=atol, rtol=rtol)
 
   def testIidRank1NormalHasFullEssMaxLags10(self):
-    # With a length 5000 iid normal sequence, and max_lags = 10, we should
-    # have a good estimate of ESS, and it should be close to the full sequence
-    # length of 5000.
-    # The choice of max_lags = 10 is a short cutoff, reasonable only since we
-    # know the correlation length should be zero right away.
+    # With a length 5000 iid normal sequence, and filter_beyond_lag = 10, we
+    # should have a good estimate of ESS, and it should be close to the full
+    # sequence length of 5000.
+    # The choice of filter_beyond_lag = 10 is a short cutoff, reasonable only
+    # since we know the correlation length should be zero right away.
     with self.test_session() as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         self._check_versus_expected_effective_sample_size(
             x_=rng.randn(5000).astype(np.float32),
             expected_ess=5000,
             sess=sess,
-            max_lags=10,
+            filter_beyond_lag=10,
+            filter_threshold=None,
             rtol=0.3)
 
   def testIidRank2NormalHasFullEssMaxLags10(self):
@@ -78,23 +81,25 @@ class _EffectiveSampleSizeTest(object):
             x_=rng.randn(5000, 2).astype(np.float32),
             expected_ess=5000,
             sess=sess,
-            max_lags=10,
+            filter_beyond_lag=10,
+            filter_threshold=None,
             rtol=0.3)
 
   def testIidRank1NormalHasFullEssMaxLagThresholdZero(self):
-    # With a length 5000 iid normal sequence, and max_lags_threshold = 0,
+    # With a length 5000 iid normal sequence, and filter_threshold = 0,
     # we should have a super-duper estimate of ESS, and it should be very close
     # to the full sequence length of 5000.
-    # The choice of max_lags_cutoff = 0 means we cutoff as soon as the auto-corr
-    # is below zero.  This should happen very quickly, due to the fact that the
-    # theoretical auto-corr is [1, 0, 0,...]
+    # The choice of filter_threshold = 0 means we cut off as soon as the
+    # auto-corr is below zero.  This should happen very quickly, due to the
+    # fact that the theoretical auto-corr is [1, 0, 0,...]
     with self.test_session() as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         self._check_versus_expected_effective_sample_size(
             x_=rng.randn(5000).astype(np.float32),
             expected_ess=5000,
             sess=sess,
-            max_lags_threshold=0.,
+            filter_beyond_lag=None,
+            filter_threshold=0.,
             rtol=0.1)
 
   def testIidRank2NormalHasFullEssMaxLagThresholdZero(self):
@@ -105,7 +110,8 @@ class _EffectiveSampleSizeTest(object):
             x_=rng.randn(5000, 2).astype(np.float32),
             expected_ess=5000,
             sess=sess,
-            max_lags_threshold=0.,
+            filter_beyond_lag=None,
+            filter_threshold=0.,
             rtol=0.1)
 
   def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLags50(self):
@@ -121,7 +127,8 @@ class _EffectiveSampleSizeTest(object):
             x_=x_,
             expected_ess=50000 // 10,
             sess=sess,
-            max_lags=50,
+            filter_beyond_lag=50,
+            filter_threshold=None,
             rtol=0.2)
 
   def testLength10CorrelationHasEssOneTenthTotalLengthUsingMaxLagsThresholdZero(
@@ -138,7 +145,8 @@ class _EffectiveSampleSizeTest(object):
             x_=x_,
             expected_ess=50000 // 10,
             sess=sess,
-            max_lags_threshold=0.,
+            filter_beyond_lag=None,
+            filter_threshold=0.,
             rtol=0.1)
 
   def testListArgs(self):
@@ -148,16 +156,16 @@ class _EffectiveSampleSizeTest(object):
     x_ = (iid_x_ * np.ones((5000, 10)).astype(np.float32)).reshape((50000,))
     y_ = rng.randn(50000).astype(np.float32)
     states = [x_, x_, y_, y_]
-    max_lags_threshold = [0., None, 0., None]
-    max_lags = [None, 5, None, 5]
+    filter_threshold = [0., None, 0., None]
+    filter_beyond_lag = [None, 5, None, 5]
 
     # See other tests for reasoning on tolerance.
     with self.test_session() as sess:
       with spectral_ops_test_util.fft_kernel_label_map():
         ess = mcmc_diagnostics.effective_sample_size(
             states,
-            max_lags_threshold=max_lags_threshold,
-            max_lags=max_lags)
+            filter_threshold=filter_threshold,
+            filter_beyond_lag=filter_beyond_lag)
         ess_ = sess.run(ess)
     self.assertAllEqual(4, len(ess_))
 
@@ -166,6 +174,59 @@ class _EffectiveSampleSizeTest(object):
     self.assertAllClose(50000, ess_[2], rtol=0.1)
     self.assertAllClose(50000, ess_[3], rtol=0.1)
 
+  def testMaxLagsThresholdLessThanNeg1SameAsNone(self):
+    # Setting both means we filter out items R_k from the auto-correlation
+    # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold.
+
+    # x_ has correlation length 10.
+    iid_x_ = rng.randn(500, 1).astype(np.float32)
+    x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,))
+    with self.test_session() as sess:
+      with spectral_ops_test_util.fft_kernel_label_map():
+        x = array_ops.placeholder_with_default(
+            input=x_, shape=x_.shape if self.use_static_shape else None)
+
+        ess_none_none = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=None, filter_beyond_lag=None)
+        ess_none_200 = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=None, filter_beyond_lag=200)
+        ess_neg2_200 = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=-2., filter_beyond_lag=200)
+        ess_neg2_none = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=-2., filter_beyond_lag=None)
+        ess_none_none_, ess_none_200_, ess_neg2_200_, ess_neg2_none_ = sess.run(
+            [ess_none_none, ess_none_200, ess_neg2_200, ess_neg2_none])
+
+        # filter_threshold=-2 <==> filter_threshold=None.
+        self.assertAllClose(ess_none_none_, ess_neg2_none_)
+        self.assertAllClose(ess_none_200_, ess_neg2_200_)
+
+  def testMaxLagsArgsAddInAnOrManner(self):
+    # Setting both means we filter out items R_k from the auto-correlation
+    # sequence if k > filter_beyond_lag OR k >= j where R_j < filter_threshold.
+
+    # x_ has correlation length 10.
+    iid_x_ = rng.randn(500, 1).astype(np.float32)
+    x_ = (iid_x_ * np.ones((500, 10)).astype(np.float32)).reshape((5000,))
+    with self.test_session() as sess:
+      with spectral_ops_test_util.fft_kernel_label_map():
+        x = array_ops.placeholder_with_default(
+            input=x_, shape=x_.shape if self.use_static_shape else None)
+
+        ess_1_9 = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=1., filter_beyond_lag=9)
+        ess_1_none = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=1., filter_beyond_lag=None)
+        ess_none_9 = mcmc_diagnostics.effective_sample_size(
+            x, filter_threshold=None, filter_beyond_lag=9)
+        ess_1_9_, ess_1_none_, ess_none_9_ = sess.run(
+            [ess_1_9, ess_1_none, ess_none_9])
+
+        # Since R_k = 1 for k < 10, and R_k < 1 for k >= 10,
+        # filter_threshold = 1 <==> filter_beyond_lag = 9.
+        self.assertAllClose(ess_1_9_, ess_1_none_)
+        self.assertAllClose(ess_1_9_, ess_none_9_)
+
 
 class EffectiveSampleSizeStaticTest(test.TestCase, _EffectiveSampleSizeTest):
 
",0,test
7f06d633e58ba37cbf654c1371135100260f20d8,tensorflow/tensorflow,"effective_sample_size kwarg change (same default behavior).

* rename max_lags --> filter_beyond_lag
* rename max_lags_threshold --> filter_threshold
* Users can use both filters, and they combine in an ""OR"" manner
* None ==> turn off a filter.

PiperOrigin-RevId: 185666926",mcmc_diagnostics_impl.py,"@@ -36,13 +36,13 @@ __all__ = [
 
 
 def effective_sample_size(states,
-                          max_lags_threshold=None,
-                          max_lags=None,
+                          filter_threshold=0.,
+                          filter_beyond_lag=None,
                           name=None):
   """"""Estimate a lower bound on effective sample size for each independent chain.
 
-  Roughly speaking, the ""effective sample size"" (ESS) is the size of an iid
-  sample with the same variance as `state`.
+  Roughly speaking, ""effective sample size"" (ESS) is the size of an iid sample
+  with the same variance as `state`.
 
   More precisely, given a stationary sequence of possibly correlated random
   variables `X_1, X_2,...,X_N`, each identically distributed ESS is the number
@@ -87,21 +87,28 @@ def effective_sample_size(states,
   This function estimates the above by first estimating the auto-correlation.
   Since `R_k` must be estimated using only `N - k` samples, it becomes
   progressively noisier for larger `k`.  For this reason, the summation over
-  `R_k` should be truncated at some number `max_lags < N`.  Since many MCMC
-  methods generate chains where `R_k > 0`, a reasonable critera is to truncate
-  at the first index where the estimated auto-correlation becomes negative.
+  `R_k` should be truncated at some number `filter_beyond_lag < N`.  Since many
+  MCMC methods generate chains where `R_k > 0`, a reasonable criterion is to
+  truncate at the first index where the estimated auto-correlation becomes
+  negative.
+
+  The arguments `filter_beyond_lag`, `filter_threshold` are filters intended to
+  remove noisy tail terms from `R_k`.  They combine in an ""OR"" manner meaning
+  terms are removed if they were to be filtered under the `filter_beyond_lag` OR
+  `filter_threshold` criteria.
 
   Args:
     states:  `Tensor` or list of `Tensor` objects.  Dimension zero should index
       identically distributed states.
-    max_lags_threshold:  `Tensor` or list of `Tensor` objects.
+    filter_threshold:  `Tensor` or list of `Tensor` objects.
       Must broadcast with `state`.  The auto-correlation sequence is truncated
-      after the first appearance of a term less than `max_lags_threshold`.  If
-      both `max_lags` and `max_lags_threshold` are `None`,
-      `max_lags_threshold` defaults to `0`.
-    max_lags:  `Tensor` or list of `Tensor` objects.  Must be `int`-like and
-      scalar valued.  The auto-correlation sequence is truncated to this length.
-      May be provided only if `max_lags_threshold` is not.
+      after the first appearance of a term less than `filter_threshold`.
+      Setting to `None` means we use no threshold filter.  Since `|R_k| <= 1`,
+      setting to any number less than `-1` has the same effect.
+    filter_beyond_lag:  `Tensor` or list of `Tensor` objects.  Must be
+      `int`-like and scalar valued.  The auto-correlation sequence is truncated
+      to this length.  Setting to `None` means we do not filter based on number
+      of lags.
     name:  `String` name to prepend to created ops.
 
   Returns:
@@ -109,8 +116,8 @@ def effective_sample_size(states,
       each component of `states`.  Shape will be `states.shape[1:]`.
 
   Raises:
-    ValueError:  If `states` and `max_lags_threshold` or `states` and `max_lags`
-      are both lists with different lengths.
+    ValueError:  If `states` and `filter_threshold` or `states` and
+      `filter_beyond_lag` are both lists with different lengths.
   """"""
   states_was_list = _is_list_like(states)
 
@@ -118,15 +125,16 @@ def effective_sample_size(states,
   if not states_was_list:
     states = [states]
 
-  max_lags = _broadcast_maybelist_arg(states, max_lags, ""max_lags"")
-  max_lags_threshold = _broadcast_maybelist_arg(states, max_lags_threshold,
-                                                ""max_lags_threshold"")
+  filter_beyond_lag = _broadcast_maybelist_arg(states, filter_beyond_lag,
+                                               ""filter_beyond_lag"")
+  filter_threshold = _broadcast_maybelist_arg(states, filter_threshold,
+                                              ""filter_threshold"")
 
   # Process items, one at a time.
   with ops.name_scope(name, ""effective_sample_size""):
     ess_list = [
         _effective_sample_size_single_state(s, ml, mlt)
-        for (s, ml, mlt) in zip(states, max_lags, max_lags_threshold)
+        for (s, ml, mlt) in zip(states, filter_beyond_lag, filter_threshold)
     ]
 
   if states_was_list:
@@ -134,38 +142,31 @@ def effective_sample_size(states,
   return ess_list[0]
 
 
-def _effective_sample_size_single_state(states, max_lags, max_lags_threshold):
+def _effective_sample_size_single_state(states, filter_beyond_lag,
+                                        filter_threshold):
   """"""ESS computation for one single Tensor argument.""""""
-  if max_lags is not None and max_lags_threshold is not None:
-    raise ValueError(
-        ""Expected at most one of max_lags, max_lags_threshold to be provided.  ""
-        ""Found: {}, {}"".format(max_lags, max_lags_threshold))
-
-  if max_lags_threshold is None:
-    max_lags_threshold = 0.
 
   with ops.name_scope(
       ""effective_sample_size_single_state"",
-      values=[states, max_lags, max_lags_threshold]):
+      values=[states, filter_beyond_lag, filter_threshold]):
 
     states = ops.convert_to_tensor(states, name=""states"")
     dt = states.dtype
 
-    if max_lags is not None:
-      auto_corr = sample_stats.auto_correlation(
-          states, axis=0, max_lags=max_lags)
-    elif max_lags_threshold is not None:
-      max_lags_threshold = ops.convert_to_tensor(
-          max_lags_threshold, dtype=dt, name=""max_lags_threshold"")
-      auto_corr = sample_stats.auto_correlation(states, axis=0)
+    # filter_beyond_lag == None ==> auto_corr is the full sequence.
+    auto_corr = sample_stats.auto_correlation(
+        states, axis=0, max_lags=filter_beyond_lag)
+    if filter_threshold is not None:
+      filter_threshold = ops.convert_to_tensor(
+          filter_threshold, dtype=dt, name=""filter_threshold"")
       # Get a binary mask to zero out values of auto_corr below the threshold.
       #   mask[i, ...] = 1 if auto_corr[j, ...] > threshold for all j <= i,
       #   mask[i, ...] = 0, otherwise.
       # So, along dimension zero, the mask will look like [1, 1, ..., 0, 0,...]
       # Building step by step,
-      #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and max_lags_threshold = 0.2.
+      #   Assume auto_corr = [1, 0.5, 0.0, 0.3], and filter_threshold = 0.2.
       # Step 1:  mask = [False, False, True, False]
-      mask = auto_corr < max_lags_threshold
+      mask = auto_corr < filter_threshold
       # Step 2:  mask = [0, 0, 1, 1]
       mask = math_ops.cast(mask, dtype=dt)
       # Step 3:  mask = [0, 0, 1, 2]
@@ -173,14 +174,12 @@ def _effective_sample_size_single_state(states, max_lags, max_lags_threshold):
       # Step 4:  mask = [1, 1, 0, 0]
       mask = math_ops.maximum(1. - mask, 0.)
       auto_corr *= mask
-    else:
-      auto_corr = sample_stats.auto_correlation(states, axis=0)
 
     # With R[k] := auto_corr[k, ...],
     # ESS = N / {1 + 2 * Sum_{k=1}^N (N - k) / N * R[k]}
     #     = N / {-1 + 2 * Sum_{k=0}^N (N - k) / N * R[k]} (since R[0] = 1)
     #     approx N / {-1 + 2 * Sum_{k=0}^M (N - k) / N * R[k]}
-    #, where M is the max_lags truncation point chosen above.
+    # where M is the filter_beyond_lag truncation point chosen above.
 
     # Get the factor (N - k) / N, and give it shape [M, 1,...,1], having total
     # ndims the same as auto_corr
",0,test
a1d6d4524a47d11aced9156865946592f425701a,tensorflow/tensorflow,"[tf:tfrt] Temporarily disable clusters with i1 inputs

PiperOrigin-RevId: 390134209
Change-Id: I4454b2355add463262958c64881e9d5818560007",tf_cpurt_clustering.cc,"@@ -721,6 +721,20 @@ mlir::LogicalResult VerifyCluster(const Cluster& cluster) {
     (void)inserted;
   }
 
+  // TODO(b/196192286): This is a temporary workaround to disable excessive
+  // recompilation for dynamic shapes in one particular model. Remove this once
+  // specialization will be done based on shape constraints.
+  for (Operation* op : ops) {
+    for (Value value : op->getOperands()) {
+      Operation* defining_op = value.getDefiningOp();
+      if (!defining_op) continue;
+
+      if (!ops.contains(defining_op) &&
+          mlir::getElementTypeOrSelf(value.getType()).isInteger(1))
+        return failure();
+    }
+  }
+
   for (auto& pair : cluster.constraints) {
     Value value = pair.getFirst();
     ValueConstraint constraint = pair.getSecond();
",0,train
a0ed0cbc9251e59d7bbd6d0ea6f20f6c28b9625d,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-07-08

PiperOrigin-RevId: 383585806
Change-Id: Id24ee27b5f7b68a31501009c0ddb7d436e6c485d",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 8)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
e45c9f22722df4d967bf81467f1691cc6b8e864b,tensorflow/tensorflow,"Fix crash when unfused layer normalization used with mixed precision.

For numeric safety, I do the unfused layer normalization in fp32, as parts of the fused version are internally done in fp32. I'm not sure if doing the layer normalization in fp32 over fp16 makes a difference in practice.

PiperOrigin-RevId: 293717765
Change-Id: Ie91ed55d73b02b93530a72f917243c39a37e7430",normalization.py,"@@ -1078,6 +1078,12 @@ class LayerNormalization(Layer):
       return v
 
     if not self._fused:
+      input_dtype = inputs.dtype
+      if input_dtype in ('float16', 'bfloat16') and self.dtype == 'float32':
+        # If mixed precision is used, cast inputs to float32 so that this is at
+        # least as numerically stable as the fused version.
+        inputs = math_ops.cast(inputs, 'float32')
+
       # Calculate the moments on the last axis (layer activations).
       mean, variance = nn.moments(inputs, self.axis, keep_dims=True)
 
@@ -1091,6 +1097,7 @@ class LayerNormalization(Layer):
           offset=offset,
           scale=scale,
           variance_epsilon=self.epsilon)
+      outputs = math_ops.cast(outputs, input_dtype)
     else:
       # Collapse dims before self.axis, and dims in self.axis
       pre_dim, in_dim = (1, 1)
",0,train
e45c9f22722df4d967bf81467f1691cc6b8e864b,tensorflow/tensorflow,"Fix crash when unfused layer normalization used with mixed precision.

For numeric safety, I do the unfused layer normalization in fp32, as parts of the fused version are internally done in fp32. I'm not sure if doing the layer normalization in fp32 over fp16 makes a difference in practice.

PiperOrigin-RevId: 293717765
Change-Id: Ie91ed55d73b02b93530a72f917243c39a37e7430",layer_correctness_test.py,"@@ -123,6 +123,8 @@ class LayerCorrectnessTest(keras_parameterized.TestCase):
       ('BatchNormalization', normalization_v2.BatchNormalization, (2, 2),
        1e-2, 1e-2),
       ('LayerNormalization', normalization.LayerNormalization, (2, 2)),
+      ('LayerNormalizationUnfused',
+       lambda: normalization.LayerNormalization(axis=1), (2, 2, 2)),
       ('MaxPooling2D', pooling.MaxPooling2D, (2, 2, 2, 1)),
       ('AveragePooling2D', pooling.AveragePooling2D, (2, 2, 2, 1)),
       ('GlobalMaxPooling2D', pooling.GlobalMaxPooling2D, (2, 2, 2, 1)),
",0,train
2ccfe8e764632cd05422bda12abe0f7a24abf000,tensorflow/tensorflow,"Added a new method to extract the graph properties from a cost graph without
having to run the model. This will simplify the process of creating regression
tests

PiperOrigin-RevId: 158050327",graph_properties.cc,"@@ -218,9 +218,13 @@ Status GraphProperties::InferDynamically(Cluster* cluster) {
   TF_RETURN_IF_ERROR(
       cluster->Run(item_.graph, item_.feed, item_.fetch, &metadata));
 
+  return InferFromCostGraph(metadata.cost_graph());
+}
+
+Status GraphProperties::InferFromCostGraph(const CostGraphDef& cost_graph) {
   std::unordered_map<string, const CostGraphDef::Node*> name_to_cost;
   std::unordered_map<string, const NodeDef*> name_to_node;  // Empty
-  for (auto& node : metadata.cost_graph().node()) {
+  for (auto& node : cost_graph.node()) {
     name_to_cost[node.name()] = &node;
 
     std::vector<OpInfo::TensorProperties> output_properties;
",0,train
2ccfe8e764632cd05422bda12abe0f7a24abf000,tensorflow/tensorflow,"Added a new method to extract the graph properties from a cost graph without
having to run the model. This will simplify the process of creating regression
tests

PiperOrigin-RevId: 158050327",graph_properties.h,"@@ -36,6 +36,7 @@ class GraphProperties {
 
   Status InferStatically();
   Status InferDynamically(Cluster* cluster);
+  Status InferFromCostGraph(const CostGraphDef& cost_graph);
 
   bool HasOutputProperties(const string& name) const;
   std::vector<OpInfo::TensorProperties> GetInputProperties(
",0,train
d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign.

PiperOrigin-RevId: 196894702",hlo_evaluator_typed_visitor.h,"@@ -1738,14 +1738,16 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
-  // Enable CLZ only for int32 and uint32.
+  // Enable CLZ only for int32, uint32, int64 and uint64.
   template <
       typename NativeT,
       typename std::enable_if<
           (std::is_floating_point<NativeT>::value ||
            std::is_integral<NativeT>::value || is_complex_t<NativeT>::value) &&
           !(std::is_same<NativeT, uint32>::value ||
-            std::is_same<NativeT, int32>::value)>::type* = nullptr>
+            std::is_same<NativeT, int32>::value ||
+            std::is_same<NativeT, int64>::value ||
+            std::is_same<NativeT, uint64>::value)>::type* = nullptr>
   Status HandleClz(HloInstruction* clz) {
     return InvalidArgument(""Unsupported type for Clz"");
   }
@@ -1762,6 +1764,18 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
     return Status::OK();
   }
 
+  template <typename NativeT,
+            typename std::enable_if<
+                std::is_same<NativeT, uint64>::value ||
+                std::is_same<NativeT, int64>::value>::type* = nullptr>
+  Status HandleClz(HloInstruction* clz) {
+    TF_ASSIGN_OR_RETURN(parent_->evaluated_[clz],
+                        ElementWiseUnaryOp(clz, [](ElementwiseT elem_operand) {
+                          return 63 - tensorflow::Log2Floor64(elem_operand);
+                        }));
+    return Status::OK();
+  }
+
   Status HandleClz(HloInstruction* clz) override {
     return HandleClz<ElementwiseT>(clz);
   }
",0,test
d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign.

PiperOrigin-RevId: 196894702",array_elementwise_ops_test.cc,"@@ -2225,6 +2225,15 @@ XLA_TEST_F(ArrayElementwiseOpTest, ClzU32s) {
   ComputeAndCompareR1<uint32>(&builder, {32, 31, 27, 15, 9, 3, 0}, {});
 }
 
+XLA_TEST_F(ArrayElementwiseOpTest, ClzS64s) {
+  XlaBuilder builder(TestName());
+  auto a =
+      builder.ConstantR1<int64>({0, 1, 0x80000000, 0x7FFFFFFFF2345678ul, -1});
+  builder.Clz(a);
+
+  ComputeAndCompareR1<int64>(&builder, {64, 63, 32, 1, 0}, {});
+}
+
 XLA_TEST_F(ArrayElementwiseOpTest, AddChainFoldLeft) {
   // a ------ (add) --------- (add)
   //         /               /
",0,test
d5c075b02191f74b0b4c37713648c59ff7b06962,tensorflow/tensorflow,"Add test for 64-bit clz and sign.

PiperOrigin-RevId: 196894702",unary_op_test.cc,"@@ -84,6 +84,11 @@ int UnaryOpTest::inf<int>() {
   return 2147483647;
 }
 
+template <>
+int64 UnaryOpTest::inf<int64>() {
+  return 0x7FFFFFFFFFFFFFFFl;
+}
+
 template <>
 void UnaryOpTest::AbsTestHelper<complex64>() {
   XlaBuilder builder(TestName());
@@ -176,6 +181,7 @@ XLA_TEST_F(UnaryOpTest, SignTestR0) {
 
 XLA_TEST_F(UnaryOpTest, SignTestR1) {
   SignTestHelper<int>();
+  SignTestHelper<int64>();
   SignTestHelper<float>();
   SignTestHelper<complex64>();
 }
",0,test
8c44d4d7258b35da0275852a6a0c8afa28d16ea7,tensorflow/tensorflow,"Fixing bug in wait_for_session in which we were waiting for N ms instead of N secs.
Change: 119119175",session_manager.py,"@@ -258,7 +258,7 @@ class SessionManager(object):
 
     if max_wait_secs is None:
       max_wait_secs = float(""Inf"")
-    timer = _CountDownTimer(max_wait_secs)
+    timer = _CountDownTimer(1000 * max_wait_secs)
 
     while True:
       sess = session.Session(target, graph=self._graph, config=config)
",0,train
8c44d4d7258b35da0275852a6a0c8afa28d16ea7,tensorflow/tensorflow,"Fixing bug in wait_for_session in which we were waiting for N ms instead of N secs.
Change: 119119175",session_manager_test.py,"@@ -124,7 +124,7 @@ class SessionManagerTest(tf.test.TestCase):
 
       # Set max_wait_secs to allow us to try a few times.
       with self.assertRaises(errors.DeadlineExceededError):
-        sm.wait_for_session(master="""", max_wait_secs=3000)
+        sm.wait_for_session(master="""", max_wait_secs=3)
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
8cb0d7f4b9fdb12b3d74bcc4cf50684a718ec46c,tensorflow/tensorflow,const removed,gradients.cc,"@@ -168,7 +168,7 @@ std::vector<bool> SymbolicGradientBuilder::GetReachableNodes() {
   std::vector<bool> reachable_nodes(scope_.graph()->num_node_ids(), false);
   std::deque<Node*> queue;
   for (const Output& out : outputs_) {
-    const Node* const out_node = out.node();
+    Node* const out_node = out.node();
     const int out_node_id = out_node->id();
     if (!reachable_nodes[out_node_id]) {
       queue.push_back(out_node);
@@ -181,7 +181,7 @@ std::vector<bool> SymbolicGradientBuilder::GetReachableNodes() {
     queue.pop_front();
     for (const Edge* e : n->in_edges()) {
       if (e->IsControlEdge()) continue;
-      const Node* const src_node = e->src();
+      Node* const src_node = e->src();
       const int src_node_id = src_node->id();
       if (!reachable_nodes[src_node_id]) {
         queue.push_back(src_node);
",0,train
4a6aab8549606f44bc1384cfa2bbdd68764a4ebb,tensorflow/tensorflow,"A fix to RequantizationPerChannel Op
  - Added registration for output type: quint8",mkl_requantize_per_channel_op.cc,"@@ -20,7 +20,6 @@ limitations under the License.
 #include <math.h>
 
 #include ""mkldnn.hpp""
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/type_traits.h""
@@ -29,6 +28,7 @@ limitations under the License.
 #include ""tensorflow/core/kernels/no_op.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/util/mkl_util.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 
@@ -141,8 +141,8 @@ class MklRequantizePerChannelOp : public OpKernel {
       output_min->flat<float>()(0) = input_requested_min_float;
       output_max->flat<float>()(0) = input_requested_max_float;
     } catch (mkldnn::error& e) {
-      string error_msg = ""Status: "" + std::to_string(e.status) +
-                         "", message: "" + std::string(e.message) + "", in file "" +
+      string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" +
+                         std::string(e.message) + "", in file "" +
                          std::string(__FILE__) + "":"" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           ctx, errors::Aborted(""Operation received an exception:"", error_msg));
@@ -162,11 +162,18 @@ class MklRequantizePerChannelOp : public OpKernel {
   engine cpu_engine_ = engine(engine::cpu, 0);
 };
 
+// Registration for out_type: qint8
 REGISTER_KERNEL_BUILDER(Name(""RequantizePerChannel"")
                             .Device(DEVICE_CPU)
                             .TypeConstraint<qint32>(""T"")
                             .TypeConstraint<qint8>(""out_type""),
                         MklRequantizePerChannelOp<CPUDevice, qint8>);
+// Registration for out_type: quint8
+REGISTER_KERNEL_BUILDER(Name(""RequantizePerChannel"")
+                            .Device(DEVICE_CPU)
+                            .TypeConstraint<qint32>(""T"")
+                            .TypeConstraint<quint8>(""out_type""),
+                        MklRequantizePerChannelOp<CPUDevice, quint8>);
 
 }  // namespace tensorflow
 #endif  // INTEL_MKL
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",c_api.cc,"@@ -404,8 +404,7 @@ const char* TFE_TensorHandleDeviceName(TFE_TensorHandle* h, TF_Status* status) {
         ""The passed in handle is a nullptr"");
     return nullptr;
   }
-  tensorflow::Device* d = nullptr;
-  status->status = h->handle->OpDevice(&d);
+  tensorflow::Device* d = h->handle->op_device();
   return (d == nullptr) ? ""/job:localhost/replica:0/task:0/device:CPU:0""
                         : d->name().c_str();
 }
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",c_api_debug.cc,"@@ -57,13 +57,9 @@ TF_CAPI_EXPORT extern TFE_TensorDebugInfo* TFE_TensorHandleTensorDebugInfo(
     return nullptr;
   }
 
-  tensorflow::Device* device;
-  status->status = handle->handle->Device(&device);
-  if (!status->status.ok()) {
-    return nullptr;
-  }
-
 #ifdef TENSORFLOW_EAGER_USE_XLA
+  tensorflow::Device* device = handle->handle->device();
+
   // If tensor resides on an XLA device, use XLA device's PaddedShapeFn.
   tensorflow::XlaDevice* xla_device =
       dynamic_cast<tensorflow::XlaDevice*>(device);
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",execute.cc,"@@ -85,8 +85,7 @@ Status MaybeCopyInputToExpectedDevice(EagerOperation* op, int i,
                                       RunMetadata* run_metadata,
                                       TensorHandle** handle) {
   EagerContext* ctx = op->EagerContext();
-  Device* handle_device = nullptr;
-  TF_RETURN_IF_ERROR((*handle)->Device(&handle_device));
+  Device* handle_device = (*handle)->device();
   const Device* actual_device =
       handle_device == nullptr ? ctx->HostCPU() : handle_device;
   const Device* op_device =
@@ -419,8 +418,7 @@ Status EagerRemoteSendTensor(EagerContext* ctx, TensorHandle* h,
   request.set_op_id(ctx->NextId());
   request.set_device_name(recv_device->name());
 
-  Device* tensor_handle_device;
-  TF_RETURN_IF_ERROR(h->Device(&tensor_handle_device));
+  Device* tensor_handle_device = h->device();
 
   // AsProtoTensorContent doesn't work when the tensor is on the GPU, hence copy
   // it to the CPU before copying it out.
@@ -487,8 +485,7 @@ Status EagerRemoteExecute(EagerOperation* op, TensorHandle** retvals,
   auto* remote_op = request->add_queue()->mutable_operation();
 
   for (int i = 0; i < op->Inputs().size(); i++) {
-    tensorflow::Device* input_device;
-    TF_RETURN_IF_ERROR(op->Inputs()[i]->Device(&input_device));
+    tensorflow::Device* input_device = op->Inputs()[i]->device();
     if (op->Device() != input_device &&
         // If the expected and actual devices are on the same task, don't
         // explicitly copy, and instead depend on the copy to happen locally
@@ -624,8 +621,7 @@ Status MaybeUpdateOpDevice(EagerOperation* op) {
       ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name());
 
   for (int i = 0; i < op->Inputs().size(); ++i) {
-    Device* input_op_device = nullptr;
-    TF_RETURN_IF_ERROR(op->Inputs()[i]->OpDevice(&input_op_device));
+    Device* input_op_device = op->Inputs()[i]->op_device();
     VLOG(2) << ""for op "" << op->Name() << "" input "" << i << "" ""
             << DataTypeString(op->Inputs()[i]->dtype) << "" ""
             << (input_op_device == nullptr ? ""cpu"" : input_op_device->name())
@@ -778,6 +774,9 @@ Status EagerExecute(EagerContext* ctx, Device* device,
       // In the async case, the retval is not a nullptr, and its device is
       // already set since all TensorHandles always have their device set during
       // construction.
+      DCHECK_EQ(device, retvals[i]->op_device());
+      DCHECK_EQ(kernel->OutputDevice(i), retvals[i]->device());
+
       retvals[i]->SetTensor(outputs[i]);
     }
   }
@@ -893,8 +892,7 @@ string GetUniqueWireID() {
 
 Status EagerCopyToDevice(TensorHandle* h, EagerContext* ctx,
                          const char* device_name, TensorHandle** result) {
-  tensorflow::Device* send_device;
-  TF_RETURN_IF_ERROR(h->Device(&send_device));
+  tensorflow::Device* send_device = h->device();
 
   if (send_device == nullptr) {
     send_device = ctx->HostCPU();
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",tensor_handle.cc,"@@ -79,16 +79,6 @@ Status TensorHandle::Tensor(const tensorflow::Tensor** t) {
   return Status::OK();
 }
 
-Status TensorHandle::Device(tensorflow::Device** d) {
-  *d = device_;
-  return Status::OK();
-}
-
-Status TensorHandle::OpDevice(tensorflow::Device** d) {
-  *d = op_device_;
-  return Status::OK();
-}
-
 Status TensorHandle::TensorAndDevice(const tensorflow::Tensor** tensor,
                                      tensorflow::Device** device,
                                      tensorflow::Device** op_device) {
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",tensor_handle.h,"@@ -102,9 +102,9 @@ class TensorHandle : public core::RefCounted {
 
   Status Tensor(const tensorflow::Tensor** t);
 
-  Status Device(tensorflow::Device** d);
+  tensorflow::Device* device() const { return device_; }
 
-  Status OpDevice(tensorflow::Device** d);
+  tensorflow::Device* op_device() const { return op_device_; }
 
   Status TensorAndDevice(const tensorflow::Tensor** tensor,
                          tensorflow::Device** device,
@@ -171,11 +171,11 @@ class TensorHandle : public core::RefCounted {
   //
   // TODO(ashankar): Reference count TFE_Context to ensure that 'device_' of a
   // TFE_TensorHandle does not outlive the TFE_Context from which it came?
-  tensorflow::Device* device_;
+  tensorflow::Device* const device_;
 
   // Device in which the op producing this tensor was executed. Equals to
   // device_ for constant tensors.
-  tensorflow::Device* op_device_;
+  tensorflow::Device* const op_device_;
 
   // IDs required when this class is representing a remote tensor handle.
   const int64 remote_op_id_;
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",eager_service_impl_test.cc,"@@ -345,8 +345,7 @@ TEST_F(EagerServiceImplTest, SendTensorTest) {
       response.context_id(), RemoteTensorHandleInternal(2, 0), &tensor_handle));
   TF_ASSERT_OK(tensor_handle->Tensor(&t));
 
-  Device* device = nullptr;
-  TF_ASSERT_OK(tensor_handle->Device(&device));
+  Device* device = tensor_handle->device();
   EXPECT_NE(device, nullptr);
   EXPECT_EQ(device->name(), ""/job:localhost/replica:0/task:0/device:CPU:0"");
 
",0,train
73a45ea89bf9eb3045a07f7c1aeabd1b18113b22,tensorflow/tensorflow,"Minor cleanup - no longer returns Status on Device/OpDevice reads on tensorhandles

PiperOrigin-RevId: 221151286",py_func.cc,"@@ -177,8 +177,7 @@ tensorflow::Status ExtractTensorFromEagerTensor(const PyObject* eager_tensor,
                                                 const Device* expected_device,
                                                 const Tensor** output_tensor) {
   auto handle = EagerTensor_Handle(eager_tensor)->handle;
-  Device* actual_device = nullptr;
-  TF_RETURN_IF_ERROR(handle->Device(&actual_device));
+  Device* actual_device = handle->device();
   TF_RETURN_IF_ERROR(handle->Tensor(output_tensor));
   // actual_device may be nullptr, which implies local CPU.
   if (expected_device == actual_device) return Status::OK();
",0,train
3a179b7ee8d2b2010e019067f8514b66b899f01d,tensorflow/tensorflow,"Move TPU error counters to compilation which fills up the cache.

PiperOrigin-RevId: 424228383
Change-Id: I7cef3c66e60ca7b6d5c9d366387d149df0dbb856",tpu_compile_op_common.cc,"@@ -137,8 +137,6 @@ void TpuCompileOpKernelCommon::Compute(OpKernelContext* ctx) {
     proto.set_status_error_message(compile_status.error_message());
     status_payload = proto.SerializeAsString();
   }
-  metrics::UpdateTpuErrorCounter(""TpuCompileOp"",
-                                 error_name(compile_status.code()));
   OP_REQUIRES_OK_OR_SET_PAYLOAD(ctx,
                                 TpuCompileInterface::kTpuCompileErrorPayloadKey,
                                 status_payload, compile_status);
@@ -180,6 +178,8 @@ Status TpuCompileOpKernelCommon::CompileLocallyAndFillHostCache(
             << session_name << "" took "" << duration << "" and ""
             << (compile_status.ok() ? ""succeeded"" : ""failed"");
   tpu_program_group->LogProgramMemorySummary();
+  metrics::UpdateTpuErrorCounter(""TpuCompileOp"",
+                                 error_name(compile_status.code()));
   metrics::UpdateXlaCompilationTime(absl::ToInt64Microseconds(duration));
   TpuCompilationMetrics::IncrementCompilationCount(session_name);
 
",0,test
c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device.
All modes are experimental for now. The goal is to find the best setting, and
change the default to pick that.

PiperOrigin-RevId: 178662212",direct_session.cc,"@@ -521,9 +521,7 @@ Status DirectSession::Run(const RunOptions& run_options,
 
   args.rendezvous = run_state.rendez;
   args.cancellation_manager = &step_cancellation_manager;
-  args.runner = [this, pool](Executor::Args::Closure c) {
-    SchedClosure(pool, std::move(c));
-  };
+
   args.session_state = &session_state_;
   args.tensor_store = &run_state.tensor_store;
   args.step_container = &run_state.step_container;
@@ -584,7 +582,24 @@ Status DirectSession::Run(const RunOptions& run_options,
     return errors::Cancelled(""Run call was cancelled"");
   }
 
+  Executor::Args::Runner default_runner = [this,
+                                           pool](Executor::Args::Closure c) {
+    SchedClosure(pool, std::move(c));
+  };
   for (const auto& item : executors_and_keys->items) {
+    // TODO(zhengxq): support partial run.
+    // TODO(zhengxq): support other session types.
+    // TODO(zhengxq): if the device picks its own threadpool, we need to assign
+    //     less threads to the main compute pool by default.
+    thread::ThreadPool* device_thread_pool =
+        item.device->tensorflow_device_thread_pool();
+    if (!device_thread_pool) {
+      args.runner = default_runner;
+    } else {
+      args.runner = [this, device_thread_pool](Executor::Args::Closure c) {
+        SchedClosure(device_thread_pool, std::move(c));
+      };
+    }
     item.executor->RunAsync(args, barrier->Get());
   }
 
@@ -1222,6 +1237,7 @@ Status DirectSession::GetOrCreateExecutors(
     // NewLocalExecutor takes ownership of partition_graph.
     item->graph = partition_graph.get();
     item->executor = nullptr;
+    item->device = device;
     Executor* executor;
     TF_RETURN_IF_ERROR(
         NewLocalExecutor(params, partition_graph.release(), &executor));
",0,train
c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device.
All modes are experimental for now. The goal is to find the best setting, and
change the default to pick that.

PiperOrigin-RevId: 178662212",direct_session.h,"@@ -112,6 +112,7 @@ class DirectSession : public Session {
   // every partition.
   struct PerPartitionExecutorsAndLib {
     Graph* graph = nullptr;                  // not owned.
+    Device* device = nullptr;                // not owned.
     FunctionLibraryRuntime* flib = nullptr;  // not owned.
     std::unique_ptr<Executor> executor;
   };
",0,train
c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device.
All modes are experimental for now. The goal is to find the best setting, and
change the default to pick that.

PiperOrigin-RevId: 178662212",gpu_device.cc,"@@ -60,6 +60,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/public/session_options.h""
 #include ""tensorflow/core/util/device_name_utils.h""
+#include ""tensorflow/core/util/env_var.h""
 #include ""tensorflow/core/util/stream_executor_util.h""
 
 namespace tensorflow {
@@ -305,6 +306,46 @@ Status BaseGPUDevice::Init(const SessionOptions& options) {
   gpu_device_info_->gpu_id = gpu_id_;
   set_tensorflow_gpu_device_info(gpu_device_info_);
 
+  // Whether and how the GPU device uses its own threadpool.
+  // This option is experimental. Once we confirm the best setting, we
+  // may change the default behavior and completely remove this flag.
+  // Default values might change in future releases.
+  // Possible values:
+  //   * global: GPU uses threads shared with CPU in the main compute
+  //          thread-pool. This is currently the default.
+  //   * gpu_private: GPU uses threads dedicated to this device.
+  //   * gpu_shared: All GPUs share a dedicated thread pool.
+  string gpu_thread_mode;
+  TF_RETURN_IF_ERROR(
+      ReadStringFromEnvVar(""TF_GPU_THREAD_MODE"", ""global"", &gpu_thread_mode));
+  gpu_thread_mode = str_util::Lowercase(gpu_thread_mode);
+  if (gpu_thread_mode != ""global"") {
+    int64 gpu_thread_count = -1;
+    // Default to two threads. One for device compute and another for memory
+    // copies.
+    TF_RETURN_IF_ERROR(
+        ReadInt64FromEnvVar(""TF_GPU_THREAD_COUNT"", 2, &gpu_thread_count));
+    if (gpu_thread_mode == ""gpu_private"") {
+      // TODO(zhengxq): since these threads only serve a single GPU device,
+      //   we should set the device context once for each thread, and avoid
+      //   setting them for each kernel.
+      // TODO(zhengxq): pin the thread to the same socket of the target GPU.
+      thread_pool_.reset(new thread::ThreadPool(
+          options.env, strings::StrCat(""gpu_private_"", gpu_id_),
+          static_cast<int32>(gpu_thread_count)));
+      set_tensorflow_device_thread_pool(thread_pool_.get());
+    } else if (gpu_thread_mode == ""gpu_shared"") {
+      static thread::ThreadPool* thread_pool = new thread::ThreadPool(
+          options.env, ""gpu_shared"", static_cast<int32>(gpu_thread_count));
+      set_tensorflow_device_thread_pool(thread_pool);
+    } else {
+      string error_message =
+          strings::StrCat(""Invalid gpu_thread_mode: "", gpu_thread_mode);
+      LOG(WARNING) << error_message;
+      return errors::InvalidArgument(error_message);
+    }
+  }
+
   return Status::OK();
 }
 
",0,train
c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device.
All modes are experimental for now. The goal is to find the best setting, and
change the default to pick that.

PiperOrigin-RevId: 178662212",gpu_device.h,"@@ -116,6 +116,7 @@ class BaseGPUDevice : public LocalDevice {
   const bool sync_every_op_ = false;
   const int32 max_streams_;
   std::unique_ptr<EventMgr> em_;
+  std::unique_ptr<thread::ThreadPool> thread_pool_;
 
   void ReinitializeDevice(OpKernelContext* context, PerOpGpuDevice* device,
                           int stream_id, Allocator* allocator);
",0,train
c381794b2fc3227bfee9cf085e26bafb33da8f4b,tensorflow/tensorflow,"Support different threading modes in GPU device.
All modes are experimental for now. The goal is to find the best setting, and
change the default to pick that.

PiperOrigin-RevId: 178662212",device_base.h,"@@ -145,6 +145,12 @@ class DeviceBase {
     return gpu_device_info_;
   }
 
+  // The preferred thread pool for this device. If it is nullptr, the system
+  // automatically assigns a thread pool for execution.
+  virtual thread::ThreadPool* tensorflow_device_thread_pool() {
+    return device_thread_pool_;
+  }
+
   // Does not take ownership.
   void set_eigen_cpu_device(Eigen::ThreadPoolDevice* d) {
     eigen_cpu_device_ = d;
@@ -215,10 +221,17 @@ class DeviceBase {
     return errors::Internal(""Device does not implement MakeTensorFromProto()"");
   }
 
+ protected:
+  // Does not take ownership.
+  void set_tensorflow_device_thread_pool(thread::ThreadPool* thread_pool) {
+    device_thread_pool_ = thread_pool;
+  }
+
  private:
   Env* const env_;
   CpuWorkerThreads* cpu_worker_threads_ = nullptr;
   GpuDeviceInfo* gpu_device_info_ = nullptr;
+  thread::ThreadPool* device_thread_pool_ = nullptr;
   Eigen::ThreadPoolDevice* eigen_cpu_device_ = nullptr;
 #ifdef TENSORFLOW_USE_SYCL
   Eigen::SyclDevice* eigen_sycl_device_ = nullptr;
",0,train
964a956abafab5bb1bd07256e75c89c57693b997,tensorflow/tensorflow,Fix typo in cuda_fft.cc.,cuda_fft.cc,"@@ -57,7 +57,7 @@ namespace dynload {
       static void *f;                                                          \
       port::Status s =                                                         \
           port::Env::Default->GetSymbolFromLibrary(GetDsoHandle(), kName, &f); \
-      CHECK(f.ok()) << ""could not find "" << kName                              \
+      CHECK(s.ok()) << ""could not find "" << kName                              \
                     << "" in cuFFT DSO; dlerror: "" << s.error_message();        \
       return reinterpret_cast<FuncPointerT>(f);                                \
     }                                                                          \
",0,train
c061d6c6b7b2004f5c271f4000fe6e1f9129e0ed,tensorflow/tensorflow,"Re-enable kmeans_test
Change: 148175182",kmeans_test.py,"@@ -22,14 +22,15 @@ import math
 import sys
 import time
 
+import numpy as np
+from sklearn.cluster import KMeans as SklearnKMeans
+
+# pylint: disable=g-import-not-at-top
 # TODO: #6568 Remove this hack that makes dlopen() not crash.
 if hasattr(sys, 'getdlopenflags') and hasattr(sys, 'setdlopenflags'):
   import ctypes
   sys.setdlopenflags(sys.getdlopenflags() | ctypes.RTLD_GLOBAL)
 
-import numpy as np
-from sklearn.cluster import KMeans as SklearnKMeans
-
 from tensorflow.contrib import factorization
 from tensorflow.contrib.learn.python import learn
 from tensorflow.contrib.learn.python.learn.estimators import kmeans as kmeans_lib
@@ -492,10 +493,10 @@ class TensorflowKMeansBenchmark(KMeansBenchmark):
           initial_clusters=factorization.KMEANS_PLUS_PLUS_INIT,
           kmeans_plus_plus_num_retries=int(math.log(self.num_clusters) + 2),
           random_seed=i * 42,
+          relative_tolerance=1e-6,
           config=run_config.RunConfig(tf_random_seed=3))
       tf_kmeans.fit(input_fn=lambda: (constant_op.constant(self.points), None),
-                    steps=50,
-                    relative_tolerance=1e-6)
+                    steps=50)
       _ = tf_kmeans.clusters()
       scores.append(
           tf_kmeans.score(
",0,test
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",c_api.cc,"@@ -752,8 +752,7 @@ TFE_Context* TFE_NewContext(const TFE_ContextOptions* opts, TF_Status* status) {
       static_cast<tensorflow::ContextDevicePlacementPolicy>(
           opts->device_placement_policy),
       opts->async, opts->lazy_remote_inputs_copy, device_mgr.release(),
-      /*device_mgr_owned*/ true, r,
-      tensorflow::GetDefaultCustomKernelCreator()));
+      /*device_mgr_owned*/ true, r));
 }
 
 void TFE_DeleteContext(TFE_Context* ctx) {
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",test_utils.cc,"@@ -48,7 +48,6 @@ EagerContextPtr CreateTestingEagerContext(DeviceMgr* device_mgr) {
       /* async= */ false,
       /* lazy_copy_function_remote_inputs= */ false, device_mgr,
       /* device_mgr_owned= */ false, /* rendezvous= */ nullptr,
-      /* custom_kernel_creator= */ nullptr,
       /* cluster_flr= */ nullptr));
 }
 
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_kernel_creator.cc,"@@ -72,7 +72,8 @@ static bool IsCompilable(FunctionLibraryRuntime* flr, const NodeDef& ndef,
 bool XlaKernelCreator::CanCreateKernel(
     const FunctionLibraryRuntime& flr,
     const std::shared_ptr<const NodeProperties>& props) const {
-  return CanCreateXlaKernel(props->node_def);
+  return CanCreateXlaKernel(props->node_def) &&
+         !XlaOpRegistry::IsCompilationDevice(flr.device()->device_type());
 }
 
 static Status CreateXlaKernel(FunctionLibraryRuntime* flr,
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_op_registry.cc,"@@ -134,6 +134,13 @@ XlaOpRegistry::~XlaOpRegistry() = default;
   result.first->second.op_filter = op_filter;
 }
 
+/* static */ bool XlaOpRegistry::IsCompilationDevice(
+    const string& device_name) {
+  XlaOpRegistry& registry = Instance();
+  mutex_lock lock(registry.mutex_);
+  return registry.backends_.find(device_name) != registry.backends_.end();
+}
+
 /* static */ bool XlaOpRegistry::GetCompilationDevice(
     const string& device_name, const DeviceRegistration** registration) {
   XlaOpRegistry& registry = Instance();
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",xla_op_registry.h,"@@ -153,6 +153,10 @@ class XlaOpRegistry {
   static void RegisterCompilationDevice(const string& device_name,
                                         const DeviceRegistration& registration);
 
+  // Returns whether the device name is for the JIT device used exclusively for
+  // TF2XLA conversion.
+  static bool IsCompilationDevice(const string& device_name);
+
   // Returns the JIT device name associated with 'device_name', setting
   // 'jit_device_name', 'requires_jit', and 'enabled_jit_by_default', if they
   // are not null. Returns false and leaves the outputs unchanged if no matching
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",direct_session.cc,"@@ -1342,12 +1342,10 @@ Status DirectSession::CreateExecutors(
       options_.config.experimental().has_session_metadata()
           ? &options_.config.experimental().session_metadata()
           : nullptr;
-  const CustomKernelCreator* custom_kernel_creator =
-      GetDefaultCustomKernelCreator();
   func_info->proc_flr.reset(new ProcessFunctionLibraryRuntime(
       device_mgr_.get(), options_.env, &options_.config, graph_def_version,
       func_info->flib_def.get(), optimizer_opts, thread_pools_[0].first,
-      /*parent=*/nullptr, custom_kernel_creator, session_metadata,
+      /*parent=*/nullptr, session_metadata,
       Rendezvous::Factory{
           [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) {
             *r = new IntraProcessRendezvous(device_mgr);
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context.cc,"@@ -76,7 +76,6 @@ EagerContext::EagerContext(
     ContextDevicePlacementPolicy default_device_placement_policy, bool async,
     const bool lazy_copy_function_remote_inputs, const DeviceMgr* device_mgr,
     bool device_mgr_owned, Rendezvous* rendezvous,
-    const CustomKernelCreator* custom_kernel_creator,
     DistributedFunctionLibraryRuntime* cluster_flr)
     : ImmediateExecutionContext(kEager),
       opts_(opts),
@@ -85,7 +84,6 @@ EagerContext::EagerContext(
       host_cpu_device_(device_mgr->HostCPU()),
       rendezvous_(rendezvous),
       thread_pool_(NewThreadPoolFromSessionOptions(opts)),
-      custom_kernel_creator_(custom_kernel_creator),
       cluster_flr_(cluster_flr),
       log_device_placement_(opts.config.log_device_placement()),
       allow_soft_placement_(opts.config.allow_soft_placement()),
@@ -99,7 +97,7 @@ EagerContext::EagerContext(
           ""TF_EAGER_ENABLE_SMALL_TENSOR_CPU_PINNING"", false)) {
   ResetPFLR(device_mgr, opts.env, &opts.config, TF_GRAPH_DEF_VERSION,
             &func_lib_def_, opts.config.graph_options().optimizer_options(),
-            thread_pool_.get(), cluster_flr, custom_kernel_creator_);
+            thread_pool_.get(), cluster_flr);
   // Starts exporting metrics through a platform-specific monitoring API (if
   // provided). For builds using ""tensorflow/core/platform/default"", this is
   // currently a no-op.
@@ -185,8 +183,7 @@ void EagerContext::ResetPFLR(const DeviceMgr* device_mgr, Env* env,
                              const FunctionLibraryDefinition* lib_def,
                              const OptimizerOptions& optimizer_options,
                              thread::ThreadPool* thread_pool,
-                             DistributedFunctionLibraryRuntime* cluster_flr,
-                             const CustomKernelCreator* custom_kernel_creator) {
+                             DistributedFunctionLibraryRuntime* cluster_flr) {
   Rendezvous::Factory rendezvous_factory{
       [this](const int64 step_id, const DeviceMgr*, Rendezvous** r) {
         *r = CreateRendezvous(step_id);
@@ -194,7 +191,7 @@ void EagerContext::ResetPFLR(const DeviceMgr* device_mgr, Env* env,
       }};
   pflr_.reset(new ProcessFunctionLibraryRuntime(
       device_mgr, env, config, graph_def_version, lib_def, optimizer_options,
-      thread_pool, cluster_flr, custom_kernel_creator,
+      thread_pool, cluster_flr,
       /*session_metadata=*/nullptr, std::move(rendezvous_factory)));
 }
 
@@ -1328,7 +1325,7 @@ Status EagerContext::SetMasterContextState(
   const auto* config = pflr_->config();
   ResetPFLR(local_device_manager_.Get(), env_, config, TF_GRAPH_DEF_VERSION,
             &func_lib_def_, config->graph_options().optimizer_options(),
-            thread_pool_.get(), cluster_flr_.Get(), custom_kernel_creator_);
+            thread_pool_.get(), cluster_flr_.Get());
 
   keep_alive_secs_ = keep_alive_secs;
   sleep_for_secs_ = std::max(1, keep_alive_secs_ / 2);
@@ -1430,7 +1427,7 @@ Status EagerContext::InitializeRemoteWorker(
   const auto* config = pflr_->config();
   ResetPFLR(local_device_manager_.Get(), env_, config, TF_GRAPH_DEF_VERSION,
             &func_lib_def_, config->graph_options().optimizer_options(),
-            thread_pool_.get(), cluster_flr_.Get(), custom_kernel_creator_);
+            thread_pool_.get(), cluster_flr_.Get());
   InitPrioritizedDeviceTypeList();
 
   ClearCachesAndThreadExecutors();
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context.h,"@@ -140,7 +140,6 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted {
                bool async, const bool lazy_copy_function_remote_inputs,
                const DeviceMgr* device_mgr, bool device_mgr_owned,
                Rendezvous* rendezvous,
-               const CustomKernelCreator* custom_kernel_creator,
                DistributedFunctionLibraryRuntime* cluster_flr = nullptr);
 
   void Release() override { Unref(); }
@@ -495,8 +494,7 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted {
                  const FunctionLibraryDefinition* lib_def,
                  const OptimizerOptions& optimizer_options,
                  thread::ThreadPool* thread_pool = nullptr,
-                 DistributedFunctionLibraryRuntime* cluster_flr = nullptr,
-                 const CustomKernelCreator* custom_kernel_creator = nullptr);
+                 DistributedFunctionLibraryRuntime* cluster_flr = nullptr);
 
   void ResetClusterFLR(DistributedFunctionLibraryRuntime* cluster_flr);
 
@@ -570,8 +568,6 @@ class EagerContext : public ImmediateExecutionContext, public core::RefCounted {
 
   std::unique_ptr<thread::ThreadPool> thread_pool_;
 
-  const CustomKernelCreator* const custom_kernel_creator_;
-
   // EagerContext owns the DistributedFunctionLibraryRuntime(
   // EagerClusterFunctionLibraryRuntime) if using EagerService for remote
   // function execution (lazy_copy_function_remote_inputs_=true).
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",context_test.cc,"@@ -59,7 +59,6 @@ class EagerContextTest : public ::testing::Test {
         /* async */ false,
         /* lazy_copy_function_remote_inputs */ false, device_manager_,
         /* device_mgr_owned */ false, /* rendezvous */ nullptr,
-        /* custom_kernel_creator */ nullptr,
         /* cluster_flr */ nullptr);
   }
 
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_operation_test.cc,"@@ -28,7 +28,7 @@ TEST(EagerOperationTest, DeviceName) {
   auto ctx = new EagerContext(
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false,
-      false, &device_mgr, false, nullptr, nullptr, nullptr);
+      false, &device_mgr, false, nullptr, nullptr);
 
   auto op = new EagerOperation(ctx);
 
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",execute_node_test.cc,"@@ -68,7 +68,7 @@ TEST(ExecuteNodeTest, ExecuteNodeArgs) {
   auto ctx = new EagerContext(
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false,
-      false, &device_mgr, false, nullptr, nullptr, nullptr);
+      false, &device_mgr, false, nullptr, nullptr);
 
   // Set a RemoteMgr to the EagerContext.
   auto remote_mgr = absl::make_unique<eager::RemoteMgr>(
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",mkl_eager_op_rewrite_test.cc,"@@ -40,8 +40,7 @@ class EagerOpRewriteTest : public ::testing::Test {
     tensorflow::EagerContext* eager_ctx = new tensorflow::EagerContext(
         SessionOptions(),
         tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
-        async, lazy_remote_tensor_copy, device_mgr.get(), false, rendezvous,
-        GetDefaultCustomKernelCreator());
+        async, lazy_remote_tensor_copy, device_mgr.get(), false, rendezvous);
 
     EagerExecutor executor_(false);
     std::unique_ptr<tensorflow::EagerOperation> op(
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",placement_test.cc,"@@ -88,7 +88,6 @@ class PlacementTest : public ::testing::Test {
         /* async */ false,
         /* lazy_copy_function_remote_inputs */ false, device_manager_,
         /* device_mgr_owned */ false, /* rendezvous */ nullptr,
-        /* custom_kernel_creator */ nullptr,
         /* cluster_flr */ nullptr);
   }
 
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",tensor_handle_test.cc,"@@ -39,7 +39,7 @@ TEST(TensorHandle_ShapeTest, AsyncShape) {
   auto ctx = new EagerContext(
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false,
-      false, &device_mgr, false, nullptr, nullptr, nullptr);
+      false, &device_mgr, false, nullptr, nullptr);
   TensorHandle* sync_th =
       TensorHandle::CreateLocalHandle(std::move(t), nullptr, nullptr, ctx);
   TensorHandle* async_th = TensorHandle::CreateEmptyLocalHandle(
@@ -108,7 +108,6 @@ class PackedTensorHandleTest : public ::testing::Test {
         /* async= */ false,
         /* lazy_copy_function_remote_inputs= */ false, device_mgr_,
         /* device_mgr_owned= */ false, /* rendezvous= */ nullptr,
-        /* custom_kernel_creator= */ nullptr,
         /* cluster_flr= */ nullptr);
   }
 
@@ -257,7 +256,7 @@ TEST(TensorHandle_ResourceDeviceTest, OnLocalDevice) {
   auto ctx = new EagerContext(
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false,
-      false, &local_device_mgr, false, nullptr, nullptr, nullptr);
+      false, &local_device_mgr, false, nullptr, nullptr);
 
   tensorflow::DataType dtype = DT_RESOURCE;
   TensorShape shape = {2};
@@ -289,7 +288,7 @@ TEST(TensorHandle_ResourceDeviceTest, OnRemoteDevice) {
   auto ctx = new EagerContext(
       SessionOptions(),
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT, false,
-      false, &local_device_mgr, false, nullptr, nullptr, nullptr);
+      false, &local_device_mgr, false, nullptr, nullptr);
 
   std::unique_ptr<Device> d0(
       CreateDevice(""CPU"", ""/job:worker/task:0/device:CPU:0"", false));
@@ -346,7 +345,6 @@ class RemoteTensorHandleTest : public ::testing::Test {
         /* async= */ false,
         /* lazy_copy_function_remote_inputs= */ false, device_mgr_,
         /* device_mgr_owned= */ false, /* rendezvous= */ nullptr,
-        /* custom_kernel_creator= */ nullptr,
         /* cluster_flr= */ nullptr);
   }
 
@@ -387,7 +385,6 @@ TEST_F(RemoteTensorHandleTest, UnknownRemoteDevice) {
       /* async= */ false,
       /* lazy_copy_function_remote_inputs= */ false, &device_mgr,
       /* device_mgr_owned= */ false, /* rendezvous= */ nullptr,
-      /* custom_kernel_creator= */ nullptr,
       /* cluster_flr= */ nullptr);
 
   tensorflow::DataType dtype = DT_FLOAT;
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function.cc,"@@ -326,7 +326,6 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
                              const FunctionLibraryDefinition* lib_def,
                              thread::ThreadPool* default_thread_pool,
                              const OptimizerOptions& optimizer_options,
-                             const CustomKernelCreator* custom_kernel_creator,
                              const SessionMetadata* session_metadata,
                              ProcessFunctionLibraryRuntime* parent);
 
@@ -390,7 +389,6 @@ class FunctionLibraryRuntimeImpl : public FunctionLibraryRuntime {
   const int graph_def_version_;
   const FunctionLibraryDefinition* const base_lib_def_;
   GraphOptimizer optimizer_;
-  const CustomKernelCreator* custom_kernel_creator_;
   const SessionMetadata* const session_metadata_;
   Executor::Args::Runner default_runner_;
   const string device_name_;
@@ -462,7 +460,6 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
     int graph_def_version, const FunctionLibraryDefinition* lib_def,
     thread::ThreadPool* default_thread_pool,
     const OptimizerOptions& optimizer_options,
-    const CustomKernelCreator* custom_kernel_creator,
     const SessionMetadata* session_metadata,
     ProcessFunctionLibraryRuntime* parent)
     : device_mgr_(dmgr),
@@ -472,7 +469,6 @@ FunctionLibraryRuntimeImpl::FunctionLibraryRuntimeImpl(
       graph_def_version_(graph_def_version),
       base_lib_def_(lib_def),
       optimizer_(optimizer_options),
-      custom_kernel_creator_(custom_kernel_creator),
       session_metadata_(session_metadata),
       default_runner_(nullptr),
       device_name_(device_ == nullptr
@@ -609,10 +605,12 @@ Status FunctionLibraryRuntimeImpl::CreateKernel(
     FunctionLibraryRuntime* flr, OpKernel** kernel) {
   // If a custom kernel creator is given, try that.
   Status s;
-  if (custom_kernel_creator_ != nullptr &&
-      custom_kernel_creator_->CanCreateKernel(*this, props)) {
+  const CustomKernelCreator* custom_kernel_creator =
+      GetDefaultCustomKernelCreator();
+  if (custom_kernel_creator &&
+      custom_kernel_creator->CanCreateKernel(*this, props)) {
     std::unique_ptr<OpKernel> ret;
-    s = custom_kernel_creator_->CreateKernel(this, props, &ret);
+    s = custom_kernel_creator->CreateKernel(this, props, &ret);
     if (s.ok()) {
       *kernel = ret.release();
     } else {
@@ -1328,9 +1326,9 @@ Status FunctionLibraryRuntimeImpl::Clone(
     std::unique_ptr<FunctionLibraryDefinition>* out_lib_def,
     std::unique_ptr<ProcessFunctionLibraryRuntime>* out_pflr,
     FunctionLibraryRuntime** out_flr, bool skip_flib_def) {
-  TF_RETURN_IF_ERROR(parent_->Clone(
-      env_, graph_def_version_, optimizer_.options(), custom_kernel_creator_,
-      out_lib_def, out_pflr, skip_flib_def));
+  TF_RETURN_IF_ERROR(parent_->Clone(env_, graph_def_version_,
+                                    optimizer_.options(), out_lib_def, out_pflr,
+                                    skip_flib_def));
   *out_flr = (*out_pflr)->GetFLR(device_->name());
   if (*out_flr != nullptr) {
     return Status::OK();
@@ -1376,12 +1374,11 @@ std::unique_ptr<FunctionLibraryRuntime> NewFunctionLibraryRuntime(
     Device* device, int graph_def_version,
     const FunctionLibraryDefinition* lib_def, thread::ThreadPool* thread_pool,
     const OptimizerOptions& optimizer_options,
-    const CustomKernelCreator* custom_kernel_creator,
     const SessionMetadata* session_metadata,
     ProcessFunctionLibraryRuntime* parent) {
   return std::unique_ptr<FunctionLibraryRuntime>(new FunctionLibraryRuntimeImpl(
       device_mgr, env, config, device, graph_def_version, lib_def, thread_pool,
-      optimizer_options, custom_kernel_creator, session_metadata, parent));
+      optimizer_options, session_metadata, parent));
 }
 
 class SymbolicGradientHelper {
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function.h,"@@ -43,15 +43,11 @@ const CustomKernelCreator* GetDefaultCustomKernelCreator();
 // interpreter op kernel to execute a function. Else c->CreateKernel() can be
 // used to create a kernel that will compile the function with XLA and run the
 // resulting program.
-//
-// TODO(zhifengc/phawkins): b/32379046
 void RegisterDefaultCustomKernelCreator(CustomKernelCreator* c);
 
 // Creates a FunctionLibraryRuntime, which instantiates functions
 // defined in ""lib_def"" and executes functions on the ""device"".
-// ""device_mgr"" must contain the ""device"". If not nullptr,
-// ""custom_kernel_creator"" is consulted by the returned runtime to
-// create kernels.
+// ""device_mgr"" must contain the ""device"".
 //
 // The returned object does not take ownerships of ""device"" or
 // ""lib_def"".  The caller must ensure ""device"" and ""lib_def"" outlives
@@ -65,7 +61,6 @@ std::unique_ptr<FunctionLibraryRuntime> NewFunctionLibraryRuntime(
     Device* device, int graph_def_version,
     const FunctionLibraryDefinition* lib_def, thread::ThreadPool* thread_pool,
     const OptimizerOptions& optimizer_options,
-    const CustomKernelCreator* custom_kernel_creator,
     const SessionMetadata* session_metadata,
     ProcessFunctionLibraryRuntime* parent);
 
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function_test.cc,"@@ -162,8 +162,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
     pflr_.reset(new ProcessFunctionLibraryRuntime(
         device_mgr_.get(), Env::Default(), &options.config,
         TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, /*thread_pool=*/nullptr,
-        /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr,
-        /*session_metadata=*/nullptr,
+        /*parent=*/nullptr, /*session_metadata=*/nullptr,
         Rendezvous::Factory{
             [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) {
               *r = new IntraProcessRendezvous(device_mgr);
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",function_threadpool_test.cc,"@@ -65,8 +65,7 @@ class FunctionLibraryRuntimeTest : public ::testing::Test {
     pflr_.reset(new ProcessFunctionLibraryRuntime(
         device_mgr_.get(), Env::Default(), /*config=*/nullptr,
         TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, default_thread_pool,
-        /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr,
-        /*session_metadata=*/nullptr,
+        /*parent=*/nullptr, /*session_metadata=*/nullptr,
         Rendezvous::Factory{
             [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) {
               *r = new IntraProcessRendezvous(device_mgr);
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",kernel_benchmark_testlib.cc,"@@ -90,7 +90,7 @@ Benchmark::Benchmark(const string& device, Graph* g,
   pflr_ = std::unique_ptr<ProcessFunctionLibraryRuntime>(
       new ProcessFunctionLibraryRuntime(
           device_mgr_.get(), Env::Default(), nullptr, graph_def_version,
-          flib_def_.get(), OptimizerOptions(), pool_, nullptr, nullptr, nullptr,
+          flib_def_.get(), OptimizerOptions(), pool_, nullptr, nullptr,
           Rendezvous::Factory()));
 
   flr_ = pflr_->GetFLR(device_->name());
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime.cc,"@@ -88,7 +88,6 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     const OptimizerOptions& optimizer_options,
     thread::ThreadPool* default_thread_pool,
     DistributedFunctionLibraryRuntime* parent,
-    const CustomKernelCreator* custom_kernel_creator,
     const SessionMetadata* session_metadata,
     Rendezvous::Factory rendezvous_factory)
     : parent_(parent),
@@ -106,14 +105,14 @@ ProcessFunctionLibraryRuntime::ProcessFunctionLibraryRuntime(
     (*flr_map_)[nullptr] = NewFunctionLibraryRuntime(
         nullptr, env, config_ ? &(*config_) : nullptr, nullptr,
         graph_def_version, lib_def_, default_thread_pool, optimizer_options,
-        custom_kernel_creator, session_metadata_, this);
+        session_metadata_, this);
     return;
   }
   for (Device* d : device_mgr->ListDevices()) {
     (*flr_map_)[d] = NewFunctionLibraryRuntime(
         device_mgr, env, config_ ? &(*config_) : nullptr, d, graph_def_version,
-        lib_def_, default_thread_pool, optimizer_options, custom_kernel_creator,
-        session_metadata_, this);
+        lib_def_, default_thread_pool, optimizer_options, session_metadata_,
+        this);
   }
 
   InitializeDeviceSet();
@@ -1715,7 +1714,6 @@ void ProcessFunctionLibraryRuntime::CleanUp(
 
 Status ProcessFunctionLibraryRuntime::Clone(
     Env* env, int graph_def_version, const OptimizerOptions& optimizer_options,
-    const CustomKernelCreator* custom_kernel_creator,
     std::unique_ptr<FunctionLibraryDefinition>* out_lib_def,
     std::unique_ptr<ProcessFunctionLibraryRuntime>* out_pflr,
     bool skip_flib_def) const {
@@ -1728,7 +1726,7 @@ Status ProcessFunctionLibraryRuntime::Clone(
   *out_pflr = absl::make_unique<ProcessFunctionLibraryRuntime>(
       device_mgr_, env, config_ ? &(*config_) : nullptr, graph_def_version,
       out_lib_def->get(), optimizer_options, default_thread_pool_, parent_,
-      custom_kernel_creator, session_metadata_, rendezvous_factory_);
+      session_metadata_, rendezvous_factory_);
   {
     tf_shared_lock l(mu_);
     for (auto* d : composite_devices_) (*out_pflr)->AddCompositeDevice(d);
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime.h,"@@ -69,7 +69,6 @@ class ProcessFunctionLibraryRuntime {
       const OptimizerOptions& optimizer_options,
       thread::ThreadPool* thread_pool = nullptr,
       DistributedFunctionLibraryRuntime* parent = nullptr,
-      const CustomKernelCreator* custom_kernel_creator = nullptr,
       const SessionMetadata* session_metadata = nullptr,
       Rendezvous::Factory rendezvous_factory = Rendezvous::Factory());
 
@@ -357,7 +356,6 @@ class ProcessFunctionLibraryRuntime {
   // runtime w.r.t. to number of functions in the current function library.
   Status Clone(Env* env, int graph_def_version,
                const OptimizerOptions& optimizer_options,
-               const CustomKernelCreator* custom_kernel_creator,
                std::unique_ptr<FunctionLibraryDefinition>* out_lib_def,
                std::unique_ptr<ProcessFunctionLibraryRuntime>* out_pflr,
                bool skip_flib_def = false) const;
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",process_function_library_runtime_test.cc,"@@ -139,8 +139,7 @@ class ProcessFunctionLibraryRuntimeTest : public ::testing::Test {
     proc_flr_.reset(new ProcessFunctionLibraryRuntime(
         device_mgr_.get(), Env::Default(), /*config=*/nullptr,
         TF_GRAPH_DEF_VERSION, lib_def_.get(), opts,
-        /*thread_pool=*/nullptr, cluster_flr_.get(),
-        /*custom_kernel_creator=*/nullptr, session_metadata,
+        /*thread_pool=*/nullptr, cluster_flr_.get(), session_metadata,
         Rendezvous::Factory{
             [this](const int64 step_id, const DeviceMgr* device_mgr,
                    Rendezvous** r) {
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",standalone.cc,"@@ -59,7 +59,6 @@ Status Dataset::FromGraph(Params params, const GraphDef& graph_def,
       device_mgr.get(), Env::Default(), /*config=*/nullptr,
       TF_GRAPH_DEF_VERSION, flib_def.get(), OptimizerOptions{},
       /*thread_pool=*/nullptr, /*parent=*/nullptr,
-      /*custom_kernel_creator=*/nullptr,
       /*session_metadata=*/nullptr,
       Rendezvous::Factory{
           [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) {
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_service_impl.cc,"@@ -275,7 +275,7 @@ Status EagerServiceImpl::CreateContext(const CreateContextRequest* request,
   tensorflow::EagerContext* ctx = new tensorflow::EagerContext(
       opts, tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
       request->async(), request->lazy_copy_remote_function_inputs(), device_mgr,
-      false, r, GetDefaultCustomKernelCreator(), worker_session->cluster_flr());
+      false, r, worker_session->cluster_flr());
   // Ownership will be transferred to the ServerContext, or else in an error
   // case ctx will be deleted by this unref.
   core::ScopedUnref unref_ctx(ctx);
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",eager_service_impl_test.cc,"@@ -780,7 +780,7 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest {
         remote_device_mgr_.get(), Env::Default(), /*config=*/
         nullptr, TF_GRAPH_DEF_VERSION, &func_lib_def_, OptimizerOptions(),
         /*thread_pool=*/nullptr, eager_cluster_flr_.get(),
-        /*custom_kernel_creator=*/nullptr, /*session_metadata=*/nullptr,
+        /*session_metadata=*/nullptr,
         Rendezvous::Factory{[this](const int64 step_id,
                                    const DeviceMgr* device_mgr,
                                    Rendezvous** r) {
@@ -1220,7 +1220,7 @@ TEST_F(EagerServiceImplTest, RequestsToMasterTest) {
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
       /*async=*/false,
       /*lazy_copy_function_remote_inputs=*/false, device_mgr_.get(), false,
-      rendezvous, GetDefaultCustomKernelCreator());
+      rendezvous);
   const uint64 context_id = random::New64();
 
   // Set RemoteMgr to ctx.
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",remote_mgr_test.cc,"@@ -56,7 +56,7 @@ class RemoteMgrTest : public ::testing::Test {
         tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
         /*async=*/false,
         /*lazy_copy_function_remote_inputs=*/false, device_mgr.release(), true,
-        rendezvous, GetDefaultCustomKernelCreator(), nullptr);
+        rendezvous, nullptr);
   }
 
   ~RemoteMgrTest() override { ctx_->Unref(); }
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",graph_mgr.cc,"@@ -136,12 +136,11 @@ Status GraphMgr::InitItem(
 
   // We don't explicitly Validate the graph def because ConvertGraphDefToGraph
   // does that below.
-
   item->proc_flr.reset(new ProcessFunctionLibraryRuntime(
       device_mgr_, worker_env_->env, /*config=*/&config_proto,
       gdef.versions().producer(), item->lib_def.get(),
       graph_options.optimizer_options(), worker_env_->compute_pool, cluster_flr,
-      /*custom_kernel_creator=*/nullptr, /*session_metadata=*/nullptr,
+      /*session_metadata=*/nullptr,
       Rendezvous::Factory{
           [this, session](const int64 step_id, const DeviceMgr*,
                           Rendezvous** r) -> Status {
",0,train
0e718f2b0a9de489f135b9228f2db11c00a857d5,tensorflow/tensorflow,"[TF2XLA] Remove the serialization of CustomKernelCreator, since there is only one, and we won't add new ones

Serialization adds a new surface area for bugs, as not all the callers
propagate the CustomKernelCreator correctly.  Moreover, the mechanism is quite
hacky and in the future we plan to potentially switch to a different one.

PiperOrigin-RevId: 333111910
Change-Id: I5a02200dfdffde657bd5d9e4547c470d8644d892",dataset_test_base.cc,"@@ -402,7 +402,7 @@ Status DatasetOpsTestBase::InitFunctionLibraryRuntime(
   pflr_ = absl::make_unique<ProcessFunctionLibraryRuntime>(
       device_mgr_.get(), Env::Default(), /*config=*/nullptr,
       TF_GRAPH_DEF_VERSION, lib_def_.get(), opts, thread_pool_.get(),
-      /*parent=*/nullptr, /*custom_kernel_creator=*/nullptr,
+      /*parent=*/nullptr,
       /*session_metadata=*/nullptr,
       Rendezvous::Factory{
           [](const int64, const DeviceMgr* device_mgr, Rendezvous** r) {
",0,train
8a05bdf333f34603b33c0f3a029e023deb27ae04,tensorflow/tensorflow,"Expose the RegAdagradOptimizer (which allows the user to specify whether a loss should update the accumulator) through tf.contrib.opt.

PiperOrigin-RevId: 210253451",__init__.py,"@@ -31,6 +31,7 @@ from tensorflow.contrib.opt.python.training.model_average_optimizer import *
 from tensorflow.contrib.opt.python.training.moving_average_optimizer import *
 from tensorflow.contrib.opt.python.training.multitask_optimizer_wrapper import *
 from tensorflow.contrib.opt.python.training.nadam_optimizer import *
+from tensorflow.contrib.opt.python.training.reg_adagrad_optimizer import *
 from tensorflow.contrib.opt.python.training.shampoo import *
 from tensorflow.contrib.opt.python.training.weight_decay_optimizers import *
 from tensorflow.contrib.opt.python.training.powersign import *
@@ -65,6 +66,7 @@ _allowed_symbols = [
     'ModelAverageCustomGetter',
     'GGTOptimizer',
     'ShampooOptimizer',
+    'RegAdagradOptimizer',
 ]
 
 remove_undocumented(__name__, _allowed_symbols)
",0,train
26664f30f4f3d98b500c8dfd4e7852661280cfab,tensorflow/tensorflow,Addressing PR feedback,device_tracer_test.cc,"@@ -243,6 +243,7 @@ TEST_F(DeviceTracerTest, RunWithTraceOption) {
   EXPECT_GE(run_metadata.step_stats().dev_stats_size(), 1);
 }
 
+#if TENSORFLOW_USE_ROCM
 TEST_F(DeviceTracerTest, TraceToXSpace) {
   auto tracer = CreateGpuTracer();
   if (!tracer) return;
@@ -266,13 +267,8 @@ TEST_F(DeviceTracerTest, TraceToXSpace) {
   XSpace space;
   TF_ASSERT_OK(tracer->CollectData(&space));
   // At least one gpu plane and one host plane for launching events.
-#if GOOGLE_CUDA
-  const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
-  ASSERT_NE(host_plane, nullptr);
-#elif TENSORFLOW_USE_ROCM
   const XPlane* host_plane = FindPlaneWithName(space, kRoctracerApiPlaneName);
   ASSERT_NE(host_plane, nullptr);
-#endif
 
   const XPlane* device_plane =
       FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));
@@ -283,14 +279,59 @@ TEST_F(DeviceTracerTest, TraceToXSpace) {
   EXPECT_GE(device_plane->event_metadata_size(), 5);
   // Check if device capacity is serialized.
   XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane);
-#if GOOGLE_CUDA
+
+  // Check if the device events timestamps are set.
+  int total_events = 0;
+  plane.ForEachLine([&](const tensorflow::profiler::XLineVisitor& line) {
+    line.ForEachEvent([&](const tensorflow::profiler::XEventVisitor& event) {
+      EXPECT_GT(event.TimestampNs(), 0);
+      EXPECT_GT(event.DurationNs(), 0);
+      ++total_events;
+    });
+  });
+  EXPECT_GE(total_events, 5);
+}
+#else // TENSORFLOW_USE_ROCM
+TEST_F(DeviceTracerTest, TraceToXSpace) {
+  auto tracer = CreateGpuTracer();
+  if (!tracer) return;
+
+  Initialize({3, 2, -1, 0});
+  auto session = CreateSession();
+  ASSERT_TRUE(session != nullptr);
+  TF_ASSERT_OK(session->Create(def_));
+  std::vector<std::pair<string, Tensor>> inputs;
+
+  // Request two targets: one fetch output and one non-fetched output.
+  std::vector<string> output_names = {y_ + "":0""};
+  std::vector<string> target_nodes = {y_neg_};
+  std::vector<Tensor> outputs;
+
+  TF_ASSERT_OK(tracer->Start());
+  Status s = session->Run(inputs, output_names, target_nodes, &outputs);
+  TF_ASSERT_OK(s);
+
+  TF_ASSERT_OK(tracer->Stop());
+  XSpace space;
+  TF_ASSERT_OK(tracer->CollectData(&space));
+  // At least one gpu plane and one host plane for launching events.
+  const XPlane* host_plane = FindPlaneWithName(space, kCuptiDriverApiPlaneName);
+  ASSERT_NE(host_plane, nullptr);
+
+  const XPlane* device_plane =
+      FindPlaneWithName(space, strings::StrCat(kGpuPlanePrefix, 0));
+  ASSERT_NE(device_plane, nullptr);  // Check if device plane is serialized.
+  // one for MemcpyH2D, one for MemcpyD2H, two for Matmul (one from Eigen, one
+  // from cudnn), one for memset.
+  EXPECT_EQ(device_plane->event_metadata_size(), 5);
+  // Check if device capacity is serialized.
+  XPlaneVisitor plane = CreateTfXPlaneVisitor(device_plane);
   EXPECT_TRUE(plane.GetStat(kDevCapClockRateKHz).has_value());
   EXPECT_TRUE(plane.GetStat(kDevCapCoreCount).has_value());
   EXPECT_TRUE(plane.GetStat(kDevCapMemoryBandwidth).has_value());
   EXPECT_TRUE(plane.GetStat(kDevCapMemorySize).has_value());
   EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMajor).has_value());
   EXPECT_TRUE(plane.GetStat(kDevCapComputeCapMinor).has_value());
-#endif
 
   // Check if the device events timestamps are set.
   int total_events = 0;
@@ -303,6 +344,7 @@ TEST_F(DeviceTracerTest, TraceToXSpace) {
   });
   EXPECT_GE(total_events, 5);
 }
+#endif // TENSORFLOW_USE_ROCM
 
 #if GOOGLE_CUDA
 TEST_F(DeviceTracerTest, CudaRuntimeResource) {
",0,test
4ca1258da5f15f686e7caae3850f4aff30e9a9c0,tensorflow/tensorflow,[tensorflow/compiler/tf2tensorrt/convert/trt_optimization_pass.cc] Add calls to `reserve()` before populating vectors,trt_optimization_pass.cc,"@@ -374,7 +374,9 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster,
   }
 
   std::vector<string> nodes_to_preserve;
-  for (const auto& n : item.NodesToPreserve()) {
+  auto _nodes_to_preserve = item.NodesToPreserve();
+  nodes_to_preserve.reserve(_nodes_to_preserve.size());
+  for (const auto& n : _nodes_to_preserve) {
     auto tokens = str_util::Split(n, "":"");
     string s = tokens.at(0);
     for (int i = 1; i < tokens.size() - 1; ++i) {
",0,train
1e2c750e62e80a4c26385536791e53806d4bffe4,tensorflow/tensorflow,"Remove trailing whitespaces in TraceMe names.

PiperOrigin-RevId: 267487085",direct_session.cc,"@@ -500,11 +500,11 @@ Status DirectSession::RunInternal(
         if (options_.config.experimental().has_session_metadata()) {
           const auto& model_metadata =
               options_.config.experimental().session_metadata();
-          return strings::StrCat(""SessionRun #id="", step_id,
+          return strings::StrCat(""SessionRun#id="", step_id,
                                  "",model_id="", model_metadata.name(), "":"",
                                  model_metadata.version(), ""#"");
         } else {
-          return strings::StrCat(""SessionRun #id="", step_id, ""#"");
+          return strings::StrCat(""SessionRun#id="", step_id, ""#"");
         }
       },
       profiler::TraceMeLevel::kInfo);
",0,train
1e2c750e62e80a4c26385536791e53806d4bffe4,tensorflow/tensorflow,"Remove trailing whitespaces in TraceMe names.

PiperOrigin-RevId: 267487085",graph_mgr.cc,"@@ -418,7 +418,7 @@ void GraphMgr::ExecuteAsync(const string& handle, const int64 step_id,
                             CancellationManager* cancellation_manager,
                             const NamedTensors& in, StatusCallback done) {
   const uint64 start_time_usecs = Env::Default()->NowMicros();
-  string session_id_meta = strings::StrCat(""RunGraph #id="", step_id, ""#"");
+  string session_id_meta = strings::StrCat(""RunGraph#id="", step_id, ""#"");
   auto* activity = new profiler::TraceMe(absl::string_view(session_id_meta),
                                          profiler::TraceMeLevel::kInfo);
   // Lookup an item. Holds one ref while executing.
",0,train
c486a9177192f652320d37a5fdf33ab9a3a789f2,tensorflow/tensorflow,"Fix bugs in neutral element code and add more unit tests to cover matmul with input shape != output shape.

PiperOrigin-RevId: 177920882",constant_folding.cc,"@@ -1254,8 +1254,11 @@ void ConstantFolding::ReplaceAddOrMulWithIdentity(int input_to_forward,
 Status ConstantFolding::ReplaceAddOrMulWithConstant(
     double value, const TensorShapeProto& shape, NodeDef* node) {
   AttrValue tensor_attr;
-  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(node->attr().at(""T"").type(),
-                                                   value, shape, &tensor_attr));
+  AttrValue dtype_attr = node->attr().at(""T"");
+  TF_RETURN_IF_ERROR(CreateConstantTensorAttrValue(dtype_attr.type(), value,
+                                                   shape, &tensor_attr));
+  node->clear_attr();
+  node->mutable_attr()->insert({""dtype"", dtype_attr});
   node->mutable_attr()->insert({""value"", tensor_attr});
   node->set_op(""Const"");
   // Convert all inputs to control dependencies.
@@ -1333,55 +1336,44 @@ Status ConstantFolding::SimplifyGraph(GraphDef* output,
           properties.GetOutputProperties(node.name())[0].shape();
       const TensorShapeProto& x_shape =
           properties.GetInputProperties(node.name())[0].shape();
-
-      // Simplify multiplication by or addition of zeros.
-      const bool x_is_zero = IsZeros(*x);
-      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
-      if (x_is_zero) {
-        if ((is_mul && x_matches_output_shape) || is_matmul) {
-          // 0 * y = 0
-          ReplaceAddOrMulWithIdentity(0, &node);
-        } else {
-          // 0 + y = y.
-          ReplaceAddOrMulWithIdentity(1, &node);
-        }
-        continue;
-      }
       const TensorShapeProto& y_shape =
           properties.GetInputProperties(node.name())[1].shape();
+      const bool x_is_zero = IsZeros(*x);
+      const bool x_matches_output_shape = ShapesEqual(output_shape, x_shape);
       const bool y_is_zero = IsZeros(*y);
       const bool y_matches_output_shape = ShapesEqual(output_shape, y_shape);
-      if (y_is_zero) {
-        if ((is_mul && y_matches_output_shape) || is_matmul) {
-          // x * 0 = 0
+
+      // Simplify addition of zeros.
+      if (is_add) {
+        if (x_is_zero && y_matches_output_shape) {
+          // 0 + y = y.
           ReplaceAddOrMulWithIdentity(1, &node);
-        } else {
+          continue;
+        } else if (y_is_zero && x_matches_output_shape) {
           // x + 0 = y.
           ReplaceAddOrMulWithIdentity(0, &node);
+          continue;
         }
-        continue;
       }
 
+      // Simplify element-wise multiplication by ones.
       if (is_mul) {
-        // Simplify scalar multiplication by zeros where, due to broadcasting,
-        // the output shape does not match the shape of the zero input.
-        if (x_is_zero || y_is_zero) {
-          TF_RETURN_IF_ERROR(
-              ReplaceAddOrMulWithConstant(0, output_shape, &node));
-          continue;
-        }
-
-        // Simplify multiplication by ones.
         if (IsOnes(*x) && y_matches_output_shape) {
           // 1 * y = y.
           ReplaceAddOrMulWithIdentity(1, &node);
           continue;
-        } else if (IsOnes(*y) && x_matches_output_shape) {
+        }
+        if (IsOnes(*y) && x_matches_output_shape) {
           // x * 1 = x.
           ReplaceAddOrMulWithIdentity(0, &node);
           continue;
         }
       }
+
+      // Simplify multiplication and matmul by zeros.
+      if (x_is_zero || y_is_zero) {
+        TF_RETURN_IF_ERROR(ReplaceAddOrMulWithConstant(0, output_shape, &node));
+      }
     }
   }
   return Status::OK();
",0,train
c486a9177192f652320d37a5fdf33ab9a3a789f2,tensorflow/tensorflow,"Fix bugs in neutral element code and add more unit tests to cover matmul with input shape != output shape.

PiperOrigin-RevId: 177920882",constant_folding_test.cc,"@@ -84,6 +84,10 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
                                 ops::Placeholder::Shape(TensorShape({2, 2})));
     Output y = ops::Placeholder(s.WithOpName(""y""), DT_FLOAT,
                                 ops::Placeholder::Shape(TensorShape({2, 2})));
+    Output a = ops::Placeholder(s.WithOpName(""a""), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({3, 2})));
+    Output b = ops::Placeholder(s.WithOpName(""b""), DT_FLOAT,
+                                ops::Placeholder::Shape(TensorShape({2, 3})));
     Output zeros = !use_const ? ops::ZerosLike(s.WithOpName(""zeros""), x)
                               : ops::Const(s.WithOpName(""zeros""), 0.0f, {2, 2});
     Output zeros_broadcast =
@@ -94,16 +98,20 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Output mul2 = ops::Mul(s.WithOpName(""mul2""), zeros, y);
     Output mul3 = ops::Mul(s.WithOpName(""mul3""), x, ones);
     Output mul4 = ops::Mul(s.WithOpName(""mul4""), ones, y);
-    Output mul5 = ops::Mul(s.WithOpName(""mul1""), x, zeros_broadcast);
-    Output mul6 = ops::Mul(s.WithOpName(""mul2""), zeros_broadcast, y);
+    Output mul5 = ops::Mul(s.WithOpName(""mul5""), x, zeros_broadcast);
+    Output mul6 = ops::Mul(s.WithOpName(""mul6""), zeros_broadcast, y);
     Output matmul1 = ops::MatMul(s.WithOpName(""matmul1""), x, zeros);
     Output matmul2 = ops::MatMul(s.WithOpName(""matmul2""), zeros, y);
+    Output matmul3 = ops::MatMul(s.WithOpName(""matmul3""), a, zeros);
+    Output matmul4 = ops::MatMul(s.WithOpName(""matmul4""), zeros, b);
     Output add1 = ops::Add(s.WithOpName(""add1""), x, zeros);
     Output add2 = ops::Add(s.WithOpName(""add2""), zeros, y);
-    Output addn =
-        ops::AddN(s, {mul1, mul2, mul3, mul4, matmul1, matmul2, add1, add2});
+    Output addn = ops::AddN(
+        s.WithOpName(""addn""),
+        {mul1, mul2, mul3, mul4, mul5, mul6, matmul1, matmul2, add1, add2});
     GrapplerItem item;
     TF_CHECK_OK(s.ToGraphDef(&item.graph));
+    item.fetch = {""addn"", ""matmul3"", ""matmul4""};
 
     ConstantFolding optimizer(RewriterConfig::AGGRESSIVE,
                               nullptr /* cpu_device */);
@@ -111,35 +119,17 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
     Status status = optimizer.Optimize(nullptr, item, &output);
     TF_EXPECT_OK(status);
 
-    EXPECT_EQ(16, output.node_size());
+    EXPECT_EQ(20, output.node_size());
     for (int i = 0; i < output.node_size(); ++i) {
       const NodeDef& node = output.node(i);
       const string& name = node.name();
       if (name == ""mul1"") {
-        if (use_const) {
-          EXPECT_EQ(""Const"", node.op());
-          EXPECT_EQ(""^x"", node.input(0));
-        } else {
-          EXPECT_EQ(""Identity"", node.op());
-          EXPECT_EQ(""zeros"", node.input(0));
-          EXPECT_EQ(""^x"", node.input(1));
-        }
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^x"", node.input(0));
+        EXPECT_EQ(""^zeros"", node.input(1));
       } else if (name == ""mul2"") {
-        if (use_const) {
-          EXPECT_EQ(""Const"", node.op());
-          EXPECT_EQ(""^y"", node.input(0));
-        } else {
-          EXPECT_EQ(""Identity"", node.op());
-          EXPECT_EQ(""zeros"", node.input(0));
-          EXPECT_EQ(""^y"", node.input(1));
-        }
-      } else if (name == ""matmul1"") {
-        EXPECT_EQ(""Identity"", node.op());
-        EXPECT_EQ(""zeros"", node.input(0));
-        EXPECT_EQ(""^x"", node.input(1));
-      } else if (name == ""matmul2"") {
-        EXPECT_EQ(""Identity"", node.op());
-        EXPECT_EQ(""zeros"", node.input(0));
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^zeros"", node.input(0));
         EXPECT_EQ(""^y"", node.input(1));
       } else if (name == ""mul3"") {
         EXPECT_EQ(""Identity"", node.op());
@@ -152,23 +142,39 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
       } else if (name == ""mul5"") {
         EXPECT_EQ(""Const"", node.op());
         EXPECT_EQ(""^x"", node.input(0));
-        EXPECT_EQ(""^ones"", node.input(1));
+        EXPECT_EQ(""^zeros_broadcast"", node.input(1));
+      } else if (name == ""mul6"") {
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^zeros_broadcast"", node.input(0));
+        EXPECT_EQ(""^y"", node.input(1));
+      } else if (name == ""matmul1"") {
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^x"", node.input(0));
+        EXPECT_EQ(""^zeros"", node.input(1));
+      } else if (name == ""matmul2"") {
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^zeros"", node.input(0));
+        EXPECT_EQ(""^y"", node.input(1));
+      } else if (name == ""matmul3"") {
+        EXPECT_EQ(""Const"", node.op());
+        EXPECT_EQ(""^a"", node.input(0));
+        EXPECT_EQ(""^zeros"", node.input(1));
         TensorProto t = node.attr().at(""value"").tensor();
         EXPECT_EQ(1, t.float_val_size());
         EXPECT_EQ(0, t.float_val(0));
         EXPECT_EQ(2, t.tensor_shape().dim_size());
-        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(3, t.tensor_shape().dim(0).size());
         EXPECT_EQ(2, t.tensor_shape().dim(1).size());
-      } else if (name == ""mul6"") {
+      } else if (name == ""matmul4"") {
         EXPECT_EQ(""Const"", node.op());
-        EXPECT_EQ(""^y"", node.input(0));
-        EXPECT_EQ(""^ones"", node.input(1));
+        EXPECT_EQ(""^zeros"", node.input(0));
+        EXPECT_EQ(""^b"", node.input(1));
         TensorProto t = node.attr().at(""value"").tensor();
         EXPECT_EQ(1, t.float_val_size());
         EXPECT_EQ(0, t.float_val(0));
         EXPECT_EQ(2, t.tensor_shape().dim_size());
-        EXPECT_EQ(1, t.tensor_shape().dim(0).size());
-        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(3, t.tensor_shape().dim(1).size());
       } else if (name == ""add1"") {
         EXPECT_EQ(""Identity"", node.op());
         EXPECT_EQ(""x"", node.input(0));
@@ -178,6 +184,16 @@ TEST_F(ConstantFoldingTest, NeutralElement) {
         EXPECT_EQ(""y"", node.input(0));
         EXPECT_EQ(""^zeros"", node.input(1));
       }
+      const std::set<string> square_zero_const{""mul1"", ""mul2"",    ""mul5"",
+                                               ""mul6"", ""matmul1"", ""matmul2""};
+      if (square_zero_const.count(name) > 0) {
+        TensorProto t = node.attr().at(""value"").tensor();
+        EXPECT_EQ(1, t.float_val_size());
+        EXPECT_EQ(0, t.float_val(0));
+        EXPECT_EQ(2, t.tensor_shape().dim_size());
+        EXPECT_EQ(2, t.tensor_shape().dim(0).size());
+        EXPECT_EQ(2, t.tensor_shape().dim(1).size());
+      }
     }
   }
 }
",0,train
265e1be02583e2d62fbb797237f701a9c9bc2668,tensorflow/tensorflow,"Minimize calls to tensor_util.constant_value in array_grad._StridedSliceGrad.

PiperOrigin-RevId: 293716338
Change-Id: Id05c9afa21f80543ef783d0cfbc33027caecdf05",array_grad.py,"@@ -273,14 +273,14 @@ def _StridedSliceGrad(op, grad):
   # be the same.
   x = array_ops.shape(op.inputs[0], out_type=begin.dtype)
 
-  if tensor_util.constant_value(x) is not None:
-    x = tensor_util.constant_value(x)
-  if tensor_util.constant_value(begin) is not None:
-    begin = tensor_util.constant_value(begin)
-  if tensor_util.constant_value(end) is not None:
-    end = tensor_util.constant_value(end)
-  if tensor_util.constant_value(strides) is not None:
-    strides = tensor_util.constant_value(strides)
+  x_static = tensor_util.constant_value(x)
+  x = x_static if x_static is not None else x
+  begin_static = tensor_util.constant_value(begin)
+  begin = begin_static if begin_static is not None else begin
+  end_static = tensor_util.constant_value(end)
+  end = end_static if end_static is not None else end
+  strides_static = tensor_util.constant_value(strides)
+  strides = strides_static if strides_static is not None else strides
 
   return array_ops.strided_slice_grad(
       x,
",0,test
185c0233e5533788fd5c4679acd0a4b64484dc03,tensorflow/tensorflow,"Fix an error in neon tensor util. vget_high_s8 gets the high bits.

PiperOrigin-RevId: 227574080",neon_tensor_utils.cc,"@@ -144,7 +144,7 @@ void NeonMatrixBatchVectorMultiplyAccumulate(
         // registers).
         int16x8_t prod_16x8 =
             vmull_s8(vget_low_s8(s1_8x16), vget_low_s8(s2_8x16));
-        // Multiply the high bits (i.e. the lower 8 8bit numbers in the
+        // Multiply the high bits (i.e. the higher 8 8bit numbers in the
         // registers), and accumulate with the result of the low bits product.
         // The assumption here is that overflow will not happen as we quantize
         // our values to be in the range [-127, 127]. As such the sum of the 2
",0,train
289be76f8ed6d40752f6ee5c64632f4624fa7cc2,tensorflow/tensorflow,"Simplify GPU copy insertion.

Previously, there was almost identical code for inserting copies.
This CL combines the two code paths.

PiperOrigin-RevId: 201655259",gpu_copy_insertion.cc,"@@ -52,60 +52,20 @@ StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
                       HloDataflowAnalysis::Run(*module));
 
   // Make sure all operands of a library call are in memory instead of constants
-  // in IR.
-  for (HloInstruction* hlo :
-       module->entry_computation()->MakeInstructionPostOrder()) {
-    // Inserts a copy of hlo->operand(n) if it's a constant.
-    auto copy_operand_if_constant = [&](int64 n) -> Status {
-      HloInstruction* operand = hlo->mutable_operand(n);
-      TF_RET_CHECK(ShapeUtil::IsArray(operand->shape()));
-      const auto& values = dataflow->GetValueSet(operand).values();
-      if (std::any_of(values.begin(), values.end(), [](const HloValue* value) {
-            return value->defining_instruction()->opcode() ==
-                   HloOpcode::kConstant;
-          })) {
-        TF_ASSIGN_OR_RETURN(HloInstruction * copy, FindOrInsertCopy(operand));
-        TF_RETURN_IF_ERROR(hlo->ReplaceOperandWith(n, copy));
-        changed = true;
-      }
-      return Status::OK();
-    };
-
-    if (IsCustomCallToDnnBatchNorm(*hlo)) {
-      // The epsilon and feature_index operands to a CUDNN batchnorm op don't
-      // need to be materialized in memory -- in fact, they must be constants.
-      // These are the last two operands of all three batchnorm ops.
-      for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
-        TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
-      }
-    } else if (ImplementedAsLibraryCall(*hlo) ||
-               hlo->opcode() == HloOpcode::kCrossReplicaSum) {
-      // For all other library calls and cross-replica-sum, materialize all the
-      // operands into memory.  (Cross-replica-sum gets its constant args
-      // materialized even if it's not implemented as a libcall to simplify the
-      // implementation.  It's slower, but we can constant fold away constant
-      // args *anyway*, so we just need to make it work.)
-      for (int64 i = 0; i < hlo->operand_count(); ++i) {
-        TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
-      }
-    }
-  }
-
-  // Init values of while and conditional nodes cannot be constants. Insert
-  // copies for any constants found at the operands of these nodes.
+  // in IR. Also, init values of while and conditional nodes cannot be
+  // constants. Insert copies for any constants found at the operands of these
+  // nodes.
   tensorflow::gtl::FlatSet<HloInstruction*> inserted_copies;
   for (HloComputation* computation : module->computations()) {
-    for (HloInstruction* instruction : computation->instructions()) {
-      if (instruction->opcode() != HloOpcode::kWhile &&
-          instruction->opcode() != HloOpcode::kConditional) {
-        continue;
-      }
-      for (auto operand : instruction->operands()) {
+    for (HloInstruction* hlo : computation->instructions()) {
+      // Inserts a copy of hlo->operand(n) if it's a constant.
+      auto copy_operand_if_constant = [&](int64 n) -> Status {
+        HloInstruction* operand = hlo->mutable_operand(n);
         // Skip the operands that have already been replaced with a copy in a
         // previous iteration (which is possible when a constant is used as an
         // operand in multiple places).
         if (ContainsKey(inserted_copies, operand)) {
-          continue;
+          return Status::OK();
         }
         for (auto& pair : dataflow->GetInstructionValueSet(operand)) {
           const HloValueSet& value_set = pair.second;
@@ -121,6 +81,47 @@ StatusOr<bool> GpuCopyInsertion::Run(HloModule* module) {
             }
           }
         }
+        return Status::OK();
+      };
+
+      if (IsCustomCallToDnnBatchNorm(*hlo)) {
+        // The epsilon and feature_index operands to a CUDNN batchnorm op don't
+        // need to be materialized in memory -- in fact, they must be constants.
+        // These are the last two operands of all three batchnorm ops.
+        for (int64 i = 0; i < hlo->operand_count() - 2; ++i) {
+          TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
+        }
+      } else if (ImplementedAsLibraryCall(*hlo) ||
+                 hlo->opcode() == HloOpcode::kCrossReplicaSum ||
+                 hlo->opcode() == HloOpcode::kWhile ||
+                 hlo->opcode() == HloOpcode::kConditional) {
+        // For all other library calls, cross-replica-sum, while and conditional
+        // ops materialize all the operands into memory.  (Cross-replica-sum
+        // gets its constant args materialized even if it's not implemented as a
+        // libcall to simplify the implementation.  It's slower, but we can
+        // constant fold away constant args *anyway*, so we just need to make it
+        // work.)
+        for (int64 i = 0; i < hlo->operand_count(); ++i) {
+          TF_RETURN_IF_ERROR(copy_operand_if_constant(i));
+        }
+      }
+    }
+  }
+
+  if (changed) {
+    // Check the assumption that the epsilon and feature_index constants of the
+    // CUDNN batchnorm op are not shared with other ops where we would replace
+    // them with a copy. These custom op calls are generated with the
+    // CudnnBatchNormRewriter, so this would only happen if HloCSE merges them.
+    for (HloComputation* computation : module->computations()) {
+      for (HloInstruction* hlo : computation->instructions()) {
+        if (!IsCustomCallToDnnBatchNorm(*hlo)) {
+          continue;
+        }
+        for (int64 i = hlo->operand_count() - 2; i < hlo->operand_count();
+             ++i) {
+          CHECK_EQ(hlo->operand(i)->opcode(), HloOpcode::kConstant);
+        }
       }
     }
   }
",0,train
694cc06ac9290168e5a700ebb5bc2f117b04af10,tensorflow/tensorflow,"Remove an obsolete TODO for _ReductionDims(sparse_tensor, ..).

tf.rank() is recently patched to work on SparseTensor.
Change: 125127586",math_ops.py,"@@ -936,9 +936,6 @@ def _ReductionDims(x, reduction_indices):
       return constant_op.constant(np.arange(rank), dtype=dtypes.int32)
 
     # Otherwise, we rely on Range and Rank to do the right thing at run-time.
-    # TODO(zongheng): remove this once rank() supports SparseTensor.
-    if isinstance(x, ops.SparseTensor):
-      return range(0, array_ops.size(x.shape))
     return range(0, array_ops.rank(x))
 
 
",0,train
6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos.

PiperOrigin-RevId: 185166764",__init__.py,"@@ -116,6 +116,7 @@ from tensorflow.python.platform import test
 
 from tensorflow.python.util.all_util import remove_undocumented
 from tensorflow.python.util.all_util import make_all
+from tensorflow.python.util.tf_export import tf_export
 
 # Import modules whose docstrings contribute, for use by remove_undocumented
 # below.
@@ -167,6 +168,31 @@ _allowed_symbols = [
     'TensorInfo',  # Used for tf.saved_model functionality.
 ]
 
+# Export protos
+# pylint: disable=undefined-variable
+tf_export('AttrValue')(AttrValue)
+tf_export('ConfigProto')(ConfigProto)
+tf_export('Event', 'summary.Event')(Event)
+tf_export('GPUOptions')(GPUOptions)
+tf_export('GraphDef')(GraphDef)
+tf_export('GraphOptions')(GraphOptions)
+tf_export('HistogramProto')(HistogramProto)
+tf_export('LogMessage')(LogMessage)
+tf_export('MetaGraphDef')(MetaGraphDef)
+tf_export('NameAttrList')(NameAttrList)
+tf_export('NodeDef')(NodeDef)
+tf_export('OptimizerOptions')(OptimizerOptions)
+tf_export('RunMetadata')(RunMetadata)
+tf_export('RunOptions')(RunOptions)
+tf_export('SessionLog', 'summary.SessionLog')(SessionLog)
+tf_export('Summary', 'summary.Summary')(Summary)
+tf_export('summary.SummaryDescription')(SummaryDescription)
+tf_export('SummaryMetadata')(SummaryMetadata)
+tf_export('summary.TaggedRunMetadata')(TaggedRunMetadata)
+tf_export('TensorInfo')(TensorInfo)
+# pylint: enable=undefined-variable
+
+
 # The following symbols are kept for compatibility. It is our plan
 # to remove them in the future.
 _allowed_symbols.extend([
",0,train
6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos.

PiperOrigin-RevId: 185166764",profiler.py,"@@ -31,6 +31,7 @@ from tensorflow.python.profiler.option_builder import ProfileOptionBuilder
 from tensorflow.python.profiler.tfprof_logger import write_op_log
 
 from tensorflow.python.util.all_util import remove_undocumented
+from tensorflow.python.util.tf_export import tf_export
 
 
 _allowed_symbols = [
@@ -48,6 +49,12 @@ _allowed_symbols.extend([
     'OpLogProto',
 ])
 
+# Export protos
+tf_export('profiler.GraphNodeProto')(GraphNodeProto)
+tf_export('profiler.MultiGraphNodeProto')(MultiGraphNodeProto)
+tf_export('profiler.AdviceProto')(AdviceProto)
+tf_export('profiler.OpLogProto')(OpLogProto)
+
 remove_undocumented(__name__, _allowed_symbols, [
     Profiler,
     profile,
",0,train
6c85a66a16f07bab9b5dc3df33bc6b8111b76615,tensorflow/tensorflow,"Add export calls for protos.

PiperOrigin-RevId: 185166764",training.py,"@@ -189,6 +189,7 @@ from tensorflow.python.training.training_util import create_global_step
 from tensorflow.python.training.training_util import get_or_create_global_step
 from tensorflow.python.pywrap_tensorflow import do_quantize_training_on_graphdef
 from tensorflow.python.pywrap_tensorflow import NewCheckpointReader
+from tensorflow.python.util.tf_export import tf_export
 
 # pylint: disable=wildcard-import
 # Training data protos.
@@ -239,6 +240,23 @@ _allowed_symbols = [
     ""SequenceExample"",  # from example_pb2.
     ""ServerDef"",
 ]
+
+# pylint: disable=undefined-variable
+tf_export(""train.BytesList"")(BytesList)
+tf_export(""train.ClusterDef"")(ClusterDef)
+tf_export(""train.Example"")(Example)
+tf_export(""train.Feature"")(Feature)
+tf_export(""train.Features"")(Features)
+tf_export(""train.FeatureList"")(FeatureList)
+tf_export(""train.FeatureLists"")(FeatureLists)
+tf_export(""train.FloatList"")(FloatList)
+tf_export(""train.Int64List"")(Int64List)
+tf_export(""train.JobDef"")(JobDef)
+tf_export(""train.SaverDef"")(SaverDef)
+tf_export(""train.SequenceExample"")(SequenceExample)
+tf_export(""train.ServerDef"")(ServerDef)
+# pylint: enable=undefined-variable
+
 # Include extra modules for docstrings because:
 # * Input methods in tf.train are documented in io_ops.
 # * Saver methods in tf.train are documented in state_ops.
",0,train
c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core.
Change: 120008243",backwards_compatibility_test.cc,"@@ -19,12 +19,15 @@ limitations under the License.
 #include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/protobuf.h""
 #include ""tensorflow/core/platform/test.h""
+#include ""tensorflow/core/public/version.h""
 
 namespace tensorflow {
 namespace {
 
 TEST(BackwardsCompatibilityTest, IsCompatible) {
-  OpCompatibilityLib compatibility(""tensorflow/core/ops"");
+  OpCompatibilityLib compatibility(""tensorflow/core/ops"",
+                                   strings::StrCat(""v"", TF_MAJOR_VERSION),
+                                   nullptr);
 
   Env* env = Env::Default();
   int changed_ops = 0;
",0,train
c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core.
Change: 120008243",op_compatibility_lib.cc,"@@ -23,17 +23,21 @@ limitations under the License.
 #include ""tensorflow/core/lib/io/path.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/protobuf.h""
-#include ""tensorflow/core/public/version.h""
 
 namespace tensorflow {
 
-static string OpsHistoryFile() {
-  return strings::StrCat(""compat/ops_history.v"", TF_MAJOR_VERSION, "".pbtxt"");
+static string OpsHistoryFile(const string& ops_prefix,
+                             const string& history_version) {
+  return io::JoinPath(ops_prefix, strings::StrCat(""compat/ops_history."",
+                                                  history_version, "".pbtxt""));
 }
 
-OpCompatibilityLib::OpCompatibilityLib(const string& ops_prefix)
+OpCompatibilityLib::OpCompatibilityLib(const string& ops_prefix,
+                                       const string& history_version,
+                                       const std::set<string>* stable_ops)
     : ops_file_(io::JoinPath(ops_prefix, ""ops.pbtxt"")),
-      op_history_file_(io::JoinPath(ops_prefix, OpsHistoryFile())) {
+      op_history_file_(OpsHistoryFile(ops_prefix, history_version)),
+      stable_ops_(stable_ops) {
   // Get the sorted list of all registered OpDefs.
   printf(""Getting all registered ops...\n"");
   OpRegistry::Global()->Export(false, &op_list_);
@@ -48,6 +52,24 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
   // Strip docs out of op_list_.
   RemoveDescriptionsFromOpList(&op_list_);
 
+  if (stable_ops_ != nullptr) {
+    printf(""Verifying no stable ops have been removed...\n"");
+    // We rely on stable_ops_ and op_list_ being in sorted order.
+    auto iter = stable_ops_->begin();
+    for (int cur = 0; iter != stable_ops_->end() && cur < op_list_.op_size();
+         ++cur) {
+      const string& op_name = op_list_.op(cur).name();
+      if (op_name > *iter) {
+        return errors::InvalidArgument(""Error, stable op removed: "", *iter);
+      } else if (op_name == *iter) {
+        ++iter;
+      }
+    }
+    if (iter != stable_ops_->end()) {
+      return errors::InvalidArgument(""Error, stable op removed: "", *iter);
+    }
+  }
+
   OpList in_op_history;
   {  // Read op history.
     printf(""Reading op history from %s...\n"", op_history_file_.c_str());
@@ -61,17 +83,22 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
 
   int cur = 0;
   int start = 0;
+
   printf(""Verifying updates are compatible...\n"");
   // Note: Op history is in (alphabetical, oldest-first) order.
   while (cur < op_list_.op_size() && start < in_op_history.op_size()) {
-    if (op_list_.op(cur).name() < in_op_history.op(start).name()) {
+    const string& op_name = op_list_.op(cur).name();
+    if (stable_ops_ != nullptr && stable_ops_->count(op_name) == 0) {
+      // Ignore unstable op.
+    }
+    if (op_name < in_op_history.op(start).name()) {
       // New op: add it.
       if (out_op_history != nullptr) {
         *out_op_history->add_op() = op_list_.op(cur);
       }
       ++*added_ops;
       ++cur;
-    } else if (op_list_.op(cur).name() > in_op_history.op(start).name()) {
+    } else if (op_name > in_op_history.op(start).name()) {
       // Op removed: error.
       return errors::InvalidArgument(""Error, removed op: "",
                                      SummarizeOpDef(in_op_history.op(start)));
@@ -79,7 +106,6 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
       // Op match.
 
       // Find all historical version of this op.
-      const string& op_name = op_list_.op(cur).name();
       int end = start + 1;
       for (; end < in_op_history.op_size(); ++end) {
         if (in_op_history.op(end).name() != op_name) break;
@@ -127,17 +153,22 @@ Status OpCompatibilityLib::ValidateCompatible(Env* env, int* changed_ops,
   }
 
   // Error if missing ops.
-  if (start < in_op_history.op_size()) {
+  if (stable_ops_ == nullptr && start < in_op_history.op_size()) {
     return errors::InvalidArgument(""Error, removed op: "",
                                    SummarizeOpDef(in_op_history.op(start)));
   }
 
   // Add remaining new ops.
   for (; cur < op_list_.op_size(); ++cur) {
-    if (out_op_history) {
-      *out_op_history->add_op() = op_list_.op(cur);
+    const string& op_name = op_list_.op(cur).name();
+    if (stable_ops_ != nullptr && stable_ops_->count(op_name) == 0) {
+      // Ignore unstable op.
+    } else {
+      if (out_op_history) {
+        *out_op_history->add_op() = op_list_.op(cur);
+      }
+      ++*added_ops;
     }
-    ++*added_ops;
   }
 
   return Status::OK();
",0,train
c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core.
Change: 120008243",op_compatibility_lib.h,"@@ -16,15 +16,25 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_OPS_COMPAT_OP_COMPATIBILITY_LIB_H_
 #define TENSORFLOW_CORE_OPS_COMPAT_OP_COMPATIBILITY_LIB_H_
 
-#include <string>
+#include <set>
+
 #include ""tensorflow/core/framework/op_def.pb.h""
 #include ""tensorflow/core/platform/env.h""
+#include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
 
 class OpCompatibilityLib {
  public:
-  explicit OpCompatibilityLib(const string& ops_prefix);
+  // `ops_prefix` is a filename prefix indicating where to find the
+  //   ops files.
+  // `history_version` is used to construct the ops history file name.
+  // `*stable_ops` has an optional list of ops that we care about.
+  //   If stable_ops == nullptr, we use all registered ops.
+  //   Otherwise we ignore ops not in *stable_ops and require all ops
+  //   in *stable_ops to exist.
+  OpCompatibilityLib(const string& ops_prefix, const string& history_version,
+                     const std::set<string>* stable_ops);
 
   // Name of the file that contains the checked-in versions of ops, with docs.
   const string& ops_file() const { return ops_file_; }
@@ -45,8 +55,9 @@ class OpCompatibilityLib {
                             OpList* out_op_history);
 
  private:
-  string ops_file_;
-  string op_history_file_;
+  const string ops_file_;
+  const string op_history_file_;
+  const std::set<string>* stable_ops_;
   OpList op_list_;
 };
 
",0,train
c88380396e1d1003fa770794217e4cb919c511a0,tensorflow/tensorflow,"Make the ops_compatibility framework usable outside tensorflow/core.
Change: 120008243",update_ops.cc,"@@ -27,7 +27,8 @@ namespace tensorflow {
 namespace {
 
 void WriteUpdateTo(const string& directory) {
-  OpCompatibilityLib compatibility(directory);
+  OpCompatibilityLib compatibility(
+      directory, strings::StrCat(""v"", TF_MAJOR_VERSION), nullptr);
 
   // Write full copy of all ops to ops.pbtxt.
   Env* env = Env::Default();
",0,train
79d7ea98b59e7e0841186a6f31b85e8c4bbe5d62,tensorflow/tensorflow,"Fix pydoc for _safe_scalar_div.
Change: 134115246",metric_ops.py,"@@ -93,7 +93,7 @@ def _safe_div(numerator, denominator, name):
 
 
 def _safe_scalar_div(numerator, denominator, name):
-  """"""Divides two values, returning 0 if the denominator is != 0.
+  """"""Divides two values, returning 0 if the denominator is 0.
 
   Args:
     numerator: A scalar `float64` `Tensor`.
",0,train
06c2ab5c681db8b81024dee83b620ecc49e62ae8,tensorflow/tensorflow,"test: remove defaults, use self.asserts",script_ops_test.py,"@@ -59,10 +59,10 @@ class NumpyFunctionTest(test.TestCase):
 
     # different argument
     tensor_double_plus_stateless(
-      constant_op.constant(1, dtype=dtypes.int32),
-      constant_op.constant(2, dtype=dtypes.int32),
+      constant_op.constant(1),
+      constant_op.constant(2),
     )
-    assert call_count == 1  # +1 as only the first one was executed
+    self.assertEqual(call_count, 1)  # +1 as only the first one was executed
 
     @def_function.function(autograph=False)
     def tensor_double_plus_stateful(a, b):
@@ -71,11 +71,10 @@ class NumpyFunctionTest(test.TestCase):
       return sum1 + sum2
 
     tensor_double_plus_stateful(
-      constant_op.constant(3, dtype=dtypes.int32),
-      constant_op.constant(4, dtype=dtypes.int32),
+      constant_op.constant(3),
+      constant_op.constant(4),
                           )
-    assert call_count == 3  # +2 as it is stateful, both were executed
-
+    self.assertEqual(call_count, 3)  # +2 as it is stateful, both were executed
 
 
 if __name__ == ""__main__"":
",0,train
a968485f8adb4ee4c943dac8b3a2d480e9422284,tensorflow/tensorflow,"[MLIR][KernelGen] Simplify baseline implementations for floor_div

PiperOrigin-RevId: 407310823
Change-Id: I7cd06a130ef8272576341f09dd1c68d31a52c40e",gpu_binary_ops_test.cc,"@@ -489,53 +489,36 @@ TEST_F(BinaryOpsTest, EqualUint8_tSpecialCases) {
 
 /// Test `tf.FloorDiv`.
 
-template <typename T>
+template <typename T, std::enable_if_t<llvm::is_one_of<T, float, double>::value,
+                                       bool> = true>
 T baseline_floor_div(T lhs, T rhs) {
   return std::floor(lhs / rhs);
 }
 
-template <>
-Eigen::half baseline_floor_div(Eigen::half lhs, Eigen::half rhs) {
-  return static_cast<Eigen::half>(std::floor(static_cast<float>(lhs / rhs)));
-}
-
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
-    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
-template <>
-int8_t baseline_floor_div(int8_t lhs, int8_t rhs) {
-  int8_t res = lhs / rhs;
-  if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) {
-    --res;
-  }
-  return res;
-}
-#endif
-
-template <>
-int16_t baseline_floor_div(int16_t lhs, int16_t rhs) {
-  int16_t res = lhs / rhs;
-  if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) {
-    --res;
-  }
-  return res;
+template <typename T,
+          std::enable_if_t<llvm::is_one_of<T, Eigen::half>::value, bool> = true>
+T baseline_floor_div(T lhs, T rhs) {
+  return static_cast<T>(std::floor(static_cast<float>(lhs / rhs)));
 }
 
-template <>
-int64_t baseline_floor_div(int64_t lhs, int64_t rhs) {
-  int64_t res = lhs / rhs;
+template <typename T, std::enable_if_t<llvm::is_one_of<T, int8_t, int16_t,
+                                                       int32_t, int64_t>::value,
+                                       bool> = true>
+T baseline_floor_div(T lhs, T rhs) {
+  T res = lhs / rhs;
   if (((lhs < 0 && rhs > 0) || (lhs > 0 && rhs < 0)) && lhs % rhs) {
     --res;
   }
   return res;
 }
 
-#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
-    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
-template <>
-uint64_t baseline_floor_div(uint64_t lhs, uint64_t rhs) {
+template <typename T,
+          std::enable_if_t<
+              llvm::is_one_of<T, uint8_t, uint16_t, uint32_t, uint64_t>::value,
+              bool> = true>
+T baseline_floor_div(T lhs, T rhs) {
   return lhs / rhs;
 }
-#endif
 
 GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
     FloorDiv,
",0,train
bdc5846c58ca88bf591f0ca0a3971a2daf03a180,tensorflow/tensorflow,support max_profiling_buffer_entries,label_image.cc,"@@ -184,7 +184,8 @@ void RunInference(Settings* s) {
       exit(-1);
   }
 
-  profiling::Profiler* profiler = new profiling::Profiler();
+  profiling::Profiler* profiler =
+      new profiling::Profiler(s->max_profiling_buffer_entries);
   interpreter->SetProfiler(profiler);
 
   if (s->profiling) profiler->StartProfiling();
@@ -287,12 +288,13 @@ int Main(int argc, char** argv) {
         {""input_mean"", required_argument, nullptr, 'b'},
         {""input_std"", required_argument, nullptr, 's'},
         {""num_results"", required_argument, nullptr, 'r'},
+        {""max_profiling_buffer_entries"", required_argument, nullptr, 'e'},
         {nullptr, 0, nullptr, 0}};
 
     /* getopt_long stores the option index here. */
     int option_index = 0;
 
-    c = getopt_long(argc, argv, ""a:b:c:f:i:l:m:p:r:s:t:v:"", long_options,
+    c = getopt_long(argc, argv, ""a:b:c:e:f:i:l:m:p:r:s:t:v:"", long_options,
                     &option_index);
 
     /* Detect the end of the options. */
@@ -309,6 +311,10 @@ int Main(int argc, char** argv) {
         s.loop_count =
             strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
         break;
+      case 'e':
+        s.max_profiling_buffer_entries =
+            strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
+        break;
       case 'f':
         s.allow_fp16 =
             strtol(optarg, nullptr, 10);  // NOLINT(runtime/deprecated_fn)
",0,train
bdc5846c58ca88bf591f0ca0a3971a2daf03a180,tensorflow/tensorflow,support max_profiling_buffer_entries,label_image.h,"@@ -36,6 +36,7 @@ struct Settings {
   string input_layer_type = ""uint8_t"";
   int number_of_threads = 4;
   int number_of_results = 5;
+  int max_profiling_buffer_entries = 1024;
 };
 
 }  // namespace label_image
",0,train
983d8931b4ea1a5ee81a63b5a2d393bb82f2fa0b,tensorflow/tensorflow,"Fix typo.
Change: 139974629",checkpoint_reader.h,"@@ -43,7 +43,7 @@ class CheckpointReader {
   bool HasTensor(const string& name) const;
   const string DebugString() const;
 
-  // Returns a map from variable namaes to its shape.  Slices of a partitioned
+  // Returns a map from variable names to its shape.  Slices of a partitioned
   // tensor are combined into a single entry.
   const TensorSliceReader::VarToShapeMap& GetVariableToShapeMap() const;
 
",0,train
cb401f09be5b816e704a70babc0facad63e84636,tensorflow/tensorflow,"tf.tile gradient supports IndexedSlice (#17083)

* TST: add test case

* ENH: tf.tile gradient supports IndexedSlices

* Revert ""TST: add test case""

This reverts commit b4958112a5b110dc015e48ec547eb98996a84038.

* TST: move test case

* CLN: fix lint error

* TST: add test case, input with rank 1",shape_ops_test.py,"@@ -642,6 +642,29 @@ class TileTest(test.TestCase):
       err = gradient_checker.compute_gradient_error(a, [4, 2], tiled, [4, 4])
     self.assertLess(err, 1e-3)
 
+  def testGradientWithSparseGradWithRank1(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0],
+                                  dtype=dtypes.float32)
+    outputs = array_ops.gather(array_ops.tile(inputs, [3]),
+                               [1, 5, 9, 3, 7, 2, 2, 2])
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(
+          inputs, inputs.get_shape().as_list(),
+          outputs, outputs.get_shape().as_list())
+      self.assertLess(error, 1e-4)
+
+  def testGradientWithSparseGradWithRank3(self):
+    inputs = constant_op.constant([1.0, 2.0, 3.0, 4.0],
+                                  dtype=dtypes.float32)
+    inputs = array_ops.reshape(inputs, [-1, 1, 1])
+    outputs = array_ops.gather(array_ops.tile(inputs, [3, 4, 2]),
+                               [1, 5, 9, 3, 7, 2, 2, 2])
+    with self.test_session():
+      error = gradient_checker.compute_gradient_error(
+          inputs, inputs.get_shape().as_list(),
+          outputs, outputs.get_shape().as_list())
+      self.assertLess(error, 1e-4)
+
   def testShapeFunctionEdgeCases(self):
     # Unknown multiples shape.
     inp = constant_op.constant(0.0, shape=[4, 4, 4, 4])
",0,train
cb401f09be5b816e704a70babc0facad63e84636,tensorflow/tensorflow,"tf.tile gradient supports IndexedSlice (#17083)

* TST: add test case

* ENH: tf.tile gradient supports IndexedSlices

* Revert ""TST: add test case""

This reverts commit b4958112a5b110dc015e48ec547eb98996a84038.

* TST: move test case

* CLN: fix lint error

* TST: add test case, input with rank 1",array_grad.py,"@@ -568,7 +568,6 @@ ops.NotDifferentiable(""Size"")
 @ops.RegisterGradient(""Tile"")
 def _TileGrad(op, grad):
   """"""Sum reduces grad along the tiled dimensions.""""""
-  assert isinstance(grad, ops.Tensor)
   input_shape = array_ops.shape(op.inputs[0])
   # We interleave multiples and input_shape to get split_shape,
   # reshape grad to split_shape, and reduce along all even
@@ -581,6 +580,13 @@ def _TileGrad(op, grad):
   split_shape = array_ops.reshape(
       array_ops.transpose(array_ops.stack([op.inputs[1], input_shape])), [-1])
   axes = math_ops.range(0, array_ops.size(split_shape), 2)
+  # Sum reduces grad along the first dimension for IndexedSlices
+  if isinstance(grad, ops.IndexedSlices):
+    grad = math_ops.unsorted_segment_sum(
+        grad.values,
+        math_ops.mod(grad.indices, input_shape[0]),
+        input_shape[0])
+    split_shape = array_ops.concat([[1], split_shape[1:]], axis=0)
   input_grad = math_ops.reduce_sum(array_ops.reshape(grad, split_shape), axes)
   # Fix shape inference
   if not context.executing_eagerly():
",0,train
38491a84a9e38357e400457dbbe408b66e786672,tensorflow/tensorflow,TFTRT: Expand lambda and inline ifs to functions with if and else,trt_convert.py,"@@ -45,15 +45,33 @@ from tensorflow.python.saved_model import loader_impl
 from tensorflow.python.saved_model import tag_constants
 from tensorflow.python.training import saver
 
-if _six.PY2:
-  _to_bytes = lambda s: s.encode(""utf-8"", errors=""surrogateescape"") \
-    if isinstance(s, unicode) else s
-  _to_string = lambda s: s.decode(""utf-8"") if isinstance(s, str) else s
-else:
-  _to_bytes = lambda s: s.encode(""utf-8"", errors=""surrogateescape"") \
-    if isinstance(s, str) else s
-  _to_string = lambda s: s.decode(""utf-8"") if isinstance(s, bytes) else s
-
+def _to_bytes(s):
+    """"""Returns encoded of s if s is a sequence of chars otherwise returns s.
+    """"""
+    if _six.PY2:
+        if isinstance(s, unicode):
+            return s.encode(""utf-8"", errors=""surrogateescape"")
+        else:
+            return s
+    else:
+        if isinstance(s, str):
+            return s.encode(""utf-8"", errors=""surrogateescape"")
+        else:
+            return s
+
+def _to_string(s):
+    """"""Returns decoded of s if s is a sequence of bytes otherwise returns s.
+    """"""
+    if _six.PY2:
+        if isinstance(s, str):
+            return s.decode(""utf-8"")
+        else:
+            return s
+    else:
+        if isinstance(s, bytes):
+            return s.decode(""utf-8"")
+        else:
+            return s
 
 class TrtPrecisionMode(object):
   FP32 = ""FP32""
",0,train
a346aa260d32eb83621bb7ed501a2b07ba186480,tensorflow/tensorflow,"Automated rollback of commit 624ff13fdf4e54e255d23971ef2beec3c48c3bb2. Revert #21826.

PiperOrigin-RevId: 212487142",ctc_ops.py,"@@ -242,11 +242,11 @@ def ctc_beam_search_decoder(inputs, sequence_length, beam_width=100,
 
   If `merge_repeated` is `True`, merge repeated classes in the output beams.
   This means that if consecutive entries in a beam are the same,
-  only the first of these is emitted.  That is, when the sequence is
-  `A B B * B * B` (where '*' is the blank label), the return value is:
+  only the first of these is emitted.  That is, when the top path
+  is `A B B B B`, the return value is:
 
     * `A B` if `merge_repeated = True`.
-    * `A B B B` if `merge_repeated = False`.
+    * `A B B B B` if `merge_repeated = False`.
 
   Args:
     inputs: 3-D `float` `Tensor`, size
",0,train
81fefe40e1c3ad9a14d9d7d665b25d7e93fb2dfc,tensorflow/tensorflow,"Add test case for int16 support of tf.stack/Pack on gpu

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",stack_op_test.py,"@@ -76,7 +76,7 @@ class StackOpTest(test.TestCase):
     np.random.seed(7)
     with self.test_session(use_gpu=True):
       for shape in (2,), (3,), (2, 3), (3, 2), (4, 3, 2):
-        for dtype in [np.bool, np.float32, np.int32, np.int64]:
+        for dtype in [np.bool, np.float32, np.int16, np.int32, np.int64]:
           data = np.random.randn(*shape).astype(dtype)
           # Stack back into a single tensorflow tensor directly using np array
           c = array_ops.stack(data)
",0,train
6294c8cefa60a5a240b56e4cea5aa487a43cf245,tensorflow/tensorflow,"Remove explicit assert

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",string_ops.py,"@@ -125,8 +125,8 @@ def string_format(template, inputs, placeholder=""{}"", summarize=3, name=None):
     ```python
     >>> tensor = tf.range(10)
     >>> formatted = tf.strings.format(""tensor: {}, suffix"", tensor)
-    >>> expected = ""tensor: [0 1 2 ... 7 8 9], suffix""
-    >>> assert(formatted == expected)
+    >>> print(formatted)
+    tf.Tensor(b'tensor: [0 1 2 ... 7 8 9], suffix', shape=(), dtype=string)
     ```
 
     Formatting a multi-tensor template:
@@ -135,14 +135,8 @@ def string_format(template, inputs, placeholder=""{}"", summarize=3, name=None):
     >>> tensor_two = tf.range(10)
     >>> formatted = tf.strings.format(""first: {}, second: {}, suffix"",
     ...                               (tensor_one, tensor_two))
-    >>> expected = (""first: [[0 1 2 ... 7 8 9]\n""
-    ...             "" [10 11 12 ... 17 18 19]\n""
-    ...             "" [20 21 22 ... 27 28 29]\n""
-    ...             "" ...\n""
-    ...             "" [70 71 72 ... 77 78 79]\n""
-    ...             "" [80 81 82 ... 87 88 89]\n""
-    ...             "" [90 91 92 ... 97 98 99]], second: [0 1 2 ... 7 8 9], suffix"")
-    >>> assert(formatted == expected)
+    >>> print(formatted)
+    tf.Tensor(b'first: [[0 1 2 ... 7 8 9]\n [10 11 12 ... 17 18 19]\n [20 21 22 ... 27 28 29]\n ...\n [70 71 72 ... 77 78 79]\n [80 81 82 ... 87 88 89]\n [90 91 92 ... 97 98 99]], second: [0 1 2 ... 7 8 9], suffix', shape=(), dtype=string)
     ```
 
   Args:
",0,train
3a0643dcb563f9bb34879e9da98b65d359f24ed2,tensorflow/tensorflow,Change error msg,sparse_to_dense_op.cc,"@@ -241,8 +241,8 @@ class SparseToDenseGPU : public AsyncOpKernel {
                                             output_shape_vec.data(),
                                             num_dims * sizeof(Index)).ok(),
                       errors::InvalidArgument(
-                          ""failed to copy memory from host to device in ""
-                          ""SparseToDense""), done);
+                          ""failed to copy output_shape vector from host to ""
+                          ""device in SparseToDenseOp""), done);
 
     functor::LaunchSparseToDense<T, Index>()(
         c, done, this, validate_indices_, indices.flat<Index>().data(),
",0,train
4675bebffe1eb1b94e26159a42bd7a2031837985,tensorflow/tensorflow,"Add a convenient `clone()` method on the `Op` class that forward to the underlying `Operation` (NFC)

PiperOrigin-RevId: 266685852",OpDefinition.h,"@@ -906,6 +906,16 @@ public:
   /// Return the operation that this refers to.
   Operation *getOperation() { return OpState::getOperation(); }
 
+  /// Create a deep copy of this operation.
+  ConcreteType clone() { return cast<ConcreteType>(getOperation()->clone()); }
+
+  /// Create a partial copy of this operation without traversing into attached
+  /// regions. The new operation will have the same number of regions as the
+  /// original one, but they will be left empty.
+  ConcreteType cloneWithoutRegions() {
+    return cast<ConcreteType>(getOperation()->cloneWithoutRegions());
+  }
+
   /// Return the dialect that this refers to.
   Dialect *getDialect() { return getOperation()->getDialect(); }
 
",0,train
4675bebffe1eb1b94e26159a42bd7a2031837985,tensorflow/tensorflow,"Add a convenient `clone()` method on the `Op` class that forward to the underlying `Operation` (NFC)

PiperOrigin-RevId: 266685852",Operation.h,"@@ -94,10 +94,16 @@ public:
   Operation *clone(BlockAndValueMapping &mapper);
   Operation *clone();
 
-  /// Create a deep copy of this operation but keep the operation regions empty.
+  /// Create a partial copy of this operation without traversing into attached
+  /// regions. The new operation will have the same number of regions as the
+  /// original one, but they will be left empty.
   /// Operands are remapped using `mapper` (if present), and `mapper` is updated
   /// to contain the results.
   Operation *cloneWithoutRegions(BlockAndValueMapping &mapper);
+
+  /// Create a partial copy of this operation without traversing into attached
+  /// regions. The new operation will have the same number of regions as the
+  /// original one, but they will be left empty.
   Operation *cloneWithoutRegions();
 
   /// Returns the operation block that contains this operation.
",0,train
37ba47810867abe769199cc46d5b8e3b6fe11069,tensorflow/tensorflow,Fixing example strided_slice (#7347),array_ops.py,"@@ -634,8 +634,8 @@ def strided_slice(input_,
   tf.strided_slice(input, [1, 0, 0], [2, 1, 3], [1, 1, 1]) ==> [[[3, 3, 3]]]
   tf.strided_slice(input, [1, 0, 0], [2, 2, 3], [1, 1, 1]) ==> [[[3, 3, 3],
                                                                  [4, 4, 4]]]
-  tf.strided_slice(input, [1, 1, 0], [2, -1, 3], [1, -1, 1]) ==>[[[4, 4, 4],
-                                                                  [3, 3, 3]]]
+  tf.strided_slice(input, [1, -1, 0], [2, -3, 3], [1, -1, 1]) ==>[[[4, 4, 4],
+                                                                   [3, 3, 3]]]
   ```
 
   Args:
",0,train
71f7b620fc3a6bdf30facd7b6e63c789e90567e0,tensorflow/tensorflow,Fixed off-by-one error in L115-116. (#7437),word2vec_basic.py,"@@ -112,6 +112,8 @@ def generate_batch(batch_size, num_skips, skip_window):
       labels[i * num_skips + j, 0] = buffer[target]
     buffer.append(data[data_index])
     data_index = (data_index + 1) % len(data)
+  # Backtrack a little bit to avoid skipping words in the end of a batch
+  data_index = (data_index + len(data) - span) % len(data)
   return batch, labels
 
 batch, labels = generate_batch(batch_size=8, num_skips=2, skip_window=1)
",0,train
092485d20cd7dca8a9aa0268a34081cad7918549,tensorflow/tensorflow,"Raise an unimplemented error when receiving aliased reference args.

The XLA runtime does not support updating reference args that alias.
Currently, it fails with an internal error. However, it should really be
an unimplemented error, as it is a condition that is not unexpected and
might be recovered from by, e.g., falling back to TF classic.

PiperOrigin-RevId: 241491318",xla_launch_util.cc,"@@ -34,6 +34,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/util/stream_executor_util.h""
 
 namespace tensorflow {
@@ -132,7 +133,7 @@ Status LockVariables(absl::Span<VariableInfo> variables) {
       // cluster because we would not handle variable updates correctly.  Any
       // locks we have already acquired will be released when the VariableInfo
       // objects are destroyed.
-      return errors::Internal(""Duplicate variable passed to XLA cluster"");
+      return errors::Unimplemented(""Duplicate variable passed to XLA cluster"");
     }
     VLOG(4) << ""Acquiring lock for variable ""
             << reinterpret_cast<void*>(variable);
",0,train
092485d20cd7dca8a9aa0268a34081cad7918549,tensorflow/tensorflow,"Raise an unimplemented error when receiving aliased reference args.

The XLA runtime does not support updating reference args that alias.
Currently, it fails with an internal error. However, it should really be
an unimplemented error, as it is a condition that is not unexpected and
might be recovered from by, e.g., falling back to TF classic.

PiperOrigin-RevId: 241491318",critical_section_test.py,"@@ -56,6 +56,7 @@ class CriticalSectionTest(test.TestCase):
                         sorted(r_value))
 
   @test_util.run_in_graph_and_eager_modes
+  @test_util.xla_allow_fallback(""b/128495870"")
   def testCriticalSectionWithControlFlow(self):
     for outer_cond in [False, True]:
       for inner_cond in [False, True]:
",0,train
f0a7939bf73c7b08daee6a46292159fd7651a785,tensorflow/tensorflow,"Remove internal caching fields from the tracking list for tf.Moduel.

It was breaking tf probability when tf.Module traverse all the fields.

PiperOrigin-RevId: 296482313
Change-Id: I1afa34681df1e03e2f126b0070a597a03e4b1862",network.py,"@@ -158,7 +158,8 @@ class Network(base_layer.Layer):
   # The key of _layer_call_argspecs is a layer. tf.Module._flatten will fail to
   # flatten the key since it is trying to convert Trackable/Layer to a string.
   _TF_MODULE_IGNORED_PROPERTIES = frozenset(itertools.chain(
-      ('_layer_call_argspecs', '_compiled_trainable_state'),
+      ('_layer_call_argspecs', '_compiled_trainable_state',
+       '_output_mask_cache', '_output_tensor_cache', '_output_shape_cache'),
       base_layer.Layer._TF_MODULE_IGNORED_PROPERTIES
   ))
 
",0,train
baea13831c2d1ffa08c4fcc8944a3870d19826cb,tensorflow/tensorflow,"Introduce a new C API entrypoint to set a 'func' attribute on an op
description.

PiperOrigin-RevId: 182146876",c_api.cc,"@@ -1201,6 +1201,13 @@ void TF_SetAttrTypeList(TF_OperationDescription* desc, const char* attr_name,
                      reinterpret_cast<const DataType*>(values), num_values));
 }
 
+void TF_SetAttrFunc(TF_OperationDescription* desc, const char* attr_name,
+                    const char* value, size_t length) {
+  tensorflow::NameAttrList func_name;
+  func_name.set_name(std::string(value, value + length));
+  desc->node_builder.Attr(attr_name, func_name);
+}
+
 void TF_SetAttrShape(TF_OperationDescription* desc, const char* attr_name,
                      const int64_t* dims, int num_dims) {
   PartialTensorShape shape;
",0,train
baea13831c2d1ffa08c4fcc8944a3870d19826cb,tensorflow/tensorflow,"Introduce a new C API entrypoint to set a 'func' attribute on an op
description.

PiperOrigin-RevId: 182146876",c_api.h,"@@ -511,6 +511,11 @@ TF_CAPI_EXPORT extern void TF_SetAttrTypeList(TF_OperationDescription* desc,
                                               const char* attr_name,
                                               const TF_DataType* values,
                                               int num_values);
+// Set a 'func' attribute to the specified name.
+// `value` must point to a string of length `length` bytes.
+TF_CAPI_EXPORT extern void TF_SetAttrFunc(TF_OperationDescription* desc,
+                                          const char* attr_name,
+                                          const char* value, size_t length);
 
 // Set `num_dims` to -1 to represent ""unknown rank"".  Otherwise,
 // `dims` points to an array of length `num_dims`.  `dims[i]` must be
",0,train
9d17a0b425db338ae86465f5f3204335986fbae6,tensorflow/tensorflow,"Set namespace to TFDevice for MarkOpsForOutsideCompilation pass.

This pass is a generic TF device pass so this is a better namespace.

PiperOrigin-RevId: 322895228
Change-Id: Id848bd88af6a7d60f428a0b6531e3fb4a507976d",mark_ops_for_outside_compilation.cc,"@@ -23,7 +23,7 @@ limitations under the License.
 #include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h""
 
 namespace mlir {
-namespace TF {
+namespace TFDevice {
 
 namespace {
 
@@ -54,5 +54,5 @@ static PassRegistration<MarkOpsForOutsideCompilation> pass(
     ""tf-mark-ops-for-outside-compilation"",
     ""Marks unsupported ops a device cluster for outside compilation."");
 
-}  // namespace TF
+}  // namespace TFDevice
 }  // namespace mlir
",0,train
9d17a0b425db338ae86465f5f3204335986fbae6,tensorflow/tensorflow,"Set namespace to TFDevice for MarkOpsForOutsideCompilation pass.

This pass is a generic TF device pass so this is a better namespace.

PiperOrigin-RevId: 322895228
Change-Id: Id848bd88af6a7d60f428a0b6531e3fb4a507976d",passes.h,"@@ -247,6 +247,11 @@ std::unique_ptr<OperationPass<FuncOp>> CreateParallelExecuteToIslandsPass();
 std::unique_ptr<OperationPass<ModuleOp>>
 CreateAnnotateParameterReplicationPass();
 
+// Creates a pass that marks unsupported ops in device cluster for outside
+// compilation.
+std::unique_ptr<OperationPass<ModuleOp>>
+CreateMarkOpsForOutsideCompilationPass();
+
 // Creates a pass that hoists a `tf_device.launch` body and assigns a `device`
 // attribute to each TensorFlow dialect op in the body based on the `device`
 // attribute on the `tf_device.launch`.
@@ -302,11 +307,6 @@ std::unique_ptr<OperationPass<FuncOp>> CreateTPUHostComputationExpansionPass();
 std::unique_ptr<OperationPass<FuncOp>>
 CreateTPUUpdateEmbeddingEnqueueOpInputsPass();
 
-// Creates a pass that marks unsupported ops in device cluster for outside
-// compilation.
-std::unique_ptr<OperationPass<ModuleOp>>
-CreateMarkOpsForOutsideCompilationPass();
-
 // Creates a pass that extract outside compilation (CPU ops inside TPU cluster)
 // ops to a separate parallel_execute region to run on CPU.
 std::unique_ptr<OperationPass<ModuleOp>>
",0,train
dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix

PiperOrigin-RevId: 313426932
Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",hlo_graph_dumper.cc,"@@ -312,12 +312,13 @@ optional<string> MatchTrivialComputation(const HloComputation* computation) {
 class HloDotDumper {
  public:
   HloDotDumper(const HloComputation* computation, absl::string_view label,
-               const DebugOptions& debug_options, bool show_backend_config,
+               const DebugOptions& debug_options,
+               HloRenderOptions hlo_render_options,
                const HloExecutionProfile* profile, NodeFilter filter)
       : computation_(computation),
         label_(label),
         debug_options_(debug_options),
-        show_backend_config_(show_backend_config),
+        hlo_render_options_(hlo_render_options),
         profile_(profile),
         filter_(std::move(filter)) {}
 
@@ -384,7 +385,7 @@ class HloDotDumper {
   const HloComputation* computation_;  // never null
   const string label_;                 // overall name for the graph
   const DebugOptions& debug_options_;
-  const bool show_backend_config_;
+  const HloRenderOptions hlo_render_options_;
   const HloExecutionProfile* profile_;  // may be null
   const NodeFilter filter_;
 
@@ -565,7 +566,8 @@ bool HloDotDumper::ShouldShowFusionSubcomputation(const HloInstruction* instr) {
 bool HloDotDumper::ShouldShowSubcomputation(const HloComputation* subcomp) {
   if (subcomp->IsFusionComputation()) {
     const HloInstruction* fusion = subcomp->FusionInstruction();
-    if (!filter_.Show(fusion) || filter_.SomeOrAllOperandsOmitted(fusion)) {
+    if (!filter_.Show(fusion) || filter_.SomeOrAllOperandsOmitted(fusion) ||
+        !hlo_render_options_.show_fusion_subcomputations) {
       return false;
     }
   }
@@ -1133,7 +1135,8 @@ string HloDotDumper::GetInstructionNodeMetadata(const HloInstruction* instr) {
 
 string HloDotDumper::GetInstructionNodeBackendConfig(
     const HloInstruction* instr) {
-  if (!show_backend_config_ || instr->raw_backend_config_string().empty()) {
+  if (!hlo_render_options_.show_backend_config ||
+      instr->raw_backend_config_string().empty()) {
     return """";
   }
 
@@ -1604,14 +1607,14 @@ StatusOr<string> RenderGraph(const HloComputation& computation,
                              const DebugOptions& debug_options,
                              RenderedGraphFormat format,
                              const HloExecutionProfile* hlo_execution_profile,
-                             bool show_backend_config) {
+                             HloRenderOptions hlo_render_options) {
   tensorflow::mutex_lock lock(url_renderer_mu);
   if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) {
     return Unavailable(""Can't render as URL; no URL renderer was registered."");
   }
 
   string rendered_dot =
-      HloDotDumper(&computation, label, debug_options, show_backend_config,
+      HloDotDumper(&computation, label, debug_options, hlo_render_options,
                    hlo_execution_profile, NodeFilter())
           .Dump();
   return WrapDotInFormat(rendered_dot, format);
@@ -1619,7 +1622,7 @@ StatusOr<string> RenderGraph(const HloComputation& computation,
 
 StatusOr<string> RenderNeighborhoodAround(
     const HloInstruction& node, int radius, RenderedGraphFormat format,
-    bool show_backend_config,
+    HloRenderOptions hlo_render_options,
     const absl::flat_hash_set<const HloInstruction*>& boundary) {
   tensorflow::mutex_lock lock(url_renderer_mu);
   if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) {
@@ -1632,7 +1635,7 @@ StatusOr<string> RenderNeighborhoodAround(
   string rendered_dot =
       HloDotDumper(node.parent(), label,
                    node.GetModule()->config().debug_options(),
-                   show_backend_config, /*profile=*/nullptr,
+                   hlo_render_options, /*profile=*/nullptr,
                    MakeNodeRadiusAroundFilter(&node, radius, boundary))
           .Dump();
   return WrapDotInFormat(rendered_dot, format);
@@ -1641,7 +1644,7 @@ StatusOr<string> RenderNeighborhoodAround(
 StatusOr<string> RenderAllPathsFromTo(const HloInstruction& from,
                                       const HloInstruction& to, int64 max_nodes,
                                       RenderedGraphFormat format,
-                                      bool show_backend_config) {
+                                      HloRenderOptions hlo_render_options) {
   tensorflow::mutex_lock lock(url_renderer_mu);
   if (format == RenderedGraphFormat::kUrl && url_renderer == nullptr) {
     return FailedPrecondition(
@@ -1663,7 +1666,7 @@ StatusOr<string> RenderAllPathsFromTo(const HloInstruction& from,
                    ""NODES***<br/><br/>"");
   }
   string rendered_dot =
-      HloDotDumper(from.parent(), label, debug_options, show_backend_config,
+      HloDotDumper(from.parent(), label, debug_options, hlo_render_options,
                    /*profile=*/nullptr, filter)
           .Dump();
   return WrapDotInFormat(rendered_dot, format);
",0,train
dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix

PiperOrigin-RevId: 313426932
Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",hlo_graph_dumper.h,"@@ -50,6 +50,14 @@ enum class RenderedGraphFormat {
   kUrl,
 };
 
+struct HloRenderOptions {
+  // Include the backend config string in the rendered graph.
+  bool show_backend_config = false;
+
+  // Include the fusion subcomputations in the rendered graph.
+  bool show_fusion_subcomputations = true;
+};
+
 // Renders an HLO module as a human-readable visual graph.
 //
 // Note that this only works well for relatively small graphs (no more than a
@@ -61,7 +69,7 @@ StatusOr<string> RenderGraph(
     const HloComputation& computation, absl::string_view label,
     const DebugOptions& debug_options, RenderedGraphFormat format,
     const HloExecutionProfile* hlo_execution_profile = nullptr,
-    bool show_backend_config = false);
+    HloRenderOptions hlo_render_options = {});
 
 // Like RenderGraph, but renders only nodes ""near"" the given node in the graph.
 //
@@ -73,7 +81,7 @@ StatusOr<string> RenderGraph(
 // will be omitted even if they are within the radius.
 StatusOr<string> RenderNeighborhoodAround(
     const HloInstruction& node, int radius, RenderedGraphFormat format,
-    bool show_backend_config = false,
+    HloRenderOptions hlo_render_options = {},
     const absl::flat_hash_set<const HloInstruction*>& boundary = {});
 
 // Renders nodes on any of the paths from `from` to `to`.  If there are more
@@ -82,7 +90,7 @@ StatusOr<string> RenderNeighborhoodAround(
 StatusOr<string> RenderAllPathsFromTo(const HloInstruction& from,
                                       const HloInstruction& to, int64 max_nodes,
                                       RenderedGraphFormat format,
-                                      bool show_backend_config = false);
+                                      HloRenderOptions hlo_render_options = {});
 
 // Registers a function which implements RenderedGraphFormat::kUrl.
 //
",0,train
dc18758c270de25d5b37a55d4b41af1157dbe625,tensorflow/tensorflow,"Roll forward ""Add a show_fusion_subcomputations command to interactive_graphviz"" with fix

PiperOrigin-RevId: 313426932
Change-Id: Ia2366ee899d7bd0d69448144d1c18164d5801753",interactive_graphviz.cc,"@@ -112,8 +112,7 @@ constexpr int64 kDefaultMaxNumNodesInAllPaths = 100;
 
 using absl::EqualsIgnoreCase;
 
-// A global control for whether backend configuration display is enabled.
-bool show_backend_config = true;
+HloRenderOptions hlo_render_options;
 
 HloInstruction* FindInstruction(const HloModule& module, string node_name) {
   if (absl::StartsWith(node_name, ""%"")) {
@@ -160,6 +159,8 @@ void DoHelpCommand() {
     Renders all nodes in <computation>.
   backend_config [on|off]
     Controls whether backend operation configuration information is printed.
+  show_fusion_subcomputations [on|off]
+    Controls whether fusion subcomputations are shown.
   list [name|op_name|op_type] <pattern>
     Lists all instructions whose name, metadata op_name, or metadata op_type
     contains <pattern> as a substring.
@@ -182,15 +183,32 @@ void DoHelpCommand() {
 // Turn metadata-printing on or off.
 void DoBackendConfigCommand(const std::vector<string>& tokens) {
   if (tokens.size() == 2 && tokens[1] == ""on"") {
-    show_backend_config = true;
+    hlo_render_options.show_backend_config = true;
   } else if (tokens.size() == 2 && tokens[1] == ""off"") {
-    show_backend_config = false;
+    hlo_render_options.show_backend_config = false;
   } else if (tokens.size() != 1) {
     std::cerr << ""(Illegal backend_config value.  Use either 'on' or 'off'.)""
               << std::endl;
   }
   std::cout << ""Backend configuration display ""
-            << (show_backend_config ? ""ON"" : ""OFF"") << std::endl;
+            << (hlo_render_options.show_backend_config ? ""ON"" : ""OFF"")
+            << std::endl;
+}
+
+// Turn fusion computation display on or off.
+void DoShowFusionSubcomputationsCommand(const std::vector<string>& tokens) {
+  if (tokens.size() == 2 && tokens[1] == ""on"") {
+    hlo_render_options.show_fusion_subcomputations = true;
+  } else if (tokens.size() == 2 && tokens[1] == ""off"") {
+    hlo_render_options.show_fusion_subcomputations = false;
+  } else if (tokens.size() != 1) {
+    std::cerr << ""(Illegal show_fusion_subcomputations value.  Use either ""
+                 ""'on' or 'off'.)""
+              << std::endl;
+  }
+  std::cout << ""Fusion subcomputations display ""
+            << (hlo_render_options.show_fusion_subcomputations ? ""ON"" : ""OFF"")
+            << std::endl;
 }
 
 // List all computations in the module.
@@ -373,7 +391,7 @@ void DoExtractCommand(const HloModule& module,
   auto extracted_module = ExtractModule(instr, height);
   std::cout << extracted_module->ToString(
                    HloPrintOptions::ShortParsable().set_print_backend_config(
-                       show_backend_config))
+                       hlo_render_options.show_backend_config))
             << std::endl;
 }
 
@@ -517,7 +535,7 @@ void DoAllPathsCommand(const Options& opts, const HloModule& module,
   }
   RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) {
     return RenderAllPathsFromTo(*from, *to, max_nodes, format,
-                                /*show_backend_config=*/show_backend_config);
+                                hlo_render_options);
   });
 }
 
@@ -582,15 +600,13 @@ void DoPlotCommand(const Options& opts, const HloModule& module,
     RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) {
       return RenderGraph(*comp, /*label=*/"""",
                          comp->parent()->config().debug_options(), format,
-                         /*hlo_execution_profile=*/nullptr,
-                         /*show_backend_config=*/show_backend_config);
+                         /*hlo_execution_profile=*/nullptr, hlo_render_options);
     });
   } else {
     RenderAndDisplayGraph(opts, [&](RenderedGraphFormat format) {
-      return RenderNeighborhoodAround(
-          *instr, graph_width, format,
-          /*show_backend_config=*/show_backend_config,
-          /*boundary=*/boundary);
+      return RenderNeighborhoodAround(*instr, graph_width, format,
+                                      hlo_render_options,
+                                      /*boundary=*/boundary);
     });
   }
 }
@@ -617,6 +633,8 @@ void InteractiveDumpGraphs(const Options& opts, const HloModule& module) {
       DoHelpCommand();
     } else if (tokens[0] == ""backend_config"") {
       DoBackendConfigCommand(tokens);
+    } else if (tokens[0] == ""show_fusion_subcomputations"") {
+      DoShowFusionSubcomputationsCommand(tokens);
     } else if (tokens[0] == ""list"") {
       if (tokens.size() > 1 && tokens[1] == ""computations"") {
         DoListComputationsCommand(module, tokens);
",0,train
fce49887e827abc2627fd2a7bc135800baaafc4f,tensorflow/tensorflow,"Performing the finalization of the LayerCollection outside of FisherEstimator's constructor.  This allows layers and losses to be registered after the FisherEstimator (or KFACOptimizer) has been constructed.

PiperOrigin-RevId: 188889252",estimator_test.py,"@@ -96,49 +96,57 @@ class EstimatorTest(test.TestCase):
       # Check that we throw an error if we try to build an estimator for vars
       # that were not manually registered.
       with self.assertRaises(ValueError):
-        estimator.FisherEstimator([self.weights, self.bias], 0.1, 0.2,
-                                  self.layer_collection)
+        est = estimator.FisherEstimator([self.weights, self.bias], 0.1, 0.2,
+                                        self.layer_collection)
+        est.make_ops_and_vars()
 
       # Check that we throw an error if we don't include registered variables,
       # i.e. self.weights
       with self.assertRaises(ValueError):
-        estimator.FisherEstimator([], 0.1, 0.2, self.layer_collection)
+        est = estimator.FisherEstimator([], 0.1, 0.2, self.layer_collection)
+        est.make_ops_and_vars()
 
   @test.mock.patch.object(utils.SubGraph, ""variable_uses"", return_value=42)
   def testVariableWrongNumberOfUses(self, mock_uses):
     with self.assertRaises(ValueError):
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection)
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection)
+      est.make_ops_and_vars()
 
   def testInvalidEstimationMode(self):
     with self.assertRaises(ValueError):
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection,
-                                estimation_mode=""not_a_real_mode"")
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection,
+                                      estimation_mode=""not_a_real_mode"")
+      est.make_ops_and_vars()
 
   def testGradientsModeBuild(self):
     with self._graph.as_default():
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection,
-                                estimation_mode=""gradients"")
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection,
+                                      estimation_mode=""gradients"")
+      est.make_ops_and_vars()
 
   def testEmpiricalModeBuild(self):
     with self._graph.as_default():
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection,
-                                estimation_mode=""empirical"")
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection,
+                                      estimation_mode=""empirical"")
+      est.make_ops_and_vars()
 
   def testCurvaturePropModeBuild(self):
     with self._graph.as_default():
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection,
-                                estimation_mode=""curvature_prop"")
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection,
+                                      estimation_mode=""curvature_prop"")
+      est.make_ops_and_vars()
 
   def testExactModeBuild(self):
     with self._graph.as_default():
-      estimator.FisherEstimator([self.weights], 0.1, 0.2,
-                                self.layer_collection,
-                                estimation_mode=""exact"")
+      est = estimator.FisherEstimator([self.weights], 0.1, 0.2,
+                                      self.layer_collection,
+                                      estimation_mode=""exact"")
+      est.make_ops_and_vars()
 
   def test_cov_update_thunks(self):
     """"""Ensures covariance update ops run once per global_step.""""""
",0,train
fce49887e827abc2627fd2a7bc135800baaafc4f,tensorflow/tensorflow,"Performing the finalization of the LayerCollection outside of FisherEstimator's constructor.  This allows layers and losses to be registered after the FisherEstimator (or KFACOptimizer) has been constructed.

PiperOrigin-RevId: 188889252",estimator.py,"@@ -149,8 +149,6 @@ class FisherEstimator(object):
     self._damping = damping
     self._estimation_mode = estimation_mode
     self._layers = layer_collection
-    self._layers.create_subgraph()
-    self._layers.check_registration(variables)
     self._gradient_fns = {
         ""gradients"": self._get_grads_lists_gradients,
         ""empirical"": self._get_grads_lists_empirical,
@@ -164,9 +162,6 @@ class FisherEstimator(object):
 
     self._name = name
 
-    self._instantiate_factors()
-    self._register_matrix_functions()
-
   @property
   def variables(self):
     return self._variables
@@ -285,6 +280,12 @@ class FisherEstimator(object):
       for block in self.blocks:
         block.register_matpower(exp)
 
+  def _finalize_layer_collection(self):
+    self._layers.create_subgraph()
+    self._layers.check_registration(self.variables)
+    self._instantiate_factors()
+    self._register_matrix_functions()
+
   def make_ops_and_vars(self, scope=None):
     """"""Make ops and vars with no specific device placement.
 
@@ -467,6 +468,8 @@ class FisherEstimator(object):
     """"""
     self._check_vars_unmade_and_set_made_flag()
 
+    self._finalize_layer_collection()
+
     scope = self.name if scope is None else scope
 
     cov_variable_thunks = [
",0,train
5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,expand_dims.cc,"@@ -29,7 +29,7 @@ constexpr int kOutputTensor = 0;
 TfLiteStatus ExpandTensorDim(TfLiteContext* context,
                              const TfLiteEvalTensor* input, int axis,
                              TfLiteEvalTensor* output) {
-  const TfLiteIntArray *input_dims = input->dims;
+  const TfLiteIntArray* input_dims = input->dims;
   TfLiteIntArray* output_dims = output->dims;
   if (axis < 0) {
     axis = output_dims->size + axis;
@@ -59,11 +59,12 @@ TfLiteStatus GetAxisValueFromTensor(TfLiteContext* context,
   }
 
   if (kTfLiteInt32 == (axis->type)) {
-    const int32_t *axis_ptr = tflite::micro::GetTensorData<int32_t>(axis);
+    const int32_t* axis_ptr = tflite::micro::GetTensorData<int32_t>(axis);
     *axis_value = axis_ptr[0];
     return kTfLiteOk;
   } else {
-    TF_LITE_KERNEL_LOG(context, ""Axis type %s (%d) not supported by Expand_Dims."",
+    TF_LITE_KERNEL_LOG(context,
+                       ""Axis type %s (%d) not supported by Expand_Dims."",
                        TfLiteTypeGetName(axis->type), axis->type);
     return kTfLiteError;
   }
@@ -77,7 +78,8 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* axis;
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, kAxisTensor, &axis));
   TfLiteTensor* output;
-  TF_LITE_ENSURE_OK(context, GetOutputSafe(context, node, kOutputTensor, &output));
+  TF_LITE_ENSURE_OK(context,
+                    GetOutputSafe(context, node, kOutputTensor, &output));
   output->type = input->type;
   if (IsDynamicTensor(axis)) {
     TF_LITE_KERNEL_LOG(context,
@@ -107,24 +109,28 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   int axis_value;
   TF_LITE_ENSURE_OK(context,
                     GetAxisValueFromTensor(context, axis, &axis_value));
-  if ((axis_value > static_cast<int32_t>(input_dims)) || (axis_value < static_cast<int32_t>(-(input_dims + 1)))) {
-    TF_LITE_KERNEL_LOG(context,
-                       ""Invalid Expand_Dims axis value (%d)."", axis_value);
+  if ((axis_value > static_cast<int32_t>(input_dims)) ||
+      (axis_value < static_cast<int32_t>(-(input_dims + 1)))) {
+    TF_LITE_KERNEL_LOG(context, ""Invalid Expand_Dims axis value (%d)."",
+                       axis_value);
     return kTfLiteError;
   }
   ExpandTensorDim(context, input, axis_value, output);
 
   switch (input->type) {
     case kTfLiteFloat32: {
-      memCopyN(tflite::micro::GetTensorData<float>(output), tflite::micro::GetTensorData<float>(input), flat_size);
+      memCopyN(tflite::micro::GetTensorData<float>(output),
+               tflite::micro::GetTensorData<float>(input), flat_size);
     } break;
     case kTfLiteInt8: {
-      memCopyN(tflite::micro::GetTensorData<int8_t>(output), tflite::micro::GetTensorData<int8_t>(input), flat_size);
+      memCopyN(tflite::micro::GetTensorData<int8_t>(output),
+               tflite::micro::GetTensorData<int8_t>(input), flat_size);
     } break;
     default:
-      TF_LITE_KERNEL_LOG(context,
-                         ""Expand_Dims only currently supports int8 and float32, got %d."",
-                         input->type);
+      TF_LITE_KERNEL_LOG(
+          context,
+          ""Expand_Dims only currently supports int8 and float32, got %d."",
+          input->type);
       return kTfLiteError;
   }
   return kTfLiteOk;
",0,train
5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,expand_dims_test.cc,"@@ -24,7 +24,7 @@ namespace tflite {
 namespace testing {
 namespace {
 
-//Hard coded dimension limit. Is there a predefined constant?
+// Hard coded dimension limit. Is there a predefined constant?
 constexpr int MaxDims = 254;
 
 template <typename T>
@@ -93,9 +93,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest0) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {0};
   const int golden_dims[] = {3, 1, 2, 2};
-  tflite::testing::TestExpandDims<int8_t>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<int8_t>(input_dims, input_data, axis_dims,
+                                          axis_data, golden_dims, golden_data,
+                                          output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest1) {
@@ -106,9 +106,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest1) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {1};
   const int golden_dims[] = {3, 2, 1, 2};
-  tflite::testing::TestExpandDims<float>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<float>(input_dims, input_data, axis_dims,
+                                         axis_data, golden_dims, golden_data,
+                                         output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest2) {
@@ -119,9 +119,9 @@ TF_LITE_MICRO_TEST(ExpandDimsPositiveAxisTest2) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {2};
   const int golden_dims[] = {3, 2, 2, 1};
-  tflite::testing::TestExpandDims<int8_t>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<int8_t>(input_dims, input_data, axis_dims,
+                                          axis_data, golden_dims, golden_data,
+                                          output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest4) {
@@ -132,9 +132,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest4) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {-4};
   const int golden_dims[] = {4, 1, 3, 1, 2};
-  tflite::testing::TestExpandDims<int8_t>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<int8_t>(input_dims, input_data, axis_dims,
+                                          axis_data, golden_dims, golden_data,
+                                          output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest3) {
@@ -145,9 +145,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest3) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {-3};
   const int golden_dims[] = {4, 3, 1, 1, 2};
-  tflite::testing::TestExpandDims<float>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<float>(input_dims, input_data, axis_dims,
+                                         axis_data, golden_dims, golden_data,
+                                         output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest2) {
@@ -158,9 +158,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest2) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {-2};
   const int golden_dims[] = {4, 1, 2, 1, 3};
-  tflite::testing::TestExpandDims<int8_t>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<int8_t>(input_dims, input_data, axis_dims,
+                                          axis_data, golden_dims, golden_data,
+                                          output_data);
 }
 
 TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest1) {
@@ -171,9 +171,9 @@ TF_LITE_MICRO_TEST(ExpandDimsNegativeAxisTest1) {
   const int axis_dims[] = {1, 1};
   const int axis_data[] = {-1};
   const int golden_dims[] = {4, 1, 3, 2, 1};
-  tflite::testing::TestExpandDims<float>(
-             input_dims, input_data, axis_dims, axis_data,
-             golden_dims, golden_data, output_data);
+  tflite::testing::TestExpandDims<float>(input_dims, input_data, axis_dims,
+                                         axis_data, golden_dims, golden_data,
+                                         output_data);
 }
 
 TF_LITE_MICRO_TESTS_END
",0,train
5fb8e65180a86fac58709d248201c600f4817f5f,tensorflow/tensorflow,Fix clang formatting errors for micro op EXPAND_DIMS,micro_mutable_op_resolver.h,"@@ -206,7 +206,8 @@ class MicroMutableOpResolver : public MicroOpResolver {
   }
 
   TfLiteStatus AddExpandDims() {
-    return AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXP(), ParseExpandDims);
+    return AddBuiltin(BuiltinOperator_EXPAND_DIMS, Register_EXP(),
+                      ParseExpandDims);
   }
 
   TfLiteStatus AddFloor() {
",0,train
890126848c2218c08abef80b44a6f2cb958d642b,tensorflow/tensorflow,"Add instruction count method to HloModule.

PiperOrigin-RevId: 182227249",hlo_module.cc,"@@ -457,6 +457,14 @@ HloInstruction* HloModule::OutlineExpressionFromComputation(
   return call;
 }
 
+int64 HloModule::instruction_count() const {
+  int64 n = 0;
+  for (const auto& computation : computations_) {
+    n += computation->instruction_count();
+  }
+  return n;
+}
+
 std::list<HloComputation*> HloModule::MakeComputationPostOrder() const {
   // First determine all root computations by building a set of nonroot
   // computations (computations which are called by an instruction in the
",0,train
890126848c2218c08abef80b44a6f2cb958d642b,tensorflow/tensorflow,"Add instruction count method to HloModule.

PiperOrigin-RevId: 182227249",hlo_module.h,"@@ -129,6 +129,9 @@ class HloModule {
   // Gets the number of computations in this module.
   int64 computation_count() const { return computations_.size(); }
 
+  // Gets the number of instructions in this module.
+  int64 instruction_count() const;
+
   // Compute and return a post order of all computations in the module. The sort
   // is defined like so: if computation A has an instruction which calls
   // computation B, then A will appear after B in the sort.
",0,train
aa59c42debb5146da4f9192321c92fe06eaec35d,tensorflow/tensorflow,"Solve IndexError: list index out of range #43561

Signed-off-by: Hollow Man <hollowman@hollowman.ml>",functional.py,"@@ -1089,10 +1089,13 @@ def _should_skip_first_node(layer):
   # Networks that are constructed with an Input layer/shape start with a
   # pre-existing node linking their input to output. This node is excluded from
   # the network config.
-  return (isinstance(layer, Functional) and
-          # Filter out Sequential models without an input shape.
-          isinstance(layer._self_tracked_trackables[0],
-                     input_layer_module.InputLayer))
+  if layer._self_tracked_trackables:
+    return (isinstance(layer, Functional) and
+            # Filter out Sequential models without an input shape.
+            isinstance(layer._self_tracked_trackables[0],
+                       input_layer_module.InputLayer))
+  else:
+    return isinstance(layer, Functional)
 
 
 def connect_ancillary_layers(model, created_layers):
",0,train
d6e2513d60999bf0cf315c42a14c0e45eb49cda2,tensorflow/tensorflow,"Support profiling multiple TPUs through one gRPC channel and one session.
Data are saved with a host prefix.

PiperOrigin-RevId: 192523668",capture_tpu_profile.cc,"@@ -26,6 +26,7 @@ limitations under the License.
 
 #include ""tensorflow/contrib/tpu/profiler/dump_tpu_profile.h""
 #include ""tensorflow/contrib/tpu/profiler/tpu_profiler.grpc.pb.h""
+#include ""tensorflow/contrib/tpu/profiler/tpu_profiler_analysis.grpc.pb.h""
 #include ""tensorflow/contrib/tpu/profiler/version.h""
 #include ""tensorflow/core/distributed_runtime/rpc/grpc_util.h""
 #include ""tensorflow/core/lib/core/errors.h""
@@ -40,6 +41,7 @@ namespace tensorflow {
 namespace tpu {
 namespace {
 
+using ::tensorflow::grpc::TPUProfileAnalysis;
 using ::tensorflow::TPUProfiler;
 
 constexpr uint64 kMaxEvents = 1000000;
@@ -64,11 +66,10 @@ Status ValidateHostPortPair(const string& host_port) {
   return Status::OK();
 }
 
-// Returns whether the returned trace is empty.
-// Failure are handled by CHECK, i.e. abort()
-bool Profile(const string& service_addr, const string& logdir, int duration_ms,
-             const string& repository_root, const string& session_id,
-             const ProfileOptions& opts) {
+ProfileRequest PopulateProfileRequest(int duration_ms,
+                                      const string& repository_root,
+                                      const string& session_id,
+                                      const ProfileOptions& opts) {
   ProfileRequest request;
   request.set_duration_ms(duration_ms);
   request.set_max_events(kMaxEvents);
@@ -83,6 +84,17 @@ bool Profile(const string& service_addr, const string& logdir, int duration_ms,
   *request.mutable_opts() = opts;
   std::cout << ""Limiting the number of trace events to "" << kMaxEvents
             << std::endl;
+  return request;
+}
+
+// Returns whether the returned trace is empty.
+// Failure are handled by CHECK, i.e. abort()
+bool Profile(const string& service_addr, const string& logdir, int duration_ms,
+             const string& repository_root, const string& session_id,
+             const ProfileOptions& opts) {
+  ProfileRequest request =
+      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
+
   ::grpc::ClientContext context;
   ::grpc::ChannelArguments channel_args;
   // TODO(ioeric): use `SetMaxReceiveMessageSize` instead once it's available.
@@ -120,7 +132,36 @@ bool NewSession(const string& service_addr,
                 const std::vector<tensorflow::string>& hostnames,
                 int duration_ms, const string& repository_root,
                 const string& session_id, const ProfileOptions& opts) {
-  return true;
+  NewProfileSessionRequest new_session_request;
+  *new_session_request.mutable_request() =
+      PopulateProfileRequest(duration_ms, repository_root, session_id, opts);
+  new_session_request.set_repository_root(repository_root);
+  new_session_request.set_session_id(session_id);
+  std::copy(
+      hostnames.begin(), hostnames.end(),
+      proto2::RepeatedFieldBackInserter(new_session_request.mutable_hosts()));
+
+  ::grpc::ClientContext context;
+  ::grpc::ChannelArguments channel_args;
+  // TODO(qiuminxu): use `NewHostPortGrpcChannel` instead once their
+  // `ValidateHostPortPair` checks for empty host string case.
+  channel_args.SetMaxReceiveMessageSize(std::numeric_limits<int32>::max());
+  // TODO(jiesun): GRPC support following relevant naming scheme:
+  // 1. dns:///host:port
+  // 2. ipv4:host:port or ipv6:[host]:port
+  // We might need to change the prefix which depends on what TPU name resolver
+  // will give us.
+  std::unique_ptr<TPUProfileAnalysis::Stub> stub =
+      TPUProfileAnalysis::NewStub(::grpc::CreateCustomChannel(
+          ""dns:///"" + service_addr, ::grpc::InsecureChannelCredentials(),
+          channel_args));
+  NewProfileSessionResponse new_session_response;
+  TF_QCHECK_OK(FromGrpcStatus(
+      stub->NewSession(&context, new_session_request, &new_session_response)));
+
+  std::cout << ""Profile session succeed for hosts:""
+            << str_util::Join(hostnames, "","");
+  return new_session_response.empty_trace();
 }
 
 }  // namespace
",0,test
d6e2513d60999bf0cf315c42a14c0e45eb49cda2,tensorflow/tensorflow,"Support profiling multiple TPUs through one gRPC channel and one session.
Data are saved with a host prefix.

PiperOrigin-RevId: 192523668",dump_tpu_profile.cc,"@@ -64,7 +64,8 @@ Status WriteGzippedDataToFile(const string& filename, const string& data) {
 
 Status DumpTraceToLogDirectory(StringPiece run_dir, const string& host_prefix,
                                const string& encoded_trace, std::ostream* os) {
-  string proto_path = JoinPath(run_dir, kProtoTraceFileName);
+  string proto_path =
+      JoinPath(run_dir, StrCat(host_prefix, kProtoTraceFileName));
   TF_RETURN_IF_ERROR(
       WriteStringToFile(Env::Default(), proto_path, encoded_trace));
   LOG(INFO) << ""Dumped raw-proto trace data to "" << proto_path;
",0,test
13dd442b4c248a5d0cbc5ed7b407e2fd98712cc0,tensorflow/tensorflow,"Remove unused transcription array.
Change: 151959018",ctc_decoder.h,"@@ -89,7 +89,6 @@ class CTCGreedyDecoder : public CTCDecoder {
       std::vector<int>& output_b = (*output)[0][b];
 
       int prev_class_ix = -1;
-      std::vector<int> transcription;
       (*scores)(b, 0) = 0;
       for (int t = 0; t < seq_len_b; ++t) {
         auto row = input[t].row(b);
@@ -98,7 +97,6 @@ class CTCGreedyDecoder : public CTCDecoder {
         if (max_class_ix != blank_index_ &&
             !(merge_repeated_ && max_class_ix == prev_class_ix)) {
           output_b.push_back(max_class_ix);
-          transcription.push_back(max_class_ix);
         }
         prev_class_ix = max_class_ix;
       }
",0,test
5a8679283766231c98e7b3074bad646111f96f2f,tensorflow/tensorflow,Try to pacify pylint.,def_function_test.py,"@@ -729,8 +729,8 @@ class DefFunctionTest(test.TestCase, parameterized.TestCase):
       (None, 'foo.bar'),                      # implements
       (None, True, False),                    # relax_shapes
   ))
-  def test_pickle(self, input_signature, autograph, autograph_options, implements,
-                  relax_shapes):
+  def test_pickle(self, input_signature, autograph, autograph_options,
+                  implements, relax_shapes):
     """"""@function objects can be pickled and unpickled.""""""
     # Can't pickle functions in __main__:
     from tensorflow.python.eager.def_function_test import undecorated_function
",0,train
9fd71390a9839c7912d83fffd4f762ea4970e3f1,tensorflow/tensorflow,"Checkpoints default values for RunConfig (#8488)

* Default value of save_checkpoints_secs

Do not use the default value of save_checkpoints_secs if save_checkpoints_steps is specified in the RunConfig call.

* Update run_config.py

* Addressing None, None caller input 

As mentioned in the review comments, the caller may not want any checkpoints and can pass None, None for these parameters. This is to address that case.

* Fix indentation

* Update run_config.py",run_config.py,"@@ -198,6 +198,7 @@ class RunConfig(ClusterConfig):
   parameter servers), you probably want to use `learn_runner.EstimatorConfig`
   instead.
   """"""
+  _USE_DEFAULT = 0
 
   def __init__(self,
                master=None,
@@ -206,7 +207,7 @@ class RunConfig(ClusterConfig):
                gpu_memory_fraction=1,
                tf_random_seed=None,
                save_summary_steps=100,
-               save_checkpoints_secs=600,
+               save_checkpoints_secs=_USE_DEFAULT,
                save_checkpoints_steps=None,
                keep_checkpoint_max=5,
                keep_checkpoint_every_n_hours=10000,
@@ -258,6 +259,11 @@ class RunConfig(ClusterConfig):
     self._tf_random_seed = tf_random_seed
     self._save_summary_steps = save_summary_steps
     self._save_checkpoints_secs = save_checkpoints_secs
+    if save_checkpoints_secs == RunConfig._USE_DEFAULT:
+      if save_checkpoints_steps is None:
+        self._save_checkpoints_secs = 600
+      else:
+        self._save_checkpoints_secs = None
     self._save_checkpoints_steps = save_checkpoints_steps
 
     # TODO(weiho): Remove these after ModelFn refactoring, when users can
",0,train
8b7aea89ae82ccc5da20e5ab029d069ddeff3f19,tensorflow/tensorflow,"Check for correct linkable output tensor descriptor.

PiperOrigin-RevId: 278413334
Change-Id: Ie6ec0d82b7972f80c8760663a96c79df9b68840d",inference_context.cc,"@@ -390,6 +390,13 @@ void InferenceContext::Merge() {
         !IsReady(ready_tensors, linkable_node)) {
       continue;
     }
+    const auto& original_dst_def =
+        node.operations[0]->GetDefinition().dst_tensors[0];
+    const auto& link_dst_def =
+        linkable_node.operations[0]->GetDefinition().dst_tensors[0];
+    if (original_dst_def != link_dst_def) {
+      continue;
+    }
     MergeCLNodes(&linkable_node, &node);
     nodes_.erase(nodes_.begin() + next_nodes[0]);
     i -= 1;
",0,train
8b7aea89ae82ccc5da20e5ab029d069ddeff3f19,tensorflow/tensorflow,"Check for correct linkable output tensor descriptor.

PiperOrigin-RevId: 278413334
Change-Id: Ie6ec0d82b7972f80c8760663a96c79df9b68840d",tensor_type.h,"@@ -41,6 +41,8 @@ struct TensorDescriptor {
   bool operator==(const TensorDescriptor& d) const {
     return data_type == d.data_type && storage_type == d.storage_type;
   }
+
+  bool operator!=(const TensorDescriptor& d) const { return !(*this == d); }
 };
 
 std::string ToString(TensorStorageType type);
",0,train
47b674c938a38c6d88f27244a12ce3944c2f0464,tensorflow/tensorflow,"[XLA] Remove a source of nondeterminism in HLO clustering.

Record the HLO clusters with std::set instead of std::unordered_set to ensure
that the algorithm to assign each cluster a sequence number during a set
traversal is deterministic.

PiperOrigin-RevId: 178830794",mark_for_compilation_pass.cc,"@@ -172,10 +172,15 @@ bool HasResourceInputOrOutput(const Node& node) {
                    DT_RESOURCE) != node.output_types().end();
 }
 
+struct NodeCompare {
+  bool operator()(const Node* a, const Node* b) { return a->id() < b->id(); }
+};
+using OrderedNodeSet = std::set<Node*, NodeCompare>;
+
 Status FindCompilationCandidates(
     const Graph& graph, FunctionLibraryDefinition* flib_def, Env* env,
     const std::function<bool(const Node*, const DeviceType&)>& is_compilable_fn,
-    std::unordered_set<Node*>* candidates) {
+    OrderedNodeSet* candidates) {
   OptimizerOptions opts;
   std::unique_ptr<ProcessFunctionLibraryRuntime> pflr(
       new ProcessFunctionLibraryRuntime(nullptr, env, TF_GRAPH_DEF_VERSION,
@@ -354,7 +359,7 @@ Status MarkForCompilationPass::RunImpl(
 
   Graph* graph = options.graph->get();
 
-  std::unordered_set<Node*> compilation_candidates;
+  OrderedNodeSet compilation_candidates;
   TF_RETURN_IF_ERROR(FindCompilationCandidates(
       *graph, options.flib_def,
       (options.session_options != nullptr) ? options.session_options->env
",0,train
dd1cfe2f2092517d8a57bad04b2cb269a19b37ee,tensorflow/tensorflow,"Convert InputBuffer to BufferedInputStream for FixedLengthRecordDatasetOp

This fix converts InputBuffer to BufferedInputStream for
FixedLengthRecordDatasetOp, so that it is possible to add a
compression layer on top of FixedLengthRecordDatasetOp.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",reader_dataset_ops.cc,"@@ -383,13 +383,13 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
         mutex_lock l(mu_);
         do {
           // We are currently processing a file, so try to read the next record.
-          if (input_buffer_) {
-            const int64 current_pos = input_buffer_->Tell();
+          if (buffered_input_stream_) {
+            const int64 current_pos = buffered_input_stream_->Tell();
             DCHECK_GE(file_pos_limit_, 0);
             if (current_pos < file_pos_limit_) {
               string record;
-              TF_RETURN_IF_ERROR(
-                  input_buffer_->ReadNBytes(dataset()->record_bytes_, &record));
+              TF_RETURN_IF_ERROR(buffered_input_stream_->ReadNBytes(
+                  dataset()->record_bytes_, &record));
               // Produce the record as output.
               out_tensors->emplace_back(ctx->allocator({}), DT_STRING,
                                         TensorShape({}));
@@ -400,7 +400,7 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
 
             // We have reached the end of the current file, so maybe
             // move on to next file.
-            input_buffer_.reset();
+            buffered_input_stream_.reset();
             file_.reset();
             ++current_file_index_;
           }
@@ -432,10 +432,10 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
           }
           TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile(
               dataset()->filenames_[current_file_index_], &file_));
-          input_buffer_.reset(
-              new io::InputBuffer(file_.get(), dataset()->buffer_size_));
+          buffered_input_stream_.reset(new io::BufferedInputStream(
+              file_.get(), dataset()->buffer_size_));
           TF_RETURN_IF_ERROR(
-              input_buffer_->SkipNBytes(dataset()->header_bytes_));
+              buffered_input_stream_->SkipNBytes(dataset()->header_bytes_));
         } while (true);
       }
 
@@ -450,10 +450,11 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
         TF_RETURN_IF_ERROR(writer->WriteScalar(full_name(""current_file_index""),
                                                current_file_index_));
 
-        // `input_buffer_` is empty if
+        // `buffered_input_stream_` is empty if
         // 1. GetNext has not been called even once.
         // 2. All files have been read and iterator has been exhausted.
-        int64 current_pos = input_buffer_ ? input_buffer_->Tell() : -1;
+        int64 current_pos =
+            buffered_input_stream_ ? buffered_input_stream_->Tell() : -1;
         TF_RETURN_IF_ERROR(
             writer->WriteScalar(full_name(""current_pos""), current_pos));
         return Status::OK();
@@ -471,18 +472,18 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
             reader->ReadScalar(full_name(""current_pos""), &current_pos));
 
         // Seek to current_pos.
-        input_buffer_.reset();
+        buffered_input_stream_.reset();
         file_.reset();
-        if (current_pos >= 0) {  // There was an active input_buffer_.
+        if (current_pos >= 0) {  // There was an active buffered_input_stream_.
           uint64 file_size;
           TF_RETURN_IF_ERROR(ctx->env()->GetFileSize(
               dataset()->filenames_[current_file_index_], &file_size));
           file_pos_limit_ = file_size - dataset()->footer_bytes_;
           TF_RETURN_IF_ERROR(ctx->env()->NewRandomAccessFile(
               dataset()->filenames_[current_file_index_], &file_));
-          input_buffer_.reset(
-              new io::InputBuffer(file_.get(), dataset()->buffer_size_));
-          TF_RETURN_IF_ERROR(input_buffer_->Seek(current_pos));
+          buffered_input_stream_.reset(new io::BufferedInputStream(
+              file_.get(), dataset()->buffer_size_));
+          TF_RETURN_IF_ERROR(buffered_input_stream_->SkipNBytes(current_pos));
         }
 
         return Status::OK();
@@ -492,8 +493,9 @@ class FixedLengthRecordDatasetOp : public DatasetOpKernel {
       mutex mu_;
       size_t current_file_index_ GUARDED_BY(mu_) = 0;
       std::unique_ptr<RandomAccessFile> file_
-          GUARDED_BY(mu_);  // must outlive input_buffer_
-      std::unique_ptr<io::InputBuffer> input_buffer_ GUARDED_BY(mu_);
+          GUARDED_BY(mu_);  // must outlive buffered_input_stream_
+      std::unique_ptr<io::RandomAccessInputStream> buffered_input_stream_
+          GUARDED_BY(mu_);
       int64 file_pos_limit_ GUARDED_BY(mu_) = -1;
     };
 
",0,train
78c34b2950846690673ccdd43ff14ba109fbddd6,tensorflow/tensorflow,"Improves error behavior in rewrite pass.

PiperOrigin-RevId: 386375090
Change-Id: Ie4e44c744a5a25ee1dcd3502a5cb96c11e977e5a",distributed_tpu_rewrite_pass.cc,"@@ -458,6 +458,10 @@ class TensorDevicePlacer {
   // Reports that the argument/return-value at index has been assigned
   // by the user to a given device.
   void ReportDeviceAssigned(int64_t device, int64_t index) {
+    if (device >= index_nodes_.size()) {
+      LOG(DFATAL) << ""Sharding assignment is out of bounds. Check that the ""
+                     ""number of nodes is properly set."";
+    }
     DeviceNode* node = &index_nodes_.at(device);
     node->size += sizes_.at(index);
     heap_.Adjust(node);
",0,train
99ef7181786b4bc471b10582fdab21993bda152f,tensorflow/tensorflow,"Adjust TPUEstimator timeout for worker shutdown to 60 seconds.

PiperOrigin-RevId: 198477309",tpu_estimator.py,"@@ -2228,11 +2228,11 @@ class TPUEstimator(estimator_lib.Estimator):
           if shutdown_mode:
             if shutdown_mode == 'shutdown_worker':
               finalizer_hooks = [
-                  session_support.ShutdownLameWorkers(timeout_ms=1000),
+                  session_support.ShutdownLameWorkers(timeout_ms=60*1000),
               ]
             elif shutdown_mode == 'shutdown_computation':
               finalizer_hooks = [
-                  session_support.RestartComputation(timeout_ms=1000),
+                  session_support.RestartComputation(timeout_ms=60*1000),
               ]
             else:
               raise ValueError('Unknown TF_TPU_GRACEFUL_SHUTDOWN_MODE ""%s""' %
",0,train
54b5a2163bc2c5a13db8de39fc99ae558fc854a4,tensorflow/tensorflow,"batch_matmul_op_test.py: Updated to pass in TF2, by using gradient_checker_v2 and removing placeholder nodes.

PiperOrigin-RevId: 223863787",batch_matmul_op_test.py,"@@ -20,9 +20,8 @@ from __future__ import print_function
 
 import numpy as np
 
-from tensorflow.python.framework import constant_op
-from tensorflow.python.ops import array_ops
-from tensorflow.python.ops import gradient_checker
+from tensorflow.python.framework import test_util
+from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
@@ -76,25 +75,18 @@ class BatchMatmulOpTest(test.TestCase):
 
   # Compares _tfpBatchMatmul(x, y, alpha, adj) and _npBatchMatMul(x, y, alpha,
   # adj)
-  def _compare(self, x_in, y_in, adjoint_a, adjoint_b, static_shape=True):
+  def _compare(self, x_in, y_in, adjoint_a, adjoint_b):
     x_t_shape = x_in.shape[:-2] + (x_in.shape[-1], x_in.shape[-2])
     y_t_shape = y_in.shape[:-2] + (y_in.shape[-1], y_in.shape[-2])
     x = x_in if not adjoint_a else x_in.reshape(x_t_shape)
     y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
     is_floating = x.dtype != np.int32
     tol = 100 * np.finfo(x.dtype).eps if is_floating else 0
-    with self.cached_session(use_gpu=is_floating) as sess:
-      if static_shape:
-        z0 = math_ops.matmul(x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)
-        z0_val = self.evaluate(z0)
-      else:
-        x_ph = array_ops.placeholder(x.dtype)
-        y_ph = array_ops.placeholder(y.dtype)
-        z0 = math_ops.matmul(
-            x_ph, y_ph, adjoint_a=adjoint_a, adjoint_b=adjoint_b)
-        z0_val = sess.run(z0, feed_dict={x_ph: x, y_ph: y})
+    with test_util.device(use_gpu=is_floating):
+      z0 = math_ops.matmul(
+          x, y, adjoint_a=adjoint_a, adjoint_b=adjoint_b)
       z1 = self._npBatchMatmul(x, y, adjoint_a, adjoint_b)
-      self.assertAllClose(z0_val, z1, rtol=tol, atol=tol)
+      self.assertAllClose(z0, z1, rtol=tol, atol=tol)
 
   def _rand(self, shape, dtype):
     vals = np.array(np.random.normal(-10, 10, np.prod(shape)), dtype=dtype)
@@ -103,42 +95,41 @@ class BatchMatmulOpTest(test.TestCase):
       vals += 1j * imag
     return vals.reshape(shape)
 
-  def _testNonEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape):
+  def _testNonEmpty(self, dtype, adjoint_a, adjoint_b):
 
-    def compareNonEmpty(self, a_shape, b_shape):
+    def CompareNonEmpty(self, a_shape, b_shape):
       self._compare(
           self._rand(a_shape, dtype),
-          self._rand(b_shape, dtype), adjoint_a, adjoint_b, use_static_shape)
+          self._rand(b_shape, dtype), adjoint_a, adjoint_b)
 
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 5])
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 1])
-    compareNonEmpty(self, [1, 1, 3], [1, 3, 5])
-    compareNonEmpty(self, [1, 2, 3], [1, 3, 5])
-    compareNonEmpty(self, [7, 1, 3], [7, 3, 5])
-    compareNonEmpty(self, [7, 2, 3], [7, 3, 1])
-    compareNonEmpty(self, [7, 2, 3], [7, 3, 5])
-    compareNonEmpty(self, [10, 64, 75], [10, 75, 30])
-    compareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 1])
+    CompareNonEmpty(self, [1, 1, 3], [1, 3, 5])
+    CompareNonEmpty(self, [1, 2, 3], [1, 3, 5])
+    CompareNonEmpty(self, [7, 1, 3], [7, 3, 5])
+    CompareNonEmpty(self, [7, 2, 3], [7, 3, 1])
+    CompareNonEmpty(self, [7, 2, 3], [7, 3, 5])
+    CompareNonEmpty(self, [10, 64, 75], [10, 75, 30])
+    CompareNonEmpty(self, [5, 7, 2, 3], [5, 7, 3, 5])
 
-  def _testEmpty(self, dtype, adjoint_a, adjoint_b, use_static_shape):
+  def _testEmpty(self, dtype, adjoint_a, adjoint_b):
 
-    def compareEmpty(self, a_shape, b_shape):
+    def CompareEmpty(self, a_shape, b_shape):
       self._compare(
           np.zeros(a_shape).astype(dtype),
-          np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b,
-          use_static_shape)
+          np.zeros(b_shape).astype(dtype), adjoint_a, adjoint_b)
 
-    compareEmpty(self, [0, 3, 2], [0, 2, 4])
-    compareEmpty(self, [3, 0, 2], [3, 2, 5])
-    compareEmpty(self, [3, 3, 2], [3, 2, 0])
+    CompareEmpty(self, [0, 3, 2], [0, 2, 4])
+    CompareEmpty(self, [3, 0, 2], [3, 2, 5])
+    CompareEmpty(self, [3, 3, 2], [3, 2, 0])
 
 
-def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b, use_static_shape):
+def _GetBatchMatmulOpTest(dtype, adjoint_a, adjoint_b):
 
   def Test(self):
     np.random.seed(42)
-    self._testNonEmpty(dtype, adjoint_a, adjoint_b, use_static_shape)
-    self._testEmpty(dtype, adjoint_a, adjoint_b, use_static_shape)
+    self._testNonEmpty(dtype, adjoint_a, adjoint_b)
+    self._testEmpty(dtype, adjoint_a, adjoint_b)
 
   return Test
 
@@ -154,17 +145,13 @@ class BatchMatmulGradientTest(test.TestCase):
     y = y_in if not adjoint_b else y_in.reshape(y_t_shape)
     epsilon = np.finfo(x.dtype).eps
     delta = epsilon**(1.0 / 3.0)
-    with self.cached_session(use_gpu=True):
-      inx = constant_op.constant(x)
-      iny = constant_op.constant(y)
-      z = math_ops.matmul(inx, iny, adjoint_a, adjoint_b)
-      loss = math_ops.reduce_sum(z)
-      ((x_jacob_t, x_jacob_n),
-       (y_jacob_t, y_jacob_n)) = gradient_checker.compute_gradient(
-           [inx, iny], [x.shape, y.shape],
-           loss, [1],
-           x_init_value=[x, y],
-           delta=delta)
+    def Loss(x, y):
+      z = math_ops.matmul(x, y, adjoint_a, adjoint_b)
+      return math_ops.reduce_sum(z)
+    with self.session(use_gpu=True):
+      ((x_jacob_t, y_jacob_t),
+       (x_jacob_n, y_jacob_n)) = gradient_checker_v2.compute_gradient(
+           Loss, [x, y], delta=delta)
       tol = 20 * delta
       self.assertAllClose(x_jacob_t, x_jacob_n, rtol=tol, atol=tol)
       self.assertAllClose(y_jacob_t, y_jacob_n, rtol=tol, atol=tol)
@@ -202,11 +189,9 @@ if __name__ == ""__main__"":
     for adjoint_a_ in False, True:
       for adjoint_b_ in False, True:
         name = ""%s_%s_%s"" % (dtype_.__name__, adjoint_a_, adjoint_b_)
-        for use_static_shape in True, False:
-          setattr(BatchMatmulOpTest,
-                  ""testBatchMatmulOp_"" + name + (""_%s"" % use_static_shape),
-                  _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_,
-                                        use_static_shape))
+        setattr(BatchMatmulOpTest,
+                ""testBatchMatmulOp_"" + name,
+                _GetBatchMatmulOpTest(dtype_, adjoint_a_, adjoint_b_))
         if dtype_ is not np.int32:
           setattr(BatchMatmulGradientTest, ""testBatchMatmulGradient_"" + name,
                   _GetBatchMatmulGradientTest(dtype_, adjoint_a_, adjoint_b_))
",0,test
4d6c4c72b4ffd2c558d7908a1f3ec32f2f92379e,tensorflow/tensorflow,"Reduce precision in one conv op test.

This test was flakily failing, presumably due to nondeterministic cudnn
convolution algorithm choices.

PiperOrigin-RevId: 248122343",conv_ops_test.py,"@@ -2192,7 +2192,8 @@ class Conv2DTest(test.TestCase):
           padding=[[0, 0], [2, 2], [2, 2], [0, 0]],
           test_input=True,
           data_format=data_format,
-          use_gpu=use_gpu)
+          use_gpu=use_gpu,
+          max_err=0.003)
 
   @test_util.deprecated_graph_mode_only
   def testFilterGradient2x2PaddingStrideOne(self):
",0,train
4588361b6a5b48aad1ead88755d2afef38605af5,tensorflow/tensorflow,"tfdbg: adjust the scope of mutex for keeping track of disk usage
PiperOrigin-RevId: 211966207",debug_io_utils.cc,"@@ -693,6 +693,7 @@ uint64 DebugFileIO::diskBytesUsed = 0;
 mutex DebugFileIO::bytes_mu(LINKER_INITIALIZED);
 
 bool DebugFileIO::requestDiskByteUsage(uint64 bytes) {
+  mutex_lock l(bytes_mu);
   if (globalDiskBytesLimit == 0) {
     const char* env_tfdbg_disk_bytes_limit = getenv(""TFDBG_DISK_BYTES_LIMIT"");
     if (env_tfdbg_disk_bytes_limit == nullptr ||
@@ -707,7 +708,6 @@ bool DebugFileIO::requestDiskByteUsage(uint64 bytes) {
   if (bytes == 0) {
     return true;
   }
-  mutex_lock l(bytes_mu);
   if (diskBytesUsed + bytes < globalDiskBytesLimit) {
     diskBytesUsed += bytes;
     return true;
",0,train
b1f5d9e26125b4ad62c4566e4c2ddd784ce625bc,tensorflow/tensorflow,Add tests to validate only parameters typed with ops.Tensor are converted to Tensors,function_test.py,"@@ -3932,7 +3932,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
     gradients(constant_op.constant([[[1.0], [2.0]]]))  # No error is raised
 
-  def testTraceWithAnnotationsBasic(self):
+  def testFollowTypeHintsTraceBasic(self):
     trace_count = [0]
     def func(x: ops.Tensor):
       trace_count[0] += 1
@@ -3952,7 +3952,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     disabled(3) # Retrace
     self.assertEqual(trace_count[0], 3)
 
-  def testTraceWithAnnotationsWithArgs(self):
+  def testFollowTypeHintsTraceWithArgs(self):
     trace_count = [0]
     def func(*args: ops.Tensor):
       trace_count[0] += 1
@@ -3973,7 +3973,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     disabled(args2) # Retrace
     self.assertEqual(trace_count[0], 2)
 
-  def testTraceWithAnnotationsWithKwargs(self):
+  def testFollowTypeHintsTraceWithKwargs(self):
     trace_count = [0]
     def func(t: ops.Tensor, **kwargs: ops.Tensor):
       trace_count[0] += 1
@@ -3991,7 +3991,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     disabled(2, x=2, y=2.0, z=""two"") # Retrace
     self.assertEqual(trace_count[0], 2)
 
-  def testTraceWithAnnotationsWithMultipleInputTypes(self):
+  def testFollowTypeHintsTraceWithMultipleInputTypes(self):
     trace_count = [0]
     def func(t: ops.Tensor, *args: ops.Tensor,
              **kwargs: ops.Tensor):
@@ -4010,6 +4010,62 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     disabled(2, constant_op.constant(2), ""str2"", x=5.0) # Retrace
     self.assertEqual(trace_count[0], 2)
 
+  def testFollowTypeHintsTraceWithOnlyArgNamed(self):
+    trace_count = [0]
+    def func(t: ops.Tensor, i: int = 1, **kwargs):
+      trace_count[0] += 1
+      return t
+
+    enabled = def_function.function(func, experimental_follow_type_hints=True)
+
+    trace_count = [0]
+    enabled(1, 3, x=4.0, y=""str"")
+    enabled(2, 4, x=4.0, y=""str"") # Retrace
+    self.assertEqual(trace_count[0], 2)
+
+  def testFollowTypeHintsTraceWithNotAllNamed(self):
+    trace_count = [0]
+    def func(x, y: ops.Tensor, z: int):
+      trace_count[0] += 1
+      return x
+
+    enabled = def_function.function(func, experimental_follow_type_hints=True)
+
+    enabled(1, 2, 3)
+    enabled(1, 20, 3) # No retrace - change in ops.Tensor typed arg
+    enabled(2, 2, 3) # Retrace - change in untyped arg
+    enabled(2, 2, 4) # Retrace - change in typed arg
+    self.assertEqual(trace_count[0], 3)
+
+  def testFollowTypeHintsTraceWithOnlyArgsNamed(self):
+    trace_count = [0]
+    def func(x, y, *args: ops.Tensor):
+      trace_count[0] += 1
+      return x
+
+    enabled = def_function.function(func, experimental_follow_type_hints=True)
+
+    trace_count = [0]
+    enabled(1, 20, 3, 4, 5, 6)
+    enabled(1, 20, 3, 4, 5, 60) # No retrace - change in *args
+    enabled(1, 30, 7, 8, 9, 10) # Retrace - change in args
+    self.assertEqual(trace_count[0], 2)
+
+  def testFollowTypeHintsTraceWithOnlyKwargsNamed(self):
+    trace_count = [0]
+    def func(x, y, *args, **kwargs: ops.Tensor):
+      trace_count[0] += 1
+      return x
+
+    enabled = def_function.function(func, experimental_follow_type_hints=True)
+
+    trace_count = [0]
+    enabled(1, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0)
+    enabled(1, 2, 3, 4, 5, 6, a=1.5, b=2.5, c=3.5) # No retrace - change in **kwargs
+    enabled(100, 2, 3, 4, 5, 6, a=1.0, b=2.0, c=3.0) # Retrace - change in args
+    enabled(1, 2, 3, 4, 5, 100, a=1.0, b=2.0, c=3.0) # Retrace - change in *args
+    self.assertEqual(trace_count[0], 3)
+
 class MultiDeviceTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_gpu_only
",0,train
36b42528ec7eb02f76cb5b802c43306b871b6229,tensorflow/tensorflow,"Removed a compiler warning from the file.

Fixed the signed/unsigned loop-index warning in the file.",quantization_utils.cc,"@@ -117,7 +117,7 @@ void SymmetricPerChannelQuantization(const float* const input,
   // Calculate scales per channel
   std::vector<float> scale_invs(channel_dim_size);
   const float half_scale = kMaxQuantizedValue;
-  for (size_t channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
+  for (int channel_idx = 0; channel_idx < channel_dim_size; channel_idx++) {
     const float half_range = std::max(std::abs(min_vals[channel_idx]),
                                       std::abs(max_vals[channel_idx]));
     output_scales->at(channel_idx) = half_range / half_scale;
",0,train
6adf6a06e1975adadec5cb0a7b9778363e51f61c,tensorflow/tensorflow,"Update GraphDef version to 748.

PiperOrigin-RevId: 370412154
Change-Id: Idc87751b9c95a5fa4268d92ad934fbe0b63cddc9",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 747  // Updated: 2021/4/25
+#define TF_GRAPH_DEF_VERSION 748  // Updated: 2021/4/26
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
86a83cf73c93909a0e2f54d4bb4d0879a011b899,tensorflow/tensorflow,"Make more functions work as metric functions for MetricSpec.
Change: 147382300",metric_spec.py,"@@ -53,7 +53,8 @@ def _args(fn):
 _CANONICAL_LABELS_ARG = 'labels'
 _LABELS_ARGS = set((_CANONICAL_LABELS_ARG, 'label', 'targets', 'target'))
 _CANONICAL_PREDICTIONS_ARG = 'predictions'
-_PREDICTIONS_ARGS = set((_CANONICAL_PREDICTIONS_ARG, 'prediction'))
+_PREDICTIONS_ARGS = set((_CANONICAL_PREDICTIONS_ARG, 'prediction',
+                         'logits', 'logit'))
 _CANONICAL_WEIGHTS_ARG = 'weights'
 _WEIGHTS_ARGS = set((_CANONICAL_WEIGHTS_ARG, 'weight'))
 
",0,train
f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad.

PiperOrigin-RevId: 167000454",binary_ops_test.py,"@@ -94,6 +94,12 @@ class BinaryOpsTest(XLATestCase):
           np.array([5, 6, 7, 8], dtype=dtype),
           expected=np.array([-160, -81, -28, -4], dtype=dtype))
 
+      self._testBinary(
+          gen_math_ops._sqrt_grad,
+          np.array([4, 3, 2, 1], dtype=dtype),
+          np.array([5, 6, 7, 8], dtype=dtype),
+          expected=np.array([0.625, 1, 1.75, 4], dtype=dtype))
+
       self._testBinary(
           gen_nn_ops._softplus_grad,
           np.array([4, 3, 2, 1], dtype=dtype),
",0,test
f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad.

PiperOrigin-RevId: 167000454",randomized_tests.cc,"@@ -2496,6 +2496,16 @@ TEST_F(OpTest, Sqrt) {
   });
 }
 
+TEST_F(OpTest, SqrtGrad) {
+  Repeatedly([this]() {
+    auto dims = RandomDims();
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder(""SqrtGrad"")
+                                             .RandomInput(DT_FLOAT, dims)
+                                             .RandomInput(DT_FLOAT, dims)
+                                             .Attr(""T"", DT_FLOAT));
+  });
+}
+
 TEST_F(OpTest, SquaredDifference) {
   Repeatedly([this]() {
     auto dims = BroadcastableDims();
",0,test
f9c5e921dd7058ea517a3d984b2e161d8dd19cee,tensorflow/tensorflow,"[TF:XLA] Implement SqrtGrad.

PiperOrigin-RevId: 167000454",binary_ops.cc,"@@ -107,6 +107,10 @@ XLA_MAKE_BINARY(
     b->Mul(b->Pow(lhs, XlaHelpers::IntegerLiteral(b, input_type(0), 3)),
            b->Div(rhs, XlaHelpers::IntegerLiteral(b, input_type(0), -2)),
            extend_dimensions));
+XLA_MAKE_BINARY(SqrtGrad,
+                b->Div(b->Mul(rhs,
+                              XlaHelpers::FloatLiteral(b, input_type(0), 0.5)),
+                       lhs, extend_dimensions));
 
 static xla::ComputationDataHandle Square(xla::ComputationBuilder* builder,
                                          const xla::ComputationDataHandle& x) {
",0,test
4c0a09fc302e193df54f127ca59f465e4966b8db,tensorflow/tensorflow,"fixit for resource_scatter_update.

PiperOrigin-RevId: 322286887
Change-Id: I9c2293d00c371b9cab279366bc893e509e1ded3b",optimizer_v2.py,"@@ -46,7 +46,6 @@ from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
-from tensorflow.python.ops import resource_variable_ops
 from tensorflow.python.ops import variables as tf_variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.saved_model import revived_types
@@ -1159,7 +1158,8 @@ class OptimizerV2(trackable.Trackable):
 
   def _resource_scatter_update(self, x, i, v):
     with ops.control_dependencies(
-        [resource_variable_ops.resource_scatter_update(x.handle, i, v)]):
+        [gen_resource_variable_ops.ResourceScatterUpdate(
+            resource=x.handle, indices=i, updates=v)]):
       return x.value()
 
   @property
",0,train
3494c78bc9eef521af3986eddfe4bf00cf9f0fe4,tensorflow/tensorflow,"[xla::gpu] skip autotuning on Ampere and above.

PiperOrigin-RevId: 441499202",nvptx_compiler.cc,"@@ -147,8 +147,12 @@ Status NVPTXCompiler::OptimizeHloPostLayoutAssignment(
 
   HloPassPipeline post_pipeline(""nvptx post-layout_assignment part 2"");
 
-  // Find the fastest algorithm for GEMMs.
-  post_pipeline.AddPass<GemmAlgorithmPicker>(stream_exec, device_allocator);
+  // Find the fastest algorithm for GEMMs. Skip on Ampere and later as the
+  // algorithm goes unused.
+  if (!stream_exec->GetDeviceDescription().cuda_compute_capability().IsAtLeast(
+          se::CudaComputeCapability::AMPERE)) {
+    post_pipeline.AddPass<GemmAlgorithmPicker>(stream_exec, device_allocator);
+  }
 
   if (!IsBefEnabled(hlo_module->config())) {
     // Transform TriangularSolve ops into custom-calls, so we can add temp
",0,train
00e2cbf2e84524dd9e8320b58cdccf2c8b3f33b3,tensorflow/tensorflow,Changes based on review,mkl_fused_batch_norm_op.cc,"@@ -34,12 +34,6 @@ using BatchNormBwdPd = mkldnn::batch_normalization_backward::primitive_desc;
 namespace tensorflow {
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-#ifdef ENABLE_MKLDNN_V1
-#define BN_FLAGS mkldnn::batch_normalization_flags
-#else
-#define BN_FLAGS mkldnn
-#endif
-
 struct MklBatchNormFwdParams {
   memory::dims src_dims;
   int depth;
@@ -61,7 +55,7 @@ struct MklBatchNormFwdParams {
                         bool training)
       : src_dims(src_dims), depth(depth), eps(eps), training(training) {}
 
-#endif
+#endif // !ENABLE_MKLDNN_V1
 };
 
 template <typename T, typename U>
@@ -87,22 +81,18 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive {
         static_cast<void*>(const_cast<T*>(src_data)));
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data));
 
-    if (context_.flags & (int)BN_FLAGS::use_scale_shift)
+    if (context_.flags & static_cast<int>(BN_FLAGS::use_scale_shift))
       context_.weights_mem->set_data_handle(
           static_cast<void*>(const_cast<U*>(weights_data)));
 
     if ((context_.pkind == prop_kind::forward_training) ||
-        (context_.flags & (int)BN_FLAGS::use_global_stats)) {
+        (context_.flags & static_cast<int>(BN_FLAGS::use_global_stats))) {
       context_.mean_mem->set_data_handle(static_cast<void*>(mean_data));
       context_.variance_mem->set_data_handle(static_cast<void*>(variance_data));
     }
 #ifdef ENABLE_MKLDNN_V1
     // Execute batch-normalization forward primitives.
-    DCHECK_EQ(context_.fwd_primitives.size(), context_.net_args.size());
-    for (size_t i = 0; i < context_.fwd_primitives.size(); ++i) {
-      context_.fwd_primitives.at(i).execute(*context_.fwd_stream,
-                                            context_.net_args.at(i));
-    }
+    execute_primitives(context_.fwd_primitives, context_.fwd_stream, context_.net_args);
 #else
     context_.fwd_stream->submit(context_.fwd_primitives);
 #endif  // ENABLE_MKLDNN_V1
@@ -141,7 +131,7 @@ class MklFusedBatchNormFwdPrimitive : public MklPrimitive {
  private:
   // Primitive reuse context for BatchNorm forward op.
   struct BatchNormFwdContext {
-    // Flags indicts if it is training or inference mode.
+    // Flags indicating if it is training or inference mode.
     int64 flags;
 
     // Algorithm kind.
@@ -556,12 +546,12 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive {
     auto diff_weights_desc = weights_desc;
 
     // Forward batch-normalization descriptor and primitive descriptor.
-    auto bn_flags =
-        bwdParams.training
-            ? BN_FLAGS::use_scale_shift
-            : (BN_FLAGS::use_scale_shift | BN_FLAGS::use_global_stats);
+    //auto bn_flags =
+    //    bwdParams.training
+    //        ? BN_FLAGS::use_scale_shift
+    //        : (BN_FLAGS::use_scale_shift | BN_FLAGS::use_global_stats);
     auto fwd_desc = batch_normalization_forward::desc(
-        prop_kind::forward_training, src_md, bwdParams.eps, bn_flags);
+        prop_kind::forward_training, src_md, bwdParams.eps, context_.flags);
     auto fwd_pd = BatchNormFwdPd(fwd_desc, cpu_engine_);
 
     // Backward batch-normalization primitive.
@@ -570,7 +560,7 @@ class MklFusedBatchNormBwdPrimitive : public MklPrimitive {
     //   2. on bwd propagation, mean and variance are considered as constants.
     //      Thus, reduce the amount of MKL computation.
     auto bwd_desc = batch_normalization_backward::desc(
-        prop_kind::backward, diff_dst_md, src_md, bwdParams.eps, bn_flags);
+        prop_kind::backward, diff_dst_md, src_md, bwdParams.eps, context_.flags);
     context_.bwd_pd.reset(new BatchNormBwdPd(bwd_desc, cpu_engine_, fwd_pd));
 
     // Create memory primitives.
@@ -979,7 +969,7 @@ class MklFusedBatchNormOp : public OpKernel {
 
     // Set NAN mean value in case of empty input tensor
     auto saved_mean_data = (*saved_mean_tensor)->flat<U>().data();
-    std::fill_n(saved_mean_data, num_elements, static_cast<U>(NAN));
+    std::fill_n(saved_mean_data, num_elements, static_cast<U>(0));
 
     MklDnnShape mkl_shape_saved_variance;
     mkl_shape_saved_variance.SetMklTensor(false);
@@ -990,12 +980,12 @@ class MklFusedBatchNormOp : public OpKernel {
 
     // Set NAN variance value in case of empty input tensor
     auto saved_variance_data = (*saved_variance_tensor)->flat<U>().data();
-    std::fill_n(saved_variance_data, num_elements, static_cast<U>(NAN));
+    std::fill_n(saved_variance_data, num_elements, static_cast<U>(0));
 
     // Changes to support reserved_space_3 parameter in FusedBatchNormV3.
     // TODO: This parameter functionality is not implemented on CPU.
     //       It is used to hold intermediate results. So the allocated
-    //       memory is filled with NANs.
+    //       memory is filled with 0s.
     if (reserved_space) {
       DCHECK(reserved_space_tensor != nullptr);
 
@@ -1171,7 +1161,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
       src_data = static_cast<T*>(const_cast<T*>(src_tensor.flat<T>().data()));
 
       const T* diff_dst_data = nullptr;
-#ifdef ENABLE_MKL_DNN_V1
+#ifdef ENABLE_MKLDNN_V1
       if (IS_DIFF_DST_REORDER_NEEDED(diff_dst_md, bn_bwd_pd, bn_bwd)) {
         diff_dst.SetUsrMem(diff_dst_md, &diff_dst_tensor);
         diff_dst.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA(
@@ -1184,7 +1174,7 @@ class MklFusedBatchNormGradOp : public OpKernel {
 #else
       diff_dst_data =
           static_cast<T*>(const_cast<T*>(diff_dst_tensor.flat<T>().data()));
-#endif
+#endif // ENABLE_MKLDNN_V1
 
       // Indices of output tensors
       const size_t kDiffSrcIndex = 0;
",0,test
00e2cbf2e84524dd9e8320b58cdccf2c8b3f33b3,tensorflow/tensorflow,Changes based on review,mkl_types.h,"@@ -110,6 +110,7 @@ namespace tensorflow {
 #define TENSOR_FORMAT MKL_TENSOR_FORMAT
 #define TENSOR_FORMAT_NHWC MKL_TENSOR_FORMAT_NHWC
 #define TENSOR_MAX_DIMS MKLDNN_MAX_NDIMS
+#define BN_FLAGS mkldnn::batch_normalization_flags
 
 #else
 
@@ -205,6 +206,7 @@ namespace tensorflow {
 #define SUMMAND_MD summand_pd
 #define TENSOR_FORMAT TensorFormat
 #define TENSOR_FORMAT_NHWC FORMAT_NHWC
+#define BN_FLAGS mkldnn
 #endif  // ENABLE_MKLDNN_V1
 
 }  // namespace tensorflow
",0,test
7cb0b5767c549df17a52173ef33ec7d2487d25e2,tensorflow/tensorflow,"Tolerate differences equal to `tolerated` threshold in MinMaxApproximatelyEqual.

PiperOrigin-RevId: 310872715
Change-Id: I5b56efad6c31efa144a72f3a30843a98fec0a6f1",hardcode_min_max.cc,"@@ -271,8 +271,8 @@ bool MinMaxApproximatelyEqual(const MinMax& minmax1, const MinMax& minmax2) {
   const double magnitude =
       std::min(minmax1.max - minmax1.min, minmax2.max - minmax2.min);
   const double tolerated = 1e-6 * magnitude;
-  return std::abs(minmax1.min - minmax2.min) < tolerated &&
-         std::abs(minmax1.max - minmax2.max) < tolerated;
+  return std::abs(minmax1.min - minmax2.min) <= tolerated &&
+         std::abs(minmax1.max - minmax2.max) <= tolerated;
 }
 
 // Propagates MinMax from any of the listed arrays, to all others.
",0,train
9d86b3cbb39009b6484b3ba3b8ebc3d82949fae0,tensorflow/tensorflow,"Adds summary for loss so it's easier to follow training progress
Change: 137064814",kmeans.py,"@@ -243,6 +243,7 @@ class KMeansClustering(estimator.Estimator,
      ).training_graph()
     incr_step = tf.assign_add(tf.contrib.framework.get_global_step(), 1)
     self._loss = tf.reduce_sum(losses)
+    tf.scalar_summary('loss/raw', self._loss)
     training_op = with_dependencies([training_op, incr_step], self._loss)
     return training_op, self._loss
 
",0,train
5cb5f52aa16f85f83d818e6f219e7f483b6ead71,tensorflow/tensorflow,"Uppercase platform name when looking up custom call target.

TFRT uses 'ROCm', but TF expects 'ROCM'.

PiperOrigin-RevId: 428809793
Change-Id: I842887ee056cb8272cbabf439e4a537971d09e24",xlir_kernels.cc,"@@ -18,6 +18,7 @@
 #include <utility>
 #include <vector>
 
+#include ""absl/strings/ascii.h""
 #include ""llvm/Support/Error.h""
 #include ""tensorflow/compiler/xla/service/custom_call_status_internal.h""
 #include ""tensorflow/compiler/xla/service/custom_call_target_registry.h""
@@ -321,11 +322,11 @@ static llvm::Error CustomCall(
     tfrt::StringAttribute symbol) {
   // Lookup custom call target from registry.
   auto platform = stream->platform();
-  auto* target = CustomCallTargetRegistry::Global()->Lookup(
-      symbol.str(), tfrt::StrCat(platform));
+  auto key = absl::AsciiStrToUpper(tfrt::StrCat(platform));  // 'ROCm' -> 'ROCM'
+  auto* target = CustomCallTargetRegistry::Global()->Lookup(symbol.str(), key);
   if (!target) {
     return tfrt::MakeStringError(""Custom call target '"", symbol.str(),
-                                 ""' not registered for platform "", platform);
+                                 ""' not registered for platform "", key);
   }
 
   auto current = tfrt::gpu::wrapper::CtxSetCurrent(stream.context()->get());
",0,train
a901eb7c6e30d7ece53adbe50549774e0a8e0715,tensorflow/tensorflow,"Internal refactor

PiperOrigin-RevId: 274224520",grpc_tensor_coding.cc,"@@ -14,8 +14,10 @@ limitations under the License.
 ==============================================================================*/
 
 #include ""tensorflow/core/distributed_runtime/rpc/grpc_tensor_coding.h""
+
 #include ""grpcpp/support/byte_buffer.h""
 #include ""grpcpp/support/slice.h""
+#include ""absl/flags/flag.h""
 #include ""tensorflow/core/common_runtime/dma_helper.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor.pb.h""
@@ -26,7 +28,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/protobuf/worker.pb.h""
 
-// (Omitted internal-only flag)
+ABSL_FLAG(bool, grpc_deepcopy_tensor_response, false, ""Disables mem sharing"");
 
 namespace tensorflow {
 namespace grpc {
@@ -183,7 +185,9 @@ void EncodeTensorToByteBuffer(bool is_dead, const Tensor& val, bool require_ack,
     // We enable this behavior if the tensor is large.
     bool share_tensor_slice_memory = (tdata.size() > kLargeTensorBytes);
 
-    // (Omitted internal-only conditional)
+    if (absl::GetFlag(FLAGS_grpc_deepcopy_tensor_response)) {
+      share_tensor_slice_memory = false;
+    }
 
     size_t encoder_size = expected_size - tdata.size();
 
",0,train
c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray.

It turns out this is faster to compile, because LLVM handles it specially.

PiperOrigin-RevId: 201911349",cpu_external_constants_test.cc,"@@ -65,7 +65,7 @@ TEST_F(CpuExternalConstantsTest, BasicNegative) {
   // to externalize it.
   TestWithArray(/*rows=*/4, /*cols=*/4, R""(
 CHECK-NOT: @constant_global_0 = external constant [16 x float], align 8
-CHECK: @0 = private constant [16 x float] {{.*}}, align 8
+CHECK: @0 = private constant [64 x i8] {{.*}}, align 8
 )"");
 }
 }  // namespace
",0,test
c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray.

It turns out this is faster to compile, because LLVM handles it specially.

PiperOrigin-RevId: 201911349",cpu_literal_caching_test.cc,"@@ -55,8 +55,8 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK: private constant [12 x float]
-CHECK-NOT: private constant [12 x float]
+CHECK: private constant [48 x i8]
+CHECK-NOT: private constant [48 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
@@ -98,10 +98,10 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK: private constant [1 x float]
-CHECK: private constant [2 x float]
-CHECK-NOT: private constant [1 x float]
-CHECK-NOT: private constant [2 x float]
+CHECK: private constant [4 x i8]
+CHECK: private constant [8 x i8]
+CHECK-NOT: private constant [4 x i8]
+CHECK-NOT: private constant [8 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
",0,test
c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray.

It turns out this is faster to compile, because LLVM handles it specially.

PiperOrigin-RevId: 201911349",cpu_outfeed_test.cc,"@@ -37,7 +37,7 @@ ENTRY main {
 )"";
 
   string filecheck_pattern = R""(
-CHECK: private constant [12 x float]
+CHECK: private constant [48 x i8]
 )"";
 
   TF_ASSERT_OK_AND_ASSIGN(std::unique_ptr<HloModule> module,
",0,test
c8f1aec8046df28b4ed5b5181f3ef5509f98f97e,tensorflow/tensorflow,"Use i8 as type for the ConstantDataArray.

It turns out this is faster to compile, because LLVM handles it specially.

PiperOrigin-RevId: 201911349",llvm_util.cc,"@@ -36,6 +36,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/io/path.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
+#include ""tensorflow/core/platform/byte_order.h""
 #include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/types.h""
@@ -251,14 +252,12 @@ StatusOr<Shape> DecodeSelfDescribingShapeConstant(const void* shape_ptr,
 
 llvm::Constant* ConvertLiteralToIrConstant(const Literal& literal,
                                            llvm::Module* module) {
-  const Shape& shape = literal.shape();
-  llvm::Type* type = shape.element_type() == C64
-                         ? llvm::Type::getFloatTy(module->getContext())
-                         : PrimitiveTypeToIrType(shape.element_type(), module);
   const char* data = static_cast<const char*>(literal.untyped_data());
-  uint64 num_elements = literal.size_bytes() * 8 / GetSizeInBits(type);
-  return llvm::ConstantDataArray::getRaw(
-      llvm::StringRef(data, literal.size_bytes()), num_elements, type);
+  CHECK_EQ(module->getDataLayout().isLittleEndian(),
+           tensorflow::port::kLittleEndian);
+  return llvm::ConstantDataArray::getString(
+      module->getContext(), llvm::StringRef(data, literal.size_bytes()),
+      /*AddNull=*/false);
 }
 
 llvm::AllocaInst* EmitAllocaAtFunctionEntry(llvm::Type* type,
",0,test
f1d8f8d1501af92b0fe1f1d37398b4dee6a4e7e2,tensorflow/tensorflow,"Collect slot variable restorations in a queue and restore them as a batch.

PiperOrigin-RevId: 399236012
Change-Id: Ia89a1037fdaae45fc48cb17a56e99cd4125e9054",base.py,"@@ -275,12 +275,10 @@ class CheckpointPosition(object):
       checkpoint.object_by_proto_id[self._proto_id] = trackable
       for deferred_slot_restoration in (
           checkpoint.deferred_slot_restorations.pop(self._proto_id, ())):
-        trackable._create_or_restore_slot_variable(  # pylint: disable=protected-access
-            slot_variable_position=CheckpointPosition(
-                checkpoint=checkpoint,
-                proto_id=deferred_slot_restoration.slot_variable_id),
-            variable=deferred_slot_restoration.original_variable,
-            slot_name=deferred_slot_restoration.slot_name)
+        self._queue_slot_variable_for_restoration(
+            trackable, deferred_slot_restoration.original_variable,
+            deferred_slot_restoration.slot_variable_id,
+            deferred_slot_restoration.slot_name)
       for slot_restoration in checkpoint.slot_restorations.pop(
           self._proto_id, ()):
         optimizer_object = checkpoint.object_by_proto_id.get(
@@ -300,12 +298,9 @@ class CheckpointPosition(object):
         # it would not have the optimizer's `_create_or_restore_slot_variable`
         # method.
         elif hasattr(optimizer_object, ""_create_or_restore_slot_variable""):
-          optimizer_object._create_or_restore_slot_variable(  # pylint: disable=protected-access
-              slot_variable_position=CheckpointPosition(
-                  checkpoint=checkpoint,
-                  proto_id=slot_restoration.slot_variable_id),
-              variable=trackable,
-              slot_name=slot_restoration.slot_name)
+          self._queue_slot_variable_for_restoration(
+              optimizer_object, trackable, slot_restoration.slot_variable_id,
+              slot_restoration.slot_name)
       return True  # New assignment
     else:
       # The object was already mapped for this checkpoint load, which means
@@ -486,6 +481,44 @@ class CheckpointPosition(object):
         return self._checkpoint.shape_map[serialized_tensor.checkpoint_key]
     return None
 
+  def _queue_slot_variable_for_restoration(self, optimizer_object, variable,
+                                           slot_variable_id, slot_name):
+    """"""Adds a slot variable onto the restoration queue.
+
+    See comment on slot_restoration_tensor_saveables in
+    _CheckpointRestoreCoordinator.__init__ for more information.
+
+    Args:
+      optimizer_object: Optimizer that owns the slot variable.
+      variable: Variable associated with the slot variable.
+      slot_variable_id: ID of the slot variable.
+      slot_name: Name of the slot variable.
+    """"""
+    slot_variable_position = CheckpointPosition(
+        checkpoint=self.checkpoint, proto_id=slot_variable_id)
+    # pylint: disable=protected-access
+    slot_variable = optimizer_object._create_or_restore_slot_variable(
+        slot_variable_position=slot_variable_position,
+        variable=variable,
+        slot_name=slot_name)
+    # pylint: enable=protected-access
+    if slot_variable is None:
+      # The optimizer returns None if the restore should not be done (yet).
+      return
+    slot_variable_position.checkpoint.object_by_proto_id[
+        slot_variable_id] = slot_variable
+    # pylint: disable=protected-access
+    slot_variable._maybe_initialize_trackable()
+    slot_variable._self_update_uid = self.checkpoint.restore_uid
+    # pylint: enable=protected-access
+    # Since this is a slot variable, there will be no new python_saveables, so
+    # ignore that return value.
+    new_restore_ops, new_tensor_saveables, _ = (
+        slot_variable_position.gather_ops_or_named_saveables())
+    self.checkpoint.new_restore_ops(new_restore_ops)
+    self.checkpoint.slot_restoration_tensor_saveables.update(
+        new_tensor_saveables)
+
 
 _DeferredSlotVariableRestoration = collections.namedtuple(
     ""_DeferredSlotVariableRestoration"", [
@@ -983,6 +1016,20 @@ class Trackable(object):
     restore_ops.extend(
         current_position.checkpoint.restore_saveables(
             tensor_saveables, python_saveables))
+    # It is faster to restore slot variables separately because the file reader
+    # (BundleReader) assumes that variables are stored on disk in alphabetical
+    # order. However, slot variables are stored in their own groups after other
+    # variables, and while each group is alphabetically sorted, merging them
+    # into 1 read would cause lots of back and forth seeking, e.g.
+    #   variable/1 @ offset 0,
+    #   variable/1/slot/1 @ offset 100,
+    #   variable/1/slot/2 @ offset 200,
+    #   variable/2 @ offset 1,
+    #   variable/2/slot/1 @ offset 101, ...
+    restore_ops.extend(
+        current_position.checkpoint.restore_saveables(
+            current_position.checkpoint.slot_restoration_tensor_saveables, []))
+    current_position.checkpoint.slot_restoration_tensor_saveables.clear()
     return restore_ops
 
   def _single_restoration_from_checkpoint_position(self, checkpoint_position,
",0,train
f1d8f8d1501af92b0fe1f1d37398b4dee6a4e7e2,tensorflow/tensorflow,"Collect slot variable restorations in a queue and restore them as a batch.

PiperOrigin-RevId: 399236012
Change-Id: Ia89a1037fdaae45fc48cb17a56e99cd4125e9054",util.py,"@@ -272,6 +272,15 @@ class _CheckpointRestoreCoordinator(object):
                     optimizer_id=node_index,
                     slot_variable_id=slot_reference.slot_variable_node_id,
                     slot_name=slot_reference.slot_name))
+    # Dictionary of tensor_saveables for slot_restorations that were not shifted
+    # over to deferred_slot_restorations when the variable is created/tracked.
+    #
+    # These saveables are restored, along with other (non-slot) variables, in a
+    # batch after collecting all child CheckpointPositions. Doing slot variable
+    # restorations in a batch results in more efficient (fewer) file operations.
+    # This efficiency is particularly significant when restoring from
+    # network-based file systems.
+    self.slot_restoration_tensor_saveables = {}
 
     self._deleter = _CheckpointRestoreCoordinatorDeleter(
         self.expect_partial_attr,
",0,train
7aedf28c704d3fdfe22b8563ede09677f8c92585,tensorflow/tensorflow,"Prevent overwriting from SavedModel builder, if the export-directory already exists.
Change: 137312424",builder.py,"@@ -86,8 +86,12 @@ class SavedModelBuilder(object):
         constants.SAVED_MODEL_SCHEMA_VERSION)
 
     self._export_dir = export_dir
-    if not file_io.file_exists(export_dir):
-      file_io.recursive_create_dir(self._export_dir)
+    if file_io.file_exists(export_dir):
+      raise AssertionError(
+          ""Export directory already exists. Please specify a different export ""
+          ""directory."")
+
+    file_io.recursive_create_dir(self._export_dir)
 
     # Boolean to track whether variables and assets corresponding to the
     # SavedModel have been saved. Specifically, the first meta graph to be added
",0,test
7aedf28c704d3fdfe22b8563ede09677f8c92585,tensorflow/tensorflow,"Prevent overwriting from SavedModel builder, if the export-directory already exists.
Change: 137312424",saved_model_test.py,"@@ -198,6 +198,29 @@ class SavedModelTest(tf.test.TestCase):
       self.assertRaises(errors.NotFoundError, loader.load, sess, [""baz""],
                         export_dir)
 
+  def testNoOverwrite(self):
+    export_dir = os.path.join(tf.test.get_temp_dir(), ""test_no_overwrite"")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    # Graph with a single variable. SavedModel invoked to:
+    # - add with weights.
+    with self.test_session(graph=tf.Graph()) as sess:
+      self._init_and_validate_variable(sess, ""v"", 42)
+      builder.add_meta_graph_and_variables(sess, [""foo""])
+
+    # Save the SavedModel to disk in text format.
+    builder.save(as_text=True)
+
+    # Restore the graph with tag ""foo"", whose variables were saved.
+    with self.test_session(graph=tf.Graph()) as sess:
+      loader.load(sess, [""foo""], export_dir)
+      self.assertEqual(42, tf.get_collection(tf.GraphKeys.VARIABLES)[0].eval())
+
+    # An attempt to create another builder with the same export directory should
+    # result in an assertion error.
+    self.assertRaises(AssertionError, saved_model_builder.SavedModelBuilder,
+                      export_dir)
+
   def testSaveAsText(self):
     export_dir = os.path.join(tf.test.get_temp_dir(), ""test_astext"")
     builder = saved_model_builder.SavedModelBuilder(export_dir)
",0,test
1fe68ce0f4f7ef020cc52d1cc9963dd344fccba0,tensorflow/tensorflow,"internal change

PiperOrigin-RevId: 190789794",resource_mgr.h,"@@ -319,14 +319,13 @@ class IsResourceInitialized : public OpKernel {
 // specified type. The type will be a part of the generated op name.
 // TODO(apassos): figure out how to get non-cpu-allocated tensors to work
 // through constant folding so this doesn't have to be marked as stateful.
-#define REGISTER_RESOURCE_HANDLE_OP(Type)                   \
-  REGISTER_OP(#Type ""HandleOp"")                             \
-      .Attr(""container: string = ''"")                       \
-      .Attr(""shared_name: string = ''"")                     \
-      .Output(""resource: resource"")                         \
-      .SetIsStateful()                                      \
-      .SetShapeFn(tensorflow::shape_inference::ScalarShape) \
-      .Doc(""Creates a handle to a "" #Type)
+#define REGISTER_RESOURCE_HANDLE_OP(Type) \
+  REGISTER_OP(#Type ""HandleOp"")           \
+      .Attr(""container: string = ''"")     \
+      .Attr(""shared_name: string = ''"")   \
+      .Output(""resource: resource"")       \
+      .SetIsStateful()                    \
+      .SetShapeFn(tensorflow::shape_inference::ScalarShape)
 
 // Utility op kernel to produce a handle to a resource of type T.
 template <typename T>
",0,train
a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator

PiperOrigin-RevId: 404289211
Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",iterator_ops.py,"@@ -34,7 +34,6 @@ from tensorflow.python.framework import type_spec
 from tensorflow.python.ops import gen_dataset_ops
 from tensorflow.python.training.saver import BaseSaverBuilder
 from tensorflow.python.training.tracking import base as trackable
-from tensorflow.python.types import trace
 from tensorflow.python.util import _pywrap_utils
 from tensorflow.python.util import deprecation
 from tensorflow.python.util import lazy_loader
@@ -673,30 +672,7 @@ class IteratorBase(collections_abc.Iterator, trackable.Trackable,
     raise NotImplementedError(""Iterator.get_next_as_optional()"")
 
 
-# TODO(b/202447704): Merge into IteratorSpec.
-class IteratorType(trace.TraceType):
-  """"""Represents Iterators (and specs) for function tracing purposes.""""""
-
-  def __init__(self, spec, local_id):
-    self._components = (spec, local_id)
-
-  def is_subtype_of(self, other):
-    # TODO(b/202429845): Implement for subtyping.
-    return self == other
-
-  def most_specific_common_supertype(self, others):
-    # TODO(b/202430155) Implement for shape relaxation.
-    return None
-
-  def __hash__(self) -> int:
-    return hash(self._components)
-
-  def __eq__(self, other) -> bool:
-    return isinstance(
-        other, IteratorType) and self._components == other._components
-
-
-class OwnedIterator(IteratorBase, trace.SupportsTracingType):
+class OwnedIterator(IteratorBase):
   """"""An iterator producing tf.Tensor objects from a tf.data.Dataset.
 
   The iterator resource  created through `OwnedIterator` is owned by the Python
@@ -900,14 +876,9 @@ class OwnedIterator(IteratorBase, trace.SupportsTracingType):
 
     return {""ITERATOR"": _saveable_factory}
 
-  def __tf_tracing_type__(self, tracing_context):
-    return IteratorType(
-        self._type_spec,
-        tracing_context.get_local_id(self._iterator_resource._id))  # pylint:disable=protected-access
-
 
 @tf_export(""data.IteratorSpec"", v1=[])
-class IteratorSpec(type_spec.TypeSpec, trace.SupportsTracingType):
+class IteratorSpec(type_spec.TypeSpec):
   """"""Type specification for `tf.data.Iterator`.
 
   For instance, `tf.data.IteratorSpec` can be used to define a tf.function that
@@ -960,11 +931,6 @@ class IteratorSpec(type_spec.TypeSpec, trace.SupportsTracingType):
   def from_value(value):
     return IteratorSpec(value.element_spec)  # pylint: disable=protected-access
 
-  def __tf_tracing_type__(self, tracing_context):
-    # TODO(b/202772221): Validate and enforce this assumption of uniqueness per
-    # spec instance.
-    return IteratorType(self, tracing_context.get_local_id(id(self)))
-
 
 # TODO(b/71645805): Expose trackable stateful objects from dataset.
 class _IteratorSaveable(BaseSaverBuilder.SaveableObject):
",0,train
a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator

PiperOrigin-RevId: 404289211
Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",function_trace_type_test.py,"@@ -15,14 +15,10 @@
 """"""Tests for function_trace_type.""""""
 
 import timeit
-from absl.testing import parameterized
-
 
 from tensorflow.python import keras
-from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import function
 from tensorflow.python.eager import function_trace_type
-from tensorflow.python.framework import combinations
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import tensor_spec
 from tensorflow.python.ops import array_ops
@@ -33,24 +29,8 @@ from tensorflow.python.ops.ragged import ragged_tensor
 from tensorflow.python.platform import test
 
 
-class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
-
-  @combinations.generate(combinations.combine(mode=['eager']))
-  def testIteratorAliasing(self):
-    it1 = iter(dataset_ops.DatasetV2.from_tensor_slices([1, 2, 3]))
-    it2 = iter(dataset_ops.DatasetV2.from_tensor_slices([1, 2, 3]))
-
-    self.assertEqual(
-        function_trace_type.get_arg_spec((it1, it1), False, False, True),
-        function_trace_type.get_arg_spec((it2, it2), False, False, True))
-    self.assertEqual(
-        function_trace_type.get_arg_spec((it1, it2), False, False, True),
-        function_trace_type.get_arg_spec((it2, it1), False, False, True))
-    self.assertNotEqual(
-        function_trace_type.get_arg_spec((it1, it1), False, False, True),
-        function_trace_type.get_arg_spec((it1, it2), False, False, True))
+class CacheKeyGenerationTest(test.TestCase):
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testCompositeAndSpec(self):
     composite_tensor = ragged_tensor.RaggedTensor.from_row_splits(
         values=[1, 2, 3], row_splits=[0, 2, 3])
@@ -60,7 +40,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
         function_trace_type.get_arg_spec(composite_tensor, False, False, True),
         function_trace_type.get_arg_spec(spec, False, False, True))
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testVariableAliasing(self):
     v1 = resource_variable_ops.ResourceVariable([1])
     v2 = resource_variable_ops.ResourceVariable([1])
@@ -80,7 +59,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(all_unique, all_unique_again)
     self.assertEqual(all_same, all_same_again)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testTensorEquality(self):
     context = function_trace_type.SignatureContext()
     tensor_a = array_ops.zeros([11, 3, 5],
@@ -97,7 +75,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertNotEqual(tensor_b, tensor_c)
     self.assertEqual(tensor_a, tensor_d)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testTensorAndSpecEquality(self):
     context = function_trace_type.SignatureContext()
     tensor = array_ops.zeros([11, 3, 5],
@@ -110,7 +87,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertEqual(tensor, spec)
     self.assertNotEqual(tensor, spec_with_name)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testTupleEquality(self):
     trace_a = function_trace_type.get_arg_spec((1, 2, 3, 4), False, False, True)
     trace_b = function_trace_type.get_arg_spec((1, 2, 2, 4), False, False, True)
@@ -122,7 +98,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertNotEqual(trace_b, trace_c)
     self.assertEqual(trace_a, trace_d)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testListEquality(self):
     trace_a = function_trace_type.get_arg_spec([1, 2, 3, 4], False, False, True)
     trace_b = function_trace_type.get_arg_spec([1, 2, 2, 4], False, False, True)
@@ -134,7 +109,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertNotEqual(trace_b, trace_c)
     self.assertEqual(trace_a, trace_d)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testDictEquality(self):
     trace_a = function_trace_type.get_arg_spec({1: 2, 3: 4}, False, False, True)
     trace_b = function_trace_type.get_arg_spec({1: 2, 3: 2}, False, False, True)
@@ -146,7 +120,6 @@ class CacheKeyGenerationTest(test.TestCase, parameterized.TestCase):
     self.assertNotEqual(trace_b, trace_c)
     self.assertEqual(trace_a, trace_d)
 
-  @combinations.generate(combinations.combine(mode=['graph', 'eager']))
   def testComplexStruct(self):
     struct = {(1, 2, 3): {(1, 2): {12: 2}}, (3, 2, 3): (2, {2: 3})}
     trace_a = function_trace_type.get_arg_spec(struct, False, False, True)
",0,train
a6f9fd60cc1a16c56b78a8497530ba1351143c79,tensorflow/tensorflow,"Introduce TraceType for Iterator

PiperOrigin-RevId: 404289211
Change-Id: I2996145d1af984ab6d28079e757a25654f5d06b5",trace.py,"@@ -77,7 +77,6 @@ class SupportsTracingType(Protocol):
   classes according to the behaviour specified by their TraceType.
   """"""
 
+  @abc.abstractmethod
   def __tf_tracing_type__(self, context: TracingContext) -> TraceType:
-    raise NotImplementedError(
-        ""Class inheriting SupportsTracingType must implement __tf_tracing_type__""
-    )
+    pass
",0,train
14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU.

At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing.

PiperOrigin-RevId: 247611564",distribute_strategy_test.py,"@@ -1630,12 +1630,12 @@ class TestDistributionStrategyWithKerasModels(test.TestCase,
     x = np.ones((64, 10)).astype('float32')
 
     model = _make_model_with_add_loss()
-    model.compile('sgd', cloning=cloning)
+    model.compile('sgd')
     history = model.fit(x, steps_per_epoch=2, epochs=1)
 
     with distribution.scope():
       ds_model = _make_model_with_add_loss()
-      ds_model.compile('sgd')
+      ds_model.compile('sgd', cloning=cloning)
       ds_history = ds_model.fit(x, steps_per_epoch=2, epochs=1)
 
     self.assertAllClose(history.history, ds_history.history)
",0,test
14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU.

At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing.

PiperOrigin-RevId: 247611564",distributed_training_utils.py,"@@ -618,7 +618,8 @@ def is_distributing_by_cloning(model):
     True if the `model` is going to be distributed using cloning and False
     otherwise.
   """"""
-  return (model._cloning or not context.executing_eagerly() or
+  return (model._cloning or model._compile_distribution or
+          not context.executing_eagerly() or
           K.is_tpu_strategy(model._distribution_strategy))
 
 
",0,test
14f5e78832d78b5bab6803b01156a17c1e9482b9,tensorflow/tensorflow,"Make the no-cloning distribution codepath a default except for graph mode or TPU.

At this point we have verified performance and accuracy on a couple of models in addition to all unit-tests passing.

PiperOrigin-RevId: 247611564",training.py,"@@ -249,7 +249,7 @@ class Model(network.Network):
     # cloning is requested.
     # TODO(b/124517980, b/124377929): Remove this temporary undocumented way
     # of enabling the feature and graduate it to the main distributed code path.
-    self._cloning = kwargs.pop('cloning', True)
+    self._cloning = kwargs.pop('cloning', False)
 
     self._validate_compile_param_for_distribution_strategy(self.run_eagerly,
                                                            sample_weight_mode,
",0,test
8802516b56e190cba5846f7b7dfca7a0902bcf03,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/2501e86acda2

PiperOrigin-RevId: 319196952
Change-Id: I078a64a0b84eb9cd8f3c5d277ad30c943b58fd1c",lhlo_legalize_to_llvm.cc,"@@ -128,8 +128,10 @@ struct DynamicMemRefCastOpConverter
 
 void PopulateLhloToLLVMConversionPatterns(LLVMTypeConverter *converter,
                                           OwningRewritePatternList *patterns) {
-  patterns->insert<DynamicMemRefCastOpConverter, StaticMemRefCastOpConverter>(
-      *converter);
+  // TODO(b/160227541): Re-enable LHLO->LLVM lowering.
+  //  patterns->insert<DynamicMemRefCastOpConverter,
+  //  StaticMemRefCastOpConverter>(
+  //     *converter);
 }
 
 }  // namespace xla_lhlo
",0,train
71061b5dccc00b13f2d67144117fdd254797af38,tensorflow/tensorflow,Fixed typo,crop_and_resize_op_gpu.cu.cc,"@@ -411,7 +411,7 @@ struct CropAndResizeBackpropImage<GPUDevice, T> {
           d.stream(), config.virtual_thread_count, grads_image.data()));
     }
 
-    // Configurate interpolation method.
+    // Configure interpolation method.
     InterpolationMethod method = BILINEAR;
     if (method_name == ""nearest"") {
       method = NEAREST;
",0,test
3265790fde0b78769b5906909db235464377f6f8,tensorflow/tensorflow,"Support CompositeTensor in functional If/While

We need to repack the return of the if/else branches to ensure any
CompositeTensors are returned with the correct type. For functional
while we need to be sure to unpack and repack when handling the body.

PiperOrigin-RevId: 333190256
Change-Id: If9a9449a10616afa3dd79e39b1e66ee0ee571a9e",functional_ops.py,"@@ -838,28 +838,14 @@ def If(cond, inputs, then_branch, else_branch, name=None):
     or else_branch(inputs).
   """"""
   # pylint: disable=protected-access
-  # Handle the Defun case until users have transitioned to tf.function. Note
-  # that composites may need to be re-packed by the caller.
   if isinstance(then_branch, function._DefinedFunction):
     tlist = [_.type for _ in then_branch.definition.signature.output_arg]
-    return gen_functional_ops._if(
-        cond, inputs, tlist, then_branch, else_branch, name=name)
-
-  # We assume that `then_branch` is a ConcreteFunction here.
-  then_out = then_branch.structured_outputs
-  else_out = else_branch.structured_outputs
-
-  # Ensure then/else are the same type of composites to avoid an invalid call
-  # to pack_sequence_as later on.
-  nest.assert_same_structure(then_out, else_out, expand_composites=True)
-
-  tlist = nest.flatten(then_branch.output_dtypes)
-  ret = gen_functional_ops._if(
+  else:
+    # We assume that `then_branch` is a ConcreteFunction here.
+    tlist = nest.flatten(then_branch.output_dtypes)
+  return gen_functional_ops._if(
       cond, inputs, tlist, then_branch, else_branch, name=name)
 
-  # Re-pack the outputs to restore any CompositeTensors
-  return nest.pack_sequence_as(then_out, ret, expand_composites=True)
-
 
 def Gradient(inputs, f, name=None):
   r""""""Computes the gradient function for function f via backpropagation.
@@ -978,8 +964,7 @@ def While(input_, cond, body, name=None, hostmem=None):
     # Slice off the loop-carried captured inputs.
     ret = ret[:-len(body.captured_inputs)]
   else:
-    ret = gen_functional_ops._while(
-        nest.flatten(input_, expand_composites=True), cond, body, name=name)
+    ret = gen_functional_ops._while(input_, cond, body, name=name)
   if hostmem:
     input_attr = attr_value_pb2.AttrValue()
     input_attr.list.i.extend(hostmem)
@@ -988,14 +973,7 @@ def While(input_, cond, body, name=None, hostmem=None):
     output_attr = attr_value_pb2.AttrValue()
     output_attr.list.i.extend(hostmem)
     ret[0].op._set_attr(""_output_hostmem"", output_attr)  # pylint: disable=protected-access
-
-  # Handle the Defun case until users have transitioned to tf.function. Note
-  # that composites may need to be re-packed by the caller.
-  if isinstance(body, function._DefinedFunction):
-    return ret
-
-  return nest.pack_sequence_as(
-      body.structured_outputs, ret, expand_composites=True)
+  return ret
 
 
 # b/36459430
",0,train
3265790fde0b78769b5906909db235464377f6f8,tensorflow/tensorflow,"Support CompositeTensor in functional If/While

We need to repack the return of the if/else branches to ensure any
CompositeTensors are returned with the correct type. For functional
while we need to be sure to unpack and repack when handling the body.

PiperOrigin-RevId: 333190256
Change-Id: If9a9449a10616afa3dd79e39b1e66ee0ee571a9e",functional_ops_test.py,"@@ -19,30 +19,28 @@ from __future__ import division
 from __future__ import print_function
 
 from tensorflow.python.eager import def_function
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function
-from tensorflow.python.framework import ops
-from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import test_util
 from tensorflow.python.ops import functional_ops
-from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
 
 
 class FunctionalOpsTest(test.TestCase):
 
+  @test_util.deprecated_graph_mode_only
   def testIfWithDefun(self):
-    # Defun should only be used in graph mode
-    with ops.Graph().as_default():
-      @function.Defun(dtypes.float32)
-      def Then(x):
-        return x + 1
 
-      @function.Defun(dtypes.float32)
-      def Else(x):
-        return x - 1
+    @function.Defun(dtypes.float32)
+    def Then(x):
+      return x + 1
+
+    @function.Defun(dtypes.float32)
+    def Else(x):
+      return x - 1
 
+    with self.cached_session():
       inputs = [10.]
       result = self.evaluate(functional_ops.If(False, inputs, Then, Else))
       self.assertEqual([9.0], result)
@@ -59,78 +57,12 @@ class FunctionalOpsTest(test.TestCase):
     def Else(x):
       return x - 1
 
-    inputs = [10.]
-    then_cf = Then.get_concrete_function()
-    else_cf = Else.get_concrete_function()
-    result = self.evaluate(functional_ops.If(False, inputs, then_cf, else_cf))
-    self.assertEqual([9.0], result)
-
-  def testIfWithFunctionComposite(self):
-
-    signature = [tensor_spec.TensorSpec([], dtypes.float32)]
-    @def_function.function(input_signature=signature)
-    def Then(x):
-      return sparse_tensor.SparseTensor([[0]], [x + 1], [1])
-
-    @def_function.function(input_signature=signature)
-    def Else(x):
-      return sparse_tensor.SparseTensor([[0]], [x - 1], [1])
-
-    inputs = [10.]
-    then_cf = Then.get_concrete_function()
-    else_cf = Else.get_concrete_function()
-    result = functional_ops.If(False, inputs, then_cf, else_cf)
-    self.assertIsInstance(result, sparse_tensor.SparseTensor)
-    self.assertAllEqual([9.0], result.values)
-
-  def testWhileWithDefun(self):
-    # Defun should only be used in graph mode
-    with ops.Graph().as_default():
-      @function.Defun(dtypes.int32)
-      def Body(n):
-        return n - 1
-
-      @function.Defun(dtypes.int32)
-      def Cond(n):
-        return math_ops.reduce_min(n) > 0
-
-      n = constant_op.constant([2])
-      result = self.evaluate(functional_ops.While([n], Cond, Body))
-      self.assertAllEqual([[0]], result)
-
-  def testWhileWithFunction(self):
-
-    @def_function.function
-    def Body(n):
-      return n - 1
-
-    @def_function.function
-    def Cond(n):
-      return math_ops.reduce_min(n) > 0
-
-    n = constant_op.constant([2])
-    cond_cf = Cond.get_concrete_function(n)
-    body_cf = Body.get_concrete_function(n)
-    result = functional_ops.While([n], cond_cf, body_cf)
-    self.assertAllEqual([0], result)
-
-  def testWhileWithFunctionComposite(self):
-
-    @def_function.function
-    def Body(n):
-      return sparse_tensor.SparseTensor([[0]], n.values - 1, [1])
-
-    @def_function.function
-    def Cond(n):
-      return math_ops.reduce_min(n.values) > 0
-
-    n = constant_op.constant([2])
-    n = sparse_tensor.SparseTensor([[0]], [2], [1])
-    cond_cf = Cond.get_concrete_function(n)
-    body_cf = Body.get_concrete_function(n)
-    result = functional_ops.While([n], cond_cf, body_cf)
-    self.assertIsInstance(result, sparse_tensor.SparseTensor)
-    self.assertAllEqual([0], result.values)
+    with self.cached_session():
+      inputs = [10.]
+      result = self.evaluate(
+          functional_ops.If(False, inputs, Then.get_concrete_function(),
+                            Else.get_concrete_function()))
+      self.assertEqual([9.0], result)
 
 
 if __name__ == '__main__':
",0,train
3e53570d3bf518ec2b6cfeed4b5fd57d11370289,tensorflow/tensorflow,fix #14542 a bug of model_to_dot() (#14553),vis_utils.py,"@@ -120,7 +120,7 @@ def model_to_dot(model, show_shapes=False, show_layer_names=True, rankdir='TB'):
     layer_id = str(id(layer))
     for i, node in enumerate(layer._inbound_nodes):  # pylint: disable=protected-access
       node_key = layer.name + '_ib-' + str(i)
-      if node_key in model.container_nodes:
+      if node_key in model._network_nodes:  # pylint: disable=protected-access
         for inbound_layer in node.inbound_layers:
           inbound_layer_id = str(id(inbound_layer))
           layer_id = str(id(layer))
",0,train
dac3cf87e6fd2fd80ebc05c1d21bec9ca992041d,tensorflow/tensorflow,"Remove unnecessary control dependencies.
Change: 144392019",metrics_impl.py,"@@ -296,12 +296,11 @@ def mean(values, weights=None, metrics_collections=None,
       values = math_ops.multiply(values, weights)
       num_values = math_ops.reduce_sum(weights)
 
-    total_compute_op = state_ops.assign_add(total, math_ops.reduce_sum(values))
-    count_compute_op = state_ops.assign_add(count, num_values)
+    update_total_op = state_ops.assign_add(total, math_ops.reduce_sum(values))
+    update_count_op = state_ops.assign_add(count, num_values)
 
     mean_t = _safe_div(total, count, 'value')
-    with ops.control_dependencies([total_compute_op, count_compute_op]):
-      update_op = _safe_div(total, count, 'update_op')
+    update_op = _safe_div(update_total_op, update_count_op, 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, mean_t)
@@ -1007,8 +1006,8 @@ def mean_tensor(values, weights=None, metrics_collections=None,
       values = math_ops.multiply(values, weights)
       num_values = math_ops.multiply(num_values, weights)
 
-    total_compute_op = state_ops.assign_add(total, values)
-    count_compute_op = state_ops.assign_add(count, num_values)
+    update_total_op = state_ops.assign_add(total, values)
+    update_count_op = state_ops.assign_add(count, num_values)
 
     def compute_mean(total, count, name):
       non_zero_count = math_ops.maximum(count,
@@ -1017,8 +1016,7 @@ def mean_tensor(values, weights=None, metrics_collections=None,
       return math_ops.truediv(total, non_zero_count, name=name)
 
     mean_t = compute_mean(total, count, 'value')
-    with ops.control_dependencies([total_compute_op, count_compute_op]):
-      update_op = compute_mean(total, count, 'update_op')
+    update_op = compute_mean(update_total_op, update_count_op, 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, mean_t)
@@ -1271,17 +1269,16 @@ def precision(labels, predictions, weights=None,
         labels, predictions, weights, metrics_collections=None,
         updates_collections=None, name=None)
 
-    def compute_precision(name):
+    def compute_precision(tp, fp, name):
       return array_ops.where(
-          math_ops.greater(true_p + false_p, 0),
-          math_ops.div(true_p, true_p + false_p),
+          math_ops.greater(tp + fp, 0),
+          math_ops.div(tp, tp + fp),
           0,
           name)
 
-    p = compute_precision('value')
-    with ops.control_dependencies([true_positives_update_op,
-                                   false_positives_update_op]):
-      update_op = compute_precision('update_op')
+    p = compute_precision(true_p, false_p, 'value')
+    update_op = compute_precision(
+        true_positives_update_op, false_positives_update_op, 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, p)
@@ -1342,17 +1339,15 @@ def precision_at_thresholds(labels, predictions, thresholds,
                                      (predictions, labels, weights)):
     values, update_ops = _confusion_matrix_at_thresholds(
         labels, predictions, thresholds, weights, includes=('tp', 'fp'))
-    tp = values['tp']
-    fp = values['fp']
 
     # Avoid division by zero.
     epsilon = 1e-7
-    def compute_precision(name):
+    def compute_precision(tp, fp, name):
       return math_ops.div(tp, epsilon + tp + fp, name='precision_' + name)
 
-    prec = compute_precision('value')
-    with ops.control_dependencies(update_ops.values()):
-      update_op = compute_precision('update_op')
+    prec = compute_precision(values['tp'], values['fp'], 'value')
+    update_op = compute_precision(
+        update_ops['tp'], update_ops['fp'], 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, prec)
@@ -1469,9 +1464,8 @@ def recall(labels, predictions, weights=None,
           name)
 
     rec = compute_recall(true_p, false_n, 'value')
-    with ops.control_dependencies([true_positives_update_op,
-                                   false_negatives_update_op]):
-      update_op = compute_recall(true_p, false_n, 'update_op')
+    update_op = compute_recall(
+        true_positives_update_op, false_negatives_update_op, 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, rec)
@@ -1881,17 +1875,14 @@ def recall_at_thresholds(labels, predictions, thresholds,
                                      (predictions, labels, weights)):
     values, update_ops = _confusion_matrix_at_thresholds(
         labels, predictions, thresholds, weights, includes=('tp', 'fn'))
-    tp = values['tp']
-    fn = values['fn']
 
     # Avoid division by zero.
     epsilon = 1e-7
-    def compute_recall(name):
+    def compute_recall(tp, fn, name):
       return math_ops.div(tp, epsilon + tp + fn, name='recall_' + name)
 
-    rec = compute_recall('value')
-    with ops.control_dependencies(update_ops.values()):
-      update_op = compute_recall('update_op')
+    rec = compute_recall(values['tp'], values['fn'], 'value')
+    update_op = compute_recall(update_ops['tp'], update_ops['fn'], 'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, rec)
@@ -1951,21 +1942,20 @@ def root_mean_squared_error(labels, predictions, weights=None,
   labels, predictions, weights = _remove_squeezable_dimensions(
       labels, predictions, weights)
   predictions.get_shape().assert_is_compatible_with(labels.get_shape())
-  value_tensor, update_op = mean_squared_error(
+  mse, update_mse_op = mean_squared_error(
       labels, predictions, weights, None, None,
       name or 'root_mean_squared_error')
 
-  rmse = math_ops.sqrt(value_tensor)
-  with ops.control_dependencies([update_op]):
-    update_op = math_ops.sqrt(update_op)
+  rmse = math_ops.sqrt(mse)
+  update_rmse_op = math_ops.sqrt(update_mse_op)
 
   if metrics_collections:
     ops.add_to_collections(metrics_collections, rmse)
 
   if updates_collections:
-    ops.add_to_collections(updates_collections, update_op)
+    ops.add_to_collections(updates_collections, update_rmse_op)
 
-  return rmse, update_op
+  return rmse, update_rmse_op
 
 
 def sensitivity_at_specificity(
@@ -2031,12 +2021,8 @@ def sensitivity_at_specificity(
 
     values, update_ops = _confusion_matrix_at_thresholds(
         labels, predictions, thresholds, weights)
-    tp = values['tp']
-    fn = values['fn']
-    tn = values['tn']
-    fp = values['fp']
 
-    def compute_sensitivity_at_specificity(name):
+    def compute_sensitivity_at_specificity(tp, tn, fp, fn, name):
       specificities = math_ops.div(tn, tn + fp + kepsilon)
       tf_index = math_ops.argmin(math_ops.abs(specificities - specificity), 0)
       tf_index = math_ops.cast(tf_index, dtypes.int32)
@@ -2046,9 +2032,11 @@ def sensitivity_at_specificity(
                           tp[tf_index] + fn[tf_index] + kepsilon,
                           name)
 
-    sensitivity = compute_sensitivity_at_specificity('value')
-    with ops.control_dependencies(update_ops.values()):
-      update_op = compute_sensitivity_at_specificity('update_op')
+    sensitivity = compute_sensitivity_at_specificity(
+        values['tp'], values['tn'], values['fp'], values['fn'], 'value')
+    update_op = compute_sensitivity_at_specificity(
+        update_ops['tp'], update_ops['tn'], update_ops['fp'], update_ops['fn'],
+        'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, sensitivity)
@@ -2595,15 +2583,15 @@ def specificity_at_sensitivity(
 
     values, update_ops = _confusion_matrix_at_thresholds(
         labels, predictions, thresholds, weights)
-    tp = values['tp']
-    fn = values['fn']
-    tn = values['tn']
-    fp = values['fp']
 
-    def compute_specificity_at_sensitivity(name):
+    def compute_specificity_at_sensitivity(tp, tn, fp, fn, name):
       """"""Computes the specificity at the given sensitivity.
 
       Args:
+        tp: True positives.
+        tn: True negatives.
+        fp: False positives.
+        fn: False negatives.
         name: The name of the operation.
 
       Returns:
@@ -2626,9 +2614,11 @@ def specificity_at_sensitivity(
                           tn[tf_index] + fp[tf_index] + kepsilon,
                           name)
 
-    specificity = compute_specificity_at_sensitivity('value')
-    with ops.control_dependencies(update_ops.values()):
-      update_op = compute_specificity_at_sensitivity('update_op')
+    specificity = compute_specificity_at_sensitivity(
+        values['tp'], values['tn'], values['fp'], values['fn'], 'value')
+    update_op = compute_specificity_at_sensitivity(
+        update_ops['tp'], update_ops['tn'], update_ops['fp'], update_ops['fn'],
+        'update_op')
 
     if metrics_collections:
       ops.add_to_collections(metrics_collections, specificity)
",0,train
0a935589b9395b8275fa2ecd9fec2408a57d1b24,tensorflow/tensorflow,"Use optimized ArgMax to replace partial ArgSort when the number of category is one.

PiperOrigin-RevId: 381393443
Change-Id: I2f0a47184d374543124b137b81741857e783bdeb",detection_postprocess.cc,"@@ -364,10 +364,14 @@ TfLiteStatus DecodeCenterSizeBoxes(TfLiteContext* context, TfLiteNode* node,
 
 void DecreasingPartialArgSort(const float* values, int num_values,
                               int num_to_sort, int* indices) {
-  std::iota(indices, indices + num_values, 0);
-  std::partial_sort(
-      indices, indices + num_to_sort, indices + num_values,
-      [&values](const int i, const int j) { return values[i] > values[j]; });
+  if (num_to_sort == 1) {
+    indices[0] = optimized_ops::ArgMaxVector(values, num_values);
+  } else {
+    std::iota(indices, indices + num_values, 0);
+    std::partial_sort(
+        indices, indices + num_to_sort, indices + num_values,
+        [&values](const int i, const int j) { return values[i] > values[j]; });
+  }
 }
 
 void DecreasingArgSort(const float* values, int num_values, int* indices) {
",0,train
379d9a71d36be8728bf906c0af8d5519eeaa23cb,tensorflow/tensorflow,updates test function for new shuffle error type and message,numpy_io_test.py,"@@ -286,8 +286,9 @@ class NumpyIoTest(test.TestCase):
     x = np.arange(32, 36)
     y = np.arange(4)
     with self.test_session():
-      with self.assertRaisesRegexp(TypeError,
-                                   'shuffle must be explicitly set as boolean'):
+      with self.assertRaisesRegexp(ValueError,
+                                   'shuffle must be provided and explicitly '
+                                   'set as boolean'):
         # Default shuffle is None.
         numpy_io.numpy_input_fn(x, y)
 
",0,test
379d9a71d36be8728bf906c0af8d5519eeaa23cb,tensorflow/tensorflow,updates test function for new shuffle error type and message,pandas_io_test.py,"@@ -70,8 +70,9 @@ class PandasIoTest(test.TestCase):
       return
     x, _ = self.makeTestDataFrame()
     y_noindex = pd.Series(np.arange(-32, -28))
-    with self.assertRaisesRegexp(TypeError,
-                                 'shuffle must be explicitly set as boolean'):
+    with self.assertRaisesRegexp(ValueError,
+                                 'shuffle must be provided and explicitly '
+                                 'set as boolean'):
       # Default shuffle is None
       pandas_io.pandas_input_fn(x, y_noindex)
 
",0,test
14993b909aaa53f2713e234b3ad3a35aff4739e8,tensorflow/tensorflow,"rm duplicated implements for GrpcRemoteMaster (#12313)

* rm duplicated implements for GrpcRemoteMaster

* remove all duplicated implements

* format for code style

* typedef for MasterServiceStub",grpc_remote_master.cc,"@@ -32,6 +32,8 @@ namespace tensorflow {
 // GrpcRemoteMaster is an implementation of the MasterInterface
 // that uses gRPC to talk to the Master service.
 class GrpcRemoteMaster : public MasterInterface {
+  using MasterServiceStub = grpc::MasterService::Stub;
+
  public:
   explicit GrpcRemoteMaster(const SharedGrpcChannelPtr& client_channel)
       : stub_(grpc::MasterService::NewStub(client_channel)) {}
@@ -42,63 +44,56 @@ class GrpcRemoteMaster : public MasterInterface {
                        const CreateSessionRequest* request,
                        CreateSessionResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->CreateSession(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::CreateSession);
   }
 
   Status ExtendSession(CallOptions* call_options,
                        const ExtendSessionRequest* request,
                        ExtendSessionResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->ExtendSession(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::ExtendSession);
   }
 
   Status PartialRunSetup(CallOptions* call_options,
                          const PartialRunSetupRequest* request,
                          PartialRunSetupResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->PartialRunSetup(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::PartialRunSetup);
   }
 
   Status RunStep(CallOptions* call_options, RunStepRequestWrapper* request,
                  MutableRunStepResponseWrapper* response) override {
     ::grpc::ClientContext ctx;
     auto trace = TraceRpc(""RunStep/Client"", &ctx);
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->RunStep(&ctx, request->ToProto(),
-                                         get_proto_from_wrapper(response)));
+    return Call(&ctx, call_options, &request->ToProto(),
+                get_proto_from_wrapper(response),
+                &MasterServiceStub::RunStep);
   }
 
   Status CloseSession(CallOptions* call_options,
                       const CloseSessionRequest* request,
                       CloseSessionResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->CloseSession(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::CloseSession);
   }
 
   Status ListDevices(CallOptions* call_options,
                      const ListDevicesRequest* request,
                      ListDevicesResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->ListDevices(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::ListDevices);
   }
 
   Status Reset(CallOptions* call_options, const ResetRequest* request,
                ResetResponse* response) override {
     ::grpc::ClientContext ctx;
-    ctx.set_fail_fast(false);
-    SetDeadline(&ctx, call_options->GetTimeout());
-    return FromGrpcStatus(stub_->Reset(&ctx, *request, response));
+    return Call(&ctx, call_options, request, response,
+                &MasterServiceStub::Reset);
   }
 
  private:
@@ -110,13 +105,23 @@ class GrpcRemoteMaster : public MasterInterface {
     return port::Tracing::TraceMe(name, trace_id);
   }
 
-  std::unique_ptr<grpc::MasterService::Stub> stub_;
-
   void SetDeadline(::grpc::ClientContext* ctx, int64 time_in_ms) {
     if (time_in_ms > 0) {
       ctx->set_deadline(gpr_time_from_millis(time_in_ms, GPR_TIMESPAN));
     }
   }
+
+  template <typename Request, typename Response>
+  Status Call(::grpc::ClientContext* ctx, CallOptions* call_options,
+              const Request* request, Response* response,
+              ::grpc::Status (MasterServiceStub::*pfunc)(
+                  ::grpc::ClientContext*, const Request&, Response*)) {
+    ctx->set_fail_fast(false);
+    SetDeadline(ctx, call_options->GetTimeout());
+    return FromGrpcStatus((stub_.get()->*pfunc)(ctx, *request, response));
+  }
+
+  std::unique_ptr<MasterServiceStub> stub_;
 };
 
 MasterInterface* NewGrpcMaster(const SharedGrpcChannelPtr& channel) {
",0,test
4f933f5b9ba6469fce4d5142246ce607edbbcb00,tensorflow/tensorflow,pooling ops build fix.,mkl_pooling_ops_common.cc,"@@ -180,7 +180,7 @@ void MklPoolingBwdPrimitive<T>::Setup(const MklPoolingParams& bwdParams) {
   context_.alg_kind = bwdParams.alg_kind;
 
   // Create memory descriptor.
-  context_.diff_src_md.reset(new memory::desc(
+  context_.src_md.reset(new memory::desc(
       {bwdParams.src_dims}, MklDnnType<T>(), MEMORY_FORMAT::any));
 #ifndef ENABLE_MKLDNN_V1
   context_.diff_dst_md.reset(new memory::desc(
",0,train
e91d2e843f8ef2b35179706f8b1fe964a663a988,tensorflow/tensorflow,"Update GraphDef version to 823.

PiperOrigin-RevId: 383993661
Change-Id: I5acb57ddde52ce83a167f09ce46cf153dca6565a",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 822  // Updated: 2021/7/9
+#define TF_GRAPH_DEF_VERSION 823  // Updated: 2021/7/10
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
ce9b1295b5689129fe1a35ae75faf069b0fe24ca,tensorflow/tensorflow,fix,dlpack.cc,"@@ -323,7 +323,7 @@ TFE_TensorHandle* TFE_HandleFromDLPack(void* dlm, TF_Status* status,
 
   TFE_TensorHandle* handle = TFE_NewTensorHandleFromDeviceMemory(
       ctx, device_name.value().c_str(), dtype, dims, num_dims, data,
-      total_bytes, &DeallocatorWrapperFunc, &dlmt, status);
+      total_bytes, &DeallocatorWrapperFunc, dlmt, status);
 
   return handle;
 }
",0,train
ce9b1295b5689129fe1a35ae75faf069b0fe24ca,tensorflow/tensorflow,fix,tfe_wrapper.cc,"@@ -1169,7 +1169,8 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
 
     PyCapsule_SetName(pycapsule.ptr(), ""used_dltensor"");
     PyCapsule_SetDestructor(pycapsule.ptr(), nullptr);
-    PyObject* pyhandle = EagerTensorFromHandle(thandle, true);
+    
+    PyObject* pyhandle = EagerTensorFromHandle(thandle);
     return tensorflow::PyoOrThrow(pyhandle);
   });
 
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",android_armv7a_cpu_utils_helper.cc,"@@ -31,26 +31,6 @@ namespace profile_utils {
 
 /* static */ constexpr int AndroidArmV7ACpuUtilsHelper::INVALID_FD;
 
-void AndroidArmV7ACpuUtilsHelper::Initialize() {
-  struct perf_event_attr pe;
-
-  memset(&pe, 0, sizeof(struct perf_event_attr));
-  pe.type = PERF_TYPE_HARDWARE;
-  pe.size = sizeof(struct perf_event_attr);
-  pe.config = PERF_COUNT_HW_CPU_CYCLES;
-  pe.disabled = 1;
-  pe.exclude_kernel = 1;
-  pe.exclude_hv = 1;
-
-  fd_ = OpenPerfEvent(&pe, 0, -1, -1, 0);
-  if (fd_ == INVALID_FD) {
-    LOG(WARNING) << ""Error opening perf event"";
-    is_initialized_ = false;
-  } else {
-    is_initialized_ = true;
-  }
-}
-
 void AndroidArmV7ACpuUtilsHelper::ResetClockCycle() {
   if (!is_initialized_) {
     return;
@@ -98,7 +78,6 @@ int AndroidArmV7ACpuUtilsHelper::OpenPerfEvent(
 namespace tensorflow {
 namespace profile_utils {
 
-void AndroidArmV7ACpuUtilsHelper::Initialize() {}
 void AndroidArmV7ACpuUtilsHelper::ResetClockCycle() {}
 uint64 AndroidArmV7ACpuUtilsHelper::GetCurrentClockCycle() { return 1; }
 void AndroidArmV7ACpuUtilsHelper::EnableClockCycleProfiling(bool) {}
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",android_armv7a_cpu_utils_helper.h,"@@ -27,7 +27,6 @@ namespace profile_utils {
 class AndroidArmV7ACpuUtilsHelper : public ICpuUtilsHelper {
  public:
   AndroidArmV7ACpuUtilsHelper() = default;
-  void Initialize() final;
   void ResetClockCycle() final;
   uint64 GetCurrentClockCycle() final;
   void EnableClockCycleProfiling(bool enable) final;
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",cpu_utils.cc,"@@ -14,64 +14,53 @@ limitations under the License.
 ==============================================================================*/
 
 #include ""tensorflow/core/platform/profile_utils/cpu_utils.h""
+
+#include <limits>
+#include <mutex>
+
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/profile_utils/android_armv7a_cpu_utils_helper.h""
 
 namespace tensorflow {
 namespace profile_utils {
 
-namespace {
-
-const class StaticVariableInitializer {
- public:
-  StaticVariableInitializer() { CpuUtils::Initialize(); }
-} STATIC_VARIABLE_INITIALIZER;
-
-}  // anonymous namespace for initializer
-
 /* static */ constexpr int64 CpuUtils::INVALID_FREQUENCY;
 
-/* static */ int64 CpuUtils::GetCpuFrequency() {
-  static const int64 cpu_frequency = GetCpuFrequencyImpl();
-  return cpu_frequency;
-}
+static ICpuUtilsHelper* cpu_utils_helper_instance_ = nullptr;
 
-/* static */ int CpuUtils::GetClockPerMicroSec() {
-  static const int clock_per_micro_sec =
-      static_cast<int>(GetCpuFrequency() / (1000LL * 1000LL));
-  return clock_per_micro_sec;
+/* static */ int64 CpuUtils::GetCycleCounterFrequency() {
+  static const int64 cpu_frequency = GetCycleCounterFrequencyImpl();
+  return cpu_frequency;
 }
 
 /* static */ double CpuUtils::GetMicroSecPerClock() {
   static const double micro_sec_per_clock =
-      (1000.0 * 1000.0) / static_cast<double>(GetCpuFrequency());
+      (1000.0 * 1000.0) / static_cast<double>(GetCycleCounterFrequency());
   return micro_sec_per_clock;
 }
 
-/* static */ void CpuUtils::Initialize() {
-  CpuUtils::GetCpuFrequency();
-  CpuUtils::GetClockPerMicroSec();
-  CpuUtils::GetMicroSecPerClock();
-  GetCpuUtilsHelper().Initialize();
-}
-
 /* static */ void CpuUtils::ResetClockCycle() {
-  GetCpuUtilsHelper().ResetClockCycle();
+  GetCpuUtilsHelperSingletonInstance().ResetClockCycle();
 }
 
 /* static */ void CpuUtils::EnableClockCycleProfiling(const bool enable) {
-  GetCpuUtilsHelper().EnableClockCycleProfiling(enable);
+  GetCpuUtilsHelperSingletonInstance().EnableClockCycleProfiling(enable);
 }
 
-/* static */ int64 CpuUtils::GetCpuFrequencyImpl() {
-// TODO(satok): do not switch by macro here
+/* static */ int64 CpuUtils::GetCycleCounterFrequencyImpl() {
 #if defined(__ANDROID__)
-  // TODO:(satok): Support Android
+  // TODO(satok): Support android
   return INVALID_FREQUENCY;
 #elif defined(__linux__)
   double bogomips;
   FILE* fp = popen(""grep '^bogomips' /proc/cpuinfo | head -1"", ""r"");
+  if (fp == nullptr) {
+    return INVALID_FREQUENCY;
+  }
   const int retval_of_bogomips = fscanf(fp, ""bogomips : %lf"", &bogomips);
+  if (retval_of_bogomips <= 0) {
+    return INVALID_FREQUENCY;
+  }
   pclose(fp);
   const double freq_ghz = bogomips / 1000.0 / 2.0;
   if (retval_of_bogomips != 1 || freq_ghz < 0.01) {
@@ -83,7 +72,12 @@ const class StaticVariableInitializer {
   int64 freq_hz;
   FILE* fp =
       popen(""sysctl hw | grep hw.cpufrequency_max: | cut -d' ' -f 2"", ""r"");
-  fscanf(fp, ""%lld"", &freq_hz);
+  if (fp == nullptr) {
+    return INVALID_FREQUENCY;
+  }
+  if (fscanf(fp, ""%lld"", &freq_hz) != 1) {
+    return INVALID_FREQUENCY;
+  }
   pclose(fp);
   if (freq_hz < 1e6) {
     LOG(WARNING) << ""Failed to get CPU frequency: "" << freq_hz << "" Hz"";
@@ -97,14 +91,19 @@ const class StaticVariableInitializer {
 #endif
 }
 
-/* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelper() {
+/* static */ ICpuUtilsHelper& CpuUtils::GetCpuUtilsHelperSingletonInstance() {
+  static std::once_flag flag;
+  std::call_once(flag, []() {
+    if (cpu_utils_helper_instance_ != nullptr) {
+      LOG(FATAL) << ""cpu_utils_helper_instance_ is already instantiated."";
+    }
 #if defined(__ANDROID__) && defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
-  static AndroidArmV7ACpuUtilsHelper cpu_utils_helper;
+    cpu_utils_helper_instance_ = new AndroidArmV7ACpuUtilsHelper();
 #else
-  // TODO(satok): Change CpuUtilsHelper by cpu architecture
-  static DefaultCpuUtilsHelper cpu_utils_helper;
+      cpu_utils_helper_instance_ = new DefaultCpuUtilsHelper();
 #endif
-  return cpu_utils_helper;
+  });
+  return *cpu_utils_helper_instance_;
 }
 
 }  // namespace profile_utils
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",cpu_utils.h,"@@ -17,6 +17,8 @@ limitations under the License.
 #ifndef TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__
 #define TENSORFLOW_PLATFORM_PROFILEUTILS_CPU_UTILS_H__
 
+#include <memory>
+
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/profile_utils/i_cpu_utils_helper.h""
 #include ""tensorflow/core/platform/types.h""
@@ -29,6 +31,13 @@ namespace tensorflow {
 
 namespace profile_utils {
 
+// CpuUtils is a profiling tool with static functions
+// designed to be called from multiple classes.
+// A dedicated class which inherits ICpuUtilsHelper is
+// stored as a function-local static variable retrieved by
+// GetCpuUtilsHelperSingletonInstance, which caches CPU information
+// because loading CPU information may take a long time.
+// Users must call EnableClockCycleProfiling before using CpuUtils.
 class CpuUtils {
  public:
   // Constant for invalid frequency.
@@ -44,7 +53,7 @@ class CpuUtils {
   static inline uint64 GetCurrentClockCycle() {
 #if defined(__ANDROID__)
 #if defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
-    return GetCpuUtilsHelper().GetCurrentClockCycle();
+    return GetCpuUtilsHelperSingletonInstance().GetCurrentClockCycle();
 #else   // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
     return DUMMY_CYCLE_CLOCK;
 #endif  // defined(__ARM_ARCH_7A__) && (__ANDROID_API__ >= 21)
@@ -88,25 +97,16 @@ class CpuUtils {
 #endif
   }
 
-  // Return cpu frequency. As this method caches the cpu frequency internally,
-  // there is no overhead except function call to call this method.
-  static int64 GetCpuFrequency();
-
-  // Return cached cpu count per each micro second.
+  // Return cycle counter frequency.
   // As this method caches the cpu frequency internally,
-  // there is no overhead except function call to call this method.
-  static int GetClockPerMicroSec();
+  // the first call will incur overhead, but not subsequent calls.
+  static int64 GetCycleCounterFrequency();
 
   // Return micro secound per each clock
   // As this method caches the cpu frequency internally,
-  // there is no overhead except function call to call this method.
+  // the first call will incur overhead, but not subsequent calls.
   static double GetMicroSecPerClock();
 
-  // Initialize CpuUtils
-  // This method is called from the static initializer declared in cpu_utils.cc
-  // This initializes state and cached static variables declared in functions.
-  static void Initialize();
-
   // Reset clock cycle
   // Resetting clock cycle is recommended to prevent
   // clock cycle counters from overflowing on some platforms.
@@ -120,7 +120,6 @@ class CpuUtils {
   class DefaultCpuUtilsHelper : public ICpuUtilsHelper {
    public:
     DefaultCpuUtilsHelper() = default;
-    void Initialize() final {}
     void ResetClockCycle() final {}
     uint64 GetCurrentClockCycle() final { return DUMMY_CYCLE_CLOCK; }
     void EnableClockCycleProfiling(bool /* enable */) final {}
@@ -133,9 +132,15 @@ class CpuUtils {
   // CAVEAT: as this method calls system call and parse the mssage,
   // this call may be slow. This is why this class caches the value by
   // StaticVariableInitializer.
-  static int64 GetCpuFrequencyImpl();
-
-  static ICpuUtilsHelper& GetCpuUtilsHelper();
+  static int64 GetCycleCounterFrequencyImpl();
+
+  // Return a singleton of ICpuUtilsHelper
+  // ICpuUtilsHelper is declared as a function-local static variable
+  // for the following two reasons:
+  // 1. Avoid passing instances to all classes which want
+  // to use profiling tools in CpuUtils
+  // 2. Minimize the overhead of acquiring ICpuUtilsHelper
+  static ICpuUtilsHelper& GetCpuUtilsHelperSingletonInstance();
 
   TF_DISALLOW_COPY_AND_ASSIGN(CpuUtils);
 };
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",cpu_utils_test.cc,"@@ -23,7 +23,16 @@ namespace profile_utils {
 
 static constexpr bool DBG = false;
 
-TEST(CpuUtils, CheckGetCurrentClockCycle) {
+class CpuUtilsTest : public ::testing::Test {
+ protected:
+  void SetUp() { CpuUtils::EnableClockCycleProfiling(true); }
+};
+
+TEST_F(CpuUtilsTest, SetUpTestCase) {}
+
+TEST_F(CpuUtilsTest, TearDownTestCase) {}
+
+TEST_F(CpuUtilsTest, CheckGetCurrentClockCycle) {
   static constexpr int LOOP_COUNT = 10;
   const uint64 start_clock_count = CpuUtils::GetCurrentClockCycle();
   CHECK_GT(start_clock_count, 0);
@@ -42,8 +51,8 @@ TEST(CpuUtils, CheckGetCurrentClockCycle) {
   }
 }
 
-TEST(CpuUtils, CheckCpuFrequency) {
-  const int64 cpu_frequency = CpuUtils::GetCpuFrequency();
+TEST_F(CpuUtilsTest, CheckCycleCounterFrequency) {
+  const int64 cpu_frequency = CpuUtils::GetCycleCounterFrequency();
   CHECK_GT(cpu_frequency, 0);
   CHECK_NE(cpu_frequency, CpuUtils::INVALID_FREQUENCY);
   if (DBG) {
@@ -51,15 +60,7 @@ TEST(CpuUtils, CheckCpuFrequency) {
   }
 }
 
-TEST(CpuUtils, CheckClockPerMicroSec) {
-  const int clock_per_micro_sec = CpuUtils::GetClockPerMicroSec();
-  CHECK_GT(clock_per_micro_sec, 0);
-  if (DBG) {
-    LOG(INFO) << ""Clock per micro sec = "" << clock_per_micro_sec;
-  }
-}
-
-TEST(CpuUtils, CheckMicroSecPerClock) {
+TEST_F(CpuUtilsTest, CheckMicroSecPerClock) {
   const double micro_sec_per_clock = CpuUtils::GetMicroSecPerClock();
   CHECK_GT(micro_sec_per_clock, 0.0);
   if (DBG) {
",0,train
21d699a745bd8225f38508658415bd2739bf500f,tensorflow/tensorflow,"Stop using static initializer in cpu utils, just removed initializing method
Change: 133406514",i_cpu_utils_helper.h,"@@ -24,13 +24,11 @@ namespace profile_utils {
 
 // ICpuUtilsHelper is an interface class for cpu_utils which proxies
 // the difference of profiling functions of different platforms.
+// Overridden functions must be thread safe.
 class ICpuUtilsHelper {
  public:
   ICpuUtilsHelper() = default;
   virtual ~ICpuUtilsHelper() = default;
-  // Initialize CpuUtilsHelper.
-  // This method is called only once when CpuUtils is loaded.
-  virtual void Initialize() = 0;
   // Reset clock cycle.
   // Resetting clock cycle is recommended to prevent
   // clock cycle counters from overflowing on some platforms.
",0,train
d80fa4ebdccffde26334d04ecfc7935887c603e2,tensorflow/tensorflow,"Bidirectional rnn now returns forward and backward output states, updated tests, tests pass

Updated docstring for bidirectional_rnn

Updated bidirectional test

Using self.assertAllClose instead of explicit iterators.

Fixed some alignments in rnn_test",rnn_test.py,"@@ -771,28 +771,30 @@ class BidirectionalRNNTest(tf.test.TestCase):
         tf.placeholder(tf.float32,
                        shape=(batch_size, input_size) if use_shape else None)
     ]
-    outputs = tf.nn.bidirectional_rnn(cell_fw,
-                                      cell_bw,
-                                      inputs,
-                                      dtype=tf.float32,
-                                      sequence_length=sequence_length)
+    outputs, state_fw, state_bw = tf.nn.bidirectional_rnn(cell_fw,
+                                                          cell_bw,
+                                                          inputs,
+                                                          dtype=tf.float32,
+                                                          sequence_length=sequence_length)
     self.assertEqual(len(outputs), len(inputs))
     for out in outputs:
       self.assertEqual(out.get_shape().as_list(), [batch_size if use_shape
                                                    else None, 2 * num_units])
 
     input_value = np.random.randn(batch_size, input_size)
+    outputs = tf.pack(outputs)
 
-    return input_value, inputs, outputs, sequence_length
+    return input_value, inputs, outputs, state_fw, state_bw, sequence_length
 
   def _testBidirectionalRNN(self, use_gpu, use_shape):
     with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
-      input_value, inputs, outputs, sequence_length = (
+      input_value, inputs, outputs, state_fw, state_bw, sequence_length = (
           self._createBidirectionalRNN(use_gpu, use_shape, True))
       tf.initialize_all_variables().run()
       # Run with pre-specified sequence length of 2, 3
-      out = sess.run(outputs, feed_dict={inputs[0]: input_value,
-                                         sequence_length: [2, 3]})
+      out, s_fw, s_bw = sess.run([outputs, state_fw, state_bw], 
+                                 feed_dict={inputs[0]: input_value,
+                                 sequence_length: [2, 3]})
 
       # Since the forward and backward LSTM cells were initialized with the
       # same parameters, the forward and backward output has to be the same,
@@ -824,13 +826,17 @@ class BidirectionalRNNTest(tf.test.TestCase):
       self.assertEqual(out[2][1][0], out[0][1][3])
       self.assertEqual(out[2][1][1], out[0][1][4])
       self.assertEqual(out[2][1][2], out[0][1][5])
+      # Via the reasoning above, the forward and backward final state should be
+      # exactly the same
+      self.assertAllClose(s_fw, s_bw)
 
   def _testBidirectionalRNNWithoutSequenceLength(self, use_gpu, use_shape):
     with self.test_session(use_gpu=use_gpu, graph=tf.Graph()) as sess:
-      input_value, inputs, outputs, _ = self._createBidirectionalRNN(
-          use_gpu, use_shape, False)
+      input_value, inputs, outputs, state_fw, state_bw, _ = self._createBidirectionalRNN(
+                                                                use_gpu, use_shape, False)
       tf.initialize_all_variables().run()
-      out = sess.run(outputs, feed_dict={inputs[0]: input_value})
+      out, s_fw, s_bw = sess.run([outputs, state_fw, state_bw], 
+                                 feed_dict={inputs[0]: input_value})
 
       # Since the forward and backward LSTM cells were initialized with the
       # same parameters, the forward and backward output has to be the same,
@@ -849,6 +855,9 @@ class BidirectionalRNNTest(tf.test.TestCase):
         self.assertEqual(out[i][1][0], out[8 - 1 - i][1][3])
         self.assertEqual(out[i][1][1], out[8 - 1 - i][1][4])
         self.assertEqual(out[i][1][2], out[8 - 1 - i][1][5])
+      # Via the reasoning above, the forward and backward final state should be
+      # exactly the same
+      self.assertAllClose(s_fw, s_bw)
 
   def testBidirectionalRNN(self):
     self._testBidirectionalRNN(use_gpu=False, use_shape=False)
",0,train
d80fa4ebdccffde26334d04ecfc7935887c603e2,tensorflow/tensorflow,"Bidirectional rnn now returns forward and backward output states, updated tests, tests pass

Updated docstring for bidirectional_rnn

Updated bidirectional test

Using self.assertAllClose instead of explicit iterators.

Fixed some alignments in rnn_test",rnn.py,"@@ -293,9 +293,11 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs,
     scope: VariableScope for the created subgraph; defaults to ""BiRNN""
 
   Returns:
-    A set of output `Tensors` where:
+    A tuple (outputs, output_state_fw, output_state_bw) where:
       outputs is a length T list of outputs (one for each input), which
       are depth-concatenated forward and backward outputs
+      output_state_fw is the final state of the forward rnn
+      output_state_bw is the final state of the backward rnn
 
   Raises:
     TypeError: If ""cell_fw"" or ""cell_bw"" is not an instance of RNNCell.
@@ -314,19 +316,19 @@ def bidirectional_rnn(cell_fw, cell_bw, inputs,
   name = scope or ""BiRNN""
   # Forward direction
   with vs.variable_scope(name + ""_FW"") as fw_scope:
-    output_fw, _ = rnn(cell_fw, inputs, initial_state_fw, dtype,
+    output_fw, output_state_fw = rnn(cell_fw, inputs, initial_state_fw, dtype,
                        sequence_length, scope=fw_scope)
 
   # Backward direction
   with vs.variable_scope(name + ""_BW"") as bw_scope:
-    tmp, _ = rnn(cell_bw, _reverse_seq(inputs, sequence_length),
+    tmp, output_state_bw = rnn(cell_bw, _reverse_seq(inputs, sequence_length),
                  initial_state_bw, dtype, sequence_length, scope=bw_scope)
   output_bw = _reverse_seq(tmp, sequence_length)
   # Concat each of the forward/backward outputs
   outputs = [array_ops.concat(1, [fw, bw])
              for fw, bw in zip(output_fw, output_bw)]
 
-  return outputs
+  return (outputs, output_state_fw, output_state_bw)
 
 
 def dynamic_rnn(cell, inputs, sequence_length, initial_state=None, dtype=None,
",0,train
4d120b703ee1b28bc5dcc719d04150688ce32361,tensorflow/tensorflow,"Use std::move for functions in gpu EventMgr.
Change: 143193447",gpu_event_mgr.h,"@@ -83,7 +83,7 @@ class EventMgr {
     ToFreeVector to_free;
     {
       mutex_lock l(mu_);
-      QueueFunc(stream, func);
+      QueueFunc(stream, std::move(func));
       PollEvents(false, &to_free);
     }
     FreeMemory(to_free);
@@ -147,7 +147,7 @@ class EventMgr {
 
   void QueueFunc(perftools::gputools::Stream* stream,
                  std::function<void()> func) EXCLUSIVE_LOCKS_REQUIRED(mu_) {
-    QueueInUse(stream, {nullptr, nullptr, BufRec(), func});
+    QueueInUse(stream, {nullptr, nullptr, BufRec(), std::move(func)});
   }
 
   // This function should be called at roughly the same tempo as
",0,train
8e9f3196fd8841de83bd6a622df696ea191d1d78,tensorflow/tensorflow,"Added a bunch of unary ops to the estimator.

PiperOrigin-RevId: 324607213
Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe",op_level_cost_estimator.cc,"@@ -522,6 +522,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
 
   // Unary ops alphabetically sorted
   elementwise_ops_.emplace(""Acos"", EIGEN_COST(scalar_acos_op<float>));
+  elementwise_ops_.emplace(""All"", EIGEN_COST(scalar_boolean_and_op));
+  elementwise_ops_.emplace(""ArgMax"", EIGEN_COST(scalar_max_op<float>));
   elementwise_ops_.emplace(""Asin"", EIGEN_COST(scalar_asin_op<float>));
   elementwise_ops_.emplace(""Atan"", EIGEN_COST(scalar_atan_op<float>));
   elementwise_ops_.emplace(""Atan2"", EIGEN_COST(scalar_quotient_op<float>) +
@@ -546,7 +548,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
   elementwise_ops_.emplace(""Lgamma"", 1);
   elementwise_ops_.emplace(""Log"", EIGEN_COST(scalar_log_op<float>));
   elementwise_ops_.emplace(""Log1p"", EIGEN_COST(scalar_log1p_op<float>));
+  elementwise_ops_.emplace(""Max"", EIGEN_COST(scalar_max_op<float>));
+  elementwise_ops_.emplace(""Min"", EIGEN_COST(scalar_min_op<float>));
   elementwise_ops_.emplace(""Neg"", EIGEN_COST(scalar_opposite_op<float>));
+  elementwise_ops_.emplace(""Prod"", EIGEN_COST(scalar_product_op<float>));
   elementwise_ops_.emplace(""QuantizeAndDequantizeV2"",
                            quantize_and_dequantize_v2_cost);
   elementwise_ops_.emplace(""QuantizedSigmoid"",
@@ -554,6 +559,7 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
   elementwise_ops_.emplace(""QuantizeV2"", quantize_v2_cost);
   elementwise_ops_.emplace(""Reciprocal"", EIGEN_COST(scalar_inverse_op<float>));
   elementwise_ops_.emplace(""Relu"", EIGEN_COST(scalar_max_op<float>));
+  elementwise_ops_.emplace(""Relu6"", EIGEN_COST(scalar_max_op<float>));
   elementwise_ops_.emplace(""Rint"", 1);
   elementwise_ops_.emplace(""Round"", EIGEN_COST(scalar_round_op<float>));
   elementwise_ops_.emplace(""Rsqrt"", EIGEN_COST(scalar_rsqrt_op<float>));
@@ -562,8 +568,10 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
   elementwise_ops_.emplace(""Sin"", EIGEN_COST(scalar_sin_op<float>));
   elementwise_ops_.emplace(""Sqrt"", EIGEN_COST(scalar_sqrt_op<float>));
   elementwise_ops_.emplace(""Square"", EIGEN_COST(scalar_square_op<float>));
+  elementwise_ops_.emplace(""Sum"", EIGEN_COST(scalar_sum_op<float>));
   elementwise_ops_.emplace(""Tan"", EIGEN_COST(scalar_tan_op<float>));
   elementwise_ops_.emplace(""Tanh"", EIGEN_COST(scalar_tanh_op<float>));
+  elementwise_ops_.emplace(""TopKV2"", EIGEN_COST(scalar_max_op<float>));
   // Binary ops alphabetically sorted
   elementwise_ops_.emplace(""Add"", EIGEN_COST(scalar_sum_op<float>));
   elementwise_ops_.emplace(""AddV2"", EIGEN_COST(scalar_sum_op<float>));
",0,train
8e9f3196fd8841de83bd6a622df696ea191d1d78,tensorflow/tensorflow,"Added a bunch of unary ops to the estimator.

PiperOrigin-RevId: 324607213
Change-Id: I24369f36cc29f68caac412a5d3076f5ef43859fe",op_level_cost_estimator_test.cc,"@@ -939,24 +939,29 @@ TEST_F(OpLevelCostEstimatorTest, SquaredDifferenceExecutionTime) {
   EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0);
 }
 
-TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) {
-  auto cost = PredictCosts(DescribeUnaryOp(""Relu"", 1000));
-  EXPECT_EQ(Costs::Duration(800), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(100), cost.compute_time);
-  EXPECT_EQ(Costs::Duration(900), cost.execution_time);
-  EXPECT_EQ(1, cost.num_ops_total);
-  EXPECT_FALSE(cost.inaccurate);
-  EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
-}
+TEST_F(OpLevelCostEstimatorTest, UnaryOpExecutionTime) {
+  std::vector<std::pair<std::string, int>> unary_ops = {
+      {""All"", 1},  {""ArgMax"", 1}, {""Cast"", 1},  {""Max"", 1}, {""Min"", 1},
+      {""Prod"", 1}, {""Relu"", 1},   {""Relu6"", 1}, {""Sum"", 1}, {""TopKV2"", 1}};
 
-TEST_F(OpLevelCostEstimatorTest, CastExecutionTime) {
-  auto cost = PredictCosts(DescribeUnaryOp(""Cast"", 1000));
-  EXPECT_EQ(Costs::Duration(800), cost.memory_time);
-  EXPECT_EQ(Costs::Duration(100), cost.compute_time);
-  EXPECT_EQ(Costs::Duration(900), cost.execution_time);
-  EXPECT_EQ(1, cost.num_ops_total);
-  EXPECT_FALSE(cost.inaccurate);
-  EXPECT_EQ(0, cost.num_ops_with_unknown_shapes);
+  const int kTensorSize = 1000;
+  for (auto unary_op : unary_ops) {
+    OpContext op_context = DescribeUnaryOp(unary_op.first, kTensorSize);
+
+    const int kExpectedMemoryTime = 800;
+    int expected_compute_time = std::ceil(
+        unary_op.second * kTensorSize /
+        estimator_.GetDeviceInfo(op_context.op_info.device()).gigaops);
+
+    auto cost = PredictCosts(op_context);
+    EXPECT_EQ(cost.memory_time, Costs::Duration(kExpectedMemoryTime));
+    EXPECT_EQ(cost.compute_time, Costs::Duration(expected_compute_time));
+    EXPECT_EQ(cost.execution_time,
+              Costs::Duration(expected_compute_time + kExpectedMemoryTime));
+    EXPECT_EQ(cost.num_ops_total, 1);
+    EXPECT_EQ(cost.num_ops_with_unknown_shapes, 0);
+    EXPECT_FALSE(cost.inaccurate);
+  }
 }
 
 TEST_F(OpLevelCostEstimatorTest, BroadcastAddExecutionTime) {
",0,train
69613d25c3f82652c636c5a1c1b42029dc427979,tensorflow/tensorflow,"More handle_data fixing.

I'm not sure why our existing tests didn't catch this...

PiperOrigin-RevId: 199206183",function.py,"@@ -720,6 +720,8 @@ class _FuncGraph(ops.Graph):
     if ops._USE_C_SHAPES:
       if isinstance(tensor, ops.EagerTensor):
         handle_data = tensor._handle_data
+        if handle_data:
+          handle_data = handle_data.SerializeToString()
       else:
         handle_data = c_api.GetResourceHandleShapeAndType(
             tensor.graph._c_graph, tensor._as_tf_output())
",0,train
14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms.

PiperOrigin-RevId: 161077247",stats_ops.cc,"@@ -159,20 +159,17 @@ void TraverseTree(const DecisionTreeResource* tree_resource,
 // until they're gone.
 void UpdateStats(FertileStatsResource* fertile_stats_resource,
                  const std::unique_ptr<TensorDataSet>& data,
-                 const Tensor& input_labels, const Tensor& input_weights,
-                 int num_targets, const std::vector<int32>& leaf_ids,
+                 const TensorInputTarget& target, int num_targets,
+                 const std::vector<int32>& leaf_ids,
                  const std::vector<int32>& leaf_depths,
                  std::unordered_map<int32, std::unique_ptr<mutex>>* locks,
                  mutex* set_lock, int32 start, int32 end,
                  std::unordered_set<int32>* ready_to_split) {
-  const auto labels = input_labels.unaligned_flat<float>();
-  const auto weights = input_weights.unaligned_flat<float>();
   // Stores leaf_id, leaf_depth, example_id for examples that are waiting
   // on another to finish.
   std::queue<std::tuple<int32, int32, int32>> waiting;
 
   int32 i = start;
-  TensorInputTarget target(&labels, &weights, input_labels, num_targets);
   while (i < end || !waiting.empty()) {
     int32 leaf_id;
     int32 leaf_depth;
@@ -214,15 +211,11 @@ void UpdateStats(FertileStatsResource* fertile_stats_resource,
 void UpdateStatsCollated(
     FertileStatsResource* fertile_stats_resource,
     DecisionTreeResource* tree_resource,
-    const std::unique_ptr<TensorDataSet>& data, const Tensor& input_labels,
-    const Tensor& input_weights, int num_targets,
+    const std::unique_ptr<TensorDataSet>& data, const TensorInputTarget& target,
+    int num_targets,
     const std::unordered_map<int32, std::vector<int>>& leaf_examples,
     const std::vector<int32>& leaf_depths, mutex* set_lock, int32 start,
     int32 end, std::unordered_set<int32>* ready_to_split) {
-  const auto labels = input_labels.unaligned_flat<float>();
-  const auto weights = input_weights.unaligned_flat<float>();
-
-  TensorInputTarget target(&labels, &weights, input_labels, num_targets);
   auto it = leaf_examples.begin();
   std::advance(it, start);
   auto end_it = leaf_examples.begin();
@@ -335,32 +328,33 @@ class ProcessInputOp : public OpKernel {
     std::unordered_set<int32> ready_to_split;
     mutex set_lock;
 
+    TensorInputTarget target(input_labels, input_weights, num_targets);
+
     // TODO(gilberth): This is a rough approximation based on measurements
     // from a digits run on local desktop.  Heuristics might be necessary
     // if it really matters that much.
     const int64 costPerUpdate = 1000;
-    auto update = [this, &input_labels, &input_weights, &leaf_ids, &leaf_depths,
-                   &num_targets, fertile_stats_resource, &locks, &set_lock,
-                   &ready_to_split, num_data](int64 start, int64 end) {
+    auto update = [this, &target, &leaf_ids, &leaf_depths, &num_targets,
+                   fertile_stats_resource, &locks, &set_lock, &ready_to_split,
+                   num_data](int64 start, int64 end) {
       CHECK(start <= end);
       CHECK(end <= num_data);
-      UpdateStats(fertile_stats_resource, data_set_, input_labels,
-                  input_weights, num_targets, leaf_ids, leaf_depths, &locks,
-                  &set_lock, static_cast<int32>(start), static_cast<int32>(end),
+      UpdateStats(fertile_stats_resource, data_set_, target, num_targets,
+                  leaf_ids, leaf_depths, &locks, &set_lock,
+                  static_cast<int32>(start), static_cast<int32>(end),
                   &ready_to_split);
     };
 
-    auto update_collated = [this, &input_labels, &input_weights, &leaf_ids,
-                            &num_targets, &leaf_depths, fertile_stats_resource,
-                            tree_resource, &leaf_examples, &set_lock,
-                            &ready_to_split,
+    auto update_collated = [this, &target, &leaf_ids, &num_targets,
+                            &leaf_depths, fertile_stats_resource, tree_resource,
+                            &leaf_examples, &set_lock, &ready_to_split,
                             num_leaves](int64 start, int64 end) {
       CHECK(start <= end);
       CHECK(end <= num_leaves);
-      UpdateStatsCollated(
-          fertile_stats_resource, tree_resource, data_set_, input_labels,
-          input_weights, num_targets, leaf_examples, leaf_depths, &set_lock,
-          static_cast<int32>(start), static_cast<int32>(end), &ready_to_split);
+      UpdateStatsCollated(fertile_stats_resource, tree_resource, data_set_,
+                          target, num_targets, leaf_examples, leaf_depths,
+                          &set_lock, static_cast<int32>(start),
+                          static_cast<int32>(end), &ready_to_split);
     };
 
     if (param_proto_.collate_examples()) {
",0,train
14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms.

PiperOrigin-RevId: 161077247",grow_stats_test.cc,"@@ -76,7 +76,7 @@ TEST(GrowStatsDenseClassificationTest, Basic) {
   std::vector<float> labels = {1, 0, 1};
   std::vector<float> weights = {2.3, 20.3, 1.1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, &weights, 1));
+      new TestableInputTarget(labels, weights, 1));
 
   RunBatch(stat.get(), target.get());
   CHECK(stat->IsFinished());
@@ -127,7 +127,7 @@ TEST(GrowStatsDenseClassificationTest, BasicRunningStats) {
   std::vector<float> labels = {1, 0, 1};
   std::vector<float> weights = {2.3, 20.3, 1.1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, &weights, 1));
+      new TestableInputTarget(labels, weights, 1));
 
   RunBatch(stat.get(), target.get());
   CHECK(stat->IsFinished());
@@ -185,7 +185,7 @@ TEST(GrowStatsDenseClassificationTest, TestFinishEarly) {
   std::vector<float> labels = {1, 0, 1};
   std::vector<float> weights = {1, 1, 1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, &weights, 1));
+      new TestableInputTarget(labels, weights, 1));
   std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
       new tensorflow::tensorforest::TestableDataSet(
           {1.0, 2.0, 3.0, 4.0, 5.0, 6.0}, 2));
@@ -235,7 +235,7 @@ TEST(GrowStatsDenseClassificationTest, TestCheckPruneHoeffding) {
   // sends them both to the left.
   std::vector<float> labels = {0, 1};
   std::vector<float> weights = {1, 1};
-  TestableInputTarget target(&labels, &weights, 1);
+  TestableInputTarget target(labels, weights, 1);
   std::unique_ptr<tensorflow::tensorforest::TensorDataSet> dataset(
       new tensorflow::tensorforest::TestableDataSet(
           {-1.0, -1.0, 1.0, -1.0}, 2));
@@ -306,7 +306,7 @@ TEST(GrowStatsLeastSquaresRegressionTest, Basic) {
 
   std::vector<float> labels = {2.3, 5.6, 1.1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, {}, 1));
+      new TestableInputTarget(labels, {}, 1));
   std::vector<int> branches = {1, 0, 1, 1, 0, 0};
 
   RunBatch(stat.get(), target.get());
@@ -340,7 +340,7 @@ TEST(GrowStatsSparseClassificationTest, Basic) {
   std::vector<float> labels = {100, 1000, 1};
   std::vector<float> weights = {2.3, 20.3, 1.1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, &weights, 1));
+      new TestableInputTarget(labels, weights, 1));
   std::vector<int> branches = {1, 0, 1, 1, 0, 0};
 
   RunBatch(stat.get(), target.get());
",0,train
14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms.

PiperOrigin-RevId: 161077247",input_target.h,"@@ -20,9 +20,7 @@
 namespace tensorflow {
 namespace tensorforest {
 
-typedef Eigen::TensorMap<
-    Eigen::Tensor<const float, 1, 1, long>, 0>  // NOLINT(runtime/int)
-    SingleDimStorageType;
+typedef TTypes<float, 1>::ConstTensor SingleDimStorageType;
 
 // Base class for classes that hold labels and weights. Mostly for testing
 // purposes, because it's inconvenient to construct nasty Eigen::things.
@@ -41,11 +39,12 @@ class InputTarget {
 template <typename T>
 class StoredInputTarget : public InputTarget {
  protected:
+  // Takes ownership of t and w with a std::unique_ptr.
   StoredInputTarget(const T* t, const T* w, int num_targets)
       : target_(t), weight_(w), num_targets_(num_targets) {}
 
-  const T* target_;
-  const T* weight_;
+  const std::unique_ptr<const T> target_;
+  const std::unique_ptr<const T> weight_;
   int num_targets_;
 };
 
@@ -54,10 +53,11 @@ class StoredInputTarget : public InputTarget {
 // outputs will correctly index the flattened data.
 class TensorInputTarget : public StoredInputTarget<SingleDimStorageType> {
  public:
-  TensorInputTarget(const SingleDimStorageType* t,
-                    const SingleDimStorageType* w, const Tensor& tensor,
-                    int num_targets)
-      : StoredInputTarget(t, w, num_targets), original_tensor_(tensor) {}
+  TensorInputTarget(const Tensor& target, const Tensor& weight, int num_targets)
+      : StoredInputTarget(new SingleDimStorageType(target.tensor<float, 1>()),
+                          new SingleDimStorageType(weight.tensor<float, 1>()),
+                          num_targets),
+        original_tensor_(target) {}
 
   int32 GetTargetAsClassIndex(int example_index,
                               int target_index) const override {
",0,train
14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms.

PiperOrigin-RevId: 161077247",leaf_model_operators_test.cc,"@@ -77,7 +77,7 @@ void TestClassificationNormalUse(const std::unique_ptr<LeafModelOperator>& op) {
   std::vector<float> labels = {1, 0, 1};
   std::vector<float> weights = {2.3, 20.3, 1.1};
   std::unique_ptr<TestableInputTarget> target(
-      new TestableInputTarget(&labels, &weights, 1));
+      new TestableInputTarget(labels, weights, 1));
 
   // Update and check value.
   op->UpdateModel(leaf.get(), target.get(), 0);
",0,train
14101a8ba173b44179b2a9317781f140eb61b0a1,tensorflow/tensorflow,"Fix TensorForest for 32-bit platforms.

PiperOrigin-RevId: 161077247",test_utils.h,"@@ -22,9 +22,10 @@ namespace tensorforest {
 
 class TestableInputTarget : public StoredInputTarget<std::vector<float>> {
  public:
-  TestableInputTarget(const std::vector<float>* t, const std::vector<float>* w,
+  TestableInputTarget(const std::vector<float>& t, const std::vector<float>& w,
                       int num_t)
-      : StoredInputTarget(t, w, num_t) {}
+      : StoredInputTarget(new std::vector<float>(t), new std::vector<float>(w),
+                          num_t) {}
 
   int NumItems() const {
     return target_->size();
",0,train
7731e8dfbe4a56773be5dc94d631611211156659,tensorflow/tensorflow,"Don't constant-fold DT_RESOURCE constants.

PiperOrigin-RevId: 391803952
Change-Id: I0ea3ec31d3e7dfda0f03b4027a237f08d00a3091",constant_folding.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/log_memory.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/framework/types.pb.h""
 #include ""tensorflow/core/graph/algorithm.h""
 #include ""tensorflow/core/graph/node_builder.h""
 #include ""tensorflow/core/graph/subgraph.h""
@@ -223,7 +224,8 @@ bool IsConstantFoldable(
     std::unordered_map<const Node*, std::vector<Tensor>>*
         shape_replacement_map) {
   if (n->IsConstant()) {
-    return true;
+    // Skip constant folding resources as they cannot be deep copied.
+    return n->output_type(0) != DT_RESOURCE;
   }
   if (MaybeReplaceShapeOp(n, shape_map, shape_replacement_map)) {
     return true;
",0,train
8f936eb15cc3b798dbe535ca1f4f0eff2b6b79bd,tensorflow/tensorflow,"Support dynamic value inference on iota instructions.

We consider all iota output values are static.

PiperOrigin-RevId: 341944607
Change-Id: Ie4c3b6dea7d168c41a10a0046eb280a5293adc60",xla_builder.cc,"@@ -3401,6 +3401,7 @@ StatusOr<XlaComputation> XlaBuilder::BuildDynamicInferenceGraph(XlaOp root_op) {
         break;
       }
       case HloOpcode::kConstant:
+      case HloOpcode::kIota:
         SetInstructionAsConstant(new_instr, id, new_shape, false);
         break;
       case HloOpcode::kCustomCall:
",0,train
8f936eb15cc3b798dbe535ca1f4f0eff2b6b79bd,tensorflow/tensorflow,"Support dynamic value inference on iota instructions.

We consider all iota output values are static.

PiperOrigin-RevId: 341944607
Change-Id: Ie4c3b6dea7d168c41a10a0046eb280a5293adc60",dynamism_inference_test.cc,"@@ -104,6 +104,19 @@ TEST_F(DynamismInferenceTest, ScalarInt32Literal) {
   }
 }
 
+TEST_F(DynamismInferenceTest, Iota) {
+  // The outputs of iota are considered static.
+  for (ClientType client_type : client_types) {
+    Client* client = ClientOrDie(platform_, client_type);
+    XlaBuilder b(TestName());
+    auto computation = Iota(&b, S32, 2);
+    // Iota is not dynamic.
+    EXPECT_FALSE(ComputeDynamismLiteral(client, computation, &b)
+                     .ValueOrDie()
+                     .Get<bool>({0}));
+  }
+}
+
 TEST_F(DynamismInferenceTest, TupleSimple) {
   for (ClientType client_type : client_types) {
     Client* client = ClientOrDie(platform_, client_type);
",0,train
305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel.

Turns out using the StreamInterface objects leads to ""invalid resource handle""
errors, so we have to use the cudaStream_t directly.  This change is based on
similar code in cuda_solvers.cc.

PiperOrigin-RevId: 161261085",concat_lib_gpu_impl.cu.cc,"@@ -88,7 +88,8 @@ __global__ void concat_variable_kernel(
   // do an initial binary search and then scan linearly from there
   // works well when there are many small segments and when the
   // segments are much longer
-  IntType segment = gpu::upper_bound<IntType>(col_scan, num_inputs, gidx) - 1;
+  IntType segment =
+      cuda_helper::upper_bound<IntType>(col_scan, num_inputs, gidx) - 1;
 
   IntType curr_offset = col_scan[segment];
   IntType curr_segment = segment;
@@ -142,10 +143,10 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device,
                                       output->dimension(0), gpu_device);
 
   if (fixed_size) {
-    concat_fixed_kernel<T, IntType><<<
-        config.block_count, config.thread_per_block, 0, gpu_device.stream()>>>(
-        input_ptrs, split_size, output->dimension(0), output->dimension(1),
-        output->data());
+    concat_fixed_kernel<T, IntType>
+        <<<config.block_count, config.thread_per_block, 0,
+           gpu_device.stream()>>>(input_ptrs, split_size, output->dimension(0),
+                                  output->dimension(1), output->data());
   } else {
     IntType smem_max = gpu_device.sharedMemPerBlock();
     IntType smem_usage = output_scan.size * sizeof(IntType);
@@ -155,17 +156,17 @@ void ConcatGPUImpl(const Eigen::GpuDevice& gpu_device,
     // 4096 inputs is a lot, most code will take the smem path
     const int32 kMaxSmemBytesPerformance = 16384;
     if (smem_usage < smem_max && smem_usage < kMaxSmemBytesPerformance)
-      concat_variable_kernel<
-          T, IntType, true><<<config.block_count, config.thread_per_block,
-                              smem_usage, gpu_device.stream()>>>(
-          input_ptrs, output_scan, output->dimension(0), output->dimension(1),
-          output->data());
+      concat_variable_kernel<T, IntType, true>
+          <<<config.block_count, config.thread_per_block, smem_usage,
+             gpu_device.stream()>>>(input_ptrs, output_scan,
+                                    output->dimension(0), output->dimension(1),
+                                    output->data());
     else
-      concat_variable_kernel<
-          T, IntType, false><<<config.block_count, config.thread_per_block, 0,
-                               gpu_device.stream()>>>(
-          input_ptrs, output_scan, output->dimension(0), output->dimension(1),
-          output->data());
+      concat_variable_kernel<T, IntType, false>
+          <<<config.block_count, config.thread_per_block, 0,
+             gpu_device.stream()>>>(input_ptrs, output_scan,
+                                    output->dimension(0), output->dimension(1),
+                                    output->data());
   }
 }
 
",0,train
305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel.

Turns out using the StreamInterface objects leads to ""invalid resource handle""
errors, so we have to use the cudaStream_t directly.  This change is based on
similar code in cuda_solvers.cc.

PiperOrigin-RevId: 161261085",split_lib_gpu.cu.cc,"@@ -138,7 +138,8 @@ __global__ void split_v_kernel(const T* input_ptr,
   // do an initial binary search and then scan linearly from there
   // works well when there are many small segments and when the
   // segments are much longer
-  IntType segment = gpu::upper_bound<IntType>(col_scan, num_outputs, gidx) - 1;
+  IntType segment =
+      cuda_helper::upper_bound<IntType>(col_scan, num_outputs, gidx) - 1;
 
   IntType curr_offset = col_scan[segment];
   IntType curr_segment = segment;
@@ -195,10 +196,10 @@ struct SplitOpGPULaunch {
     CudaLaunchConfig config = GetCudaLaunchConfig(
         prefix_dim_size * split_dim_size * suffix_dim_size, d);
 
-    SplitOpKernel<
-        T><<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
-        input, prefix_dim_size, split_dim_size, suffix_dim_size,
-        output_ptr_data);
+    SplitOpKernel<T>
+        <<<config.block_count, config.thread_per_block, 0, d.stream()>>>(
+            input, prefix_dim_size, split_dim_size, suffix_dim_size,
+            output_ptr_data);
   }
 };
 
@@ -224,15 +225,15 @@ struct SplitVOpGPULaunch {
       // 4096 inputs is a lot, most code will take the smem path
       const int32 kMaxSmemBytesPerformance = 16384;
       if (smem_usage < smem_max && smem_usage < kMaxSmemBytesPerformance)
-        split_v_kernel<T, IntType,
-                       true><<<config.block_count, config.thread_per_block,
-                               smem_usage, gpu_device.stream()>>>(
-            input_ptr, output_scan, total_rows, total_cols, output_ptr_data);
+        split_v_kernel<T, IntType, true>
+            <<<config.block_count, config.thread_per_block, smem_usage,
+               gpu_device.stream()>>>(input_ptr, output_scan, total_rows,
+                                      total_cols, output_ptr_data);
       else
-        split_v_kernel<T, IntType,
-                       false><<<config.block_count, config.thread_per_block, 0,
-                                gpu_device.stream()>>>(
-            input_ptr, output_scan, total_rows, total_cols, output_ptr_data);
+        split_v_kernel<T, IntType, false>
+            <<<config.block_count, config.thread_per_block, 0,
+               gpu_device.stream()>>>(input_ptr, output_scan, total_rows,
+                                      total_cols, output_ptr_data);
     }
   }
 };
",0,train
305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel.

Turns out using the StreamInterface objects leads to ""invalid resource handle""
errors, so we have to use the cudaStream_t directly.  This change is based on
similar code in cuda_solvers.cc.

PiperOrigin-RevId: 161261085",topk_op_gpu.cu.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/gtl/top_n.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/types.h""
+#include ""tensorflow/core/util/cuda_kernel_helper.h""
 
 // Required for sorting Eigen::half
 namespace cub {
@@ -365,9 +366,9 @@ __global__ void TopKKernel(const T* input, int length, int k, bool sorted,
 }
 
 template <typename T>
-cudaError LaunchTopKKernel(cudaStream_t stream, int num_shards, const T* input,
-                           int batch_size, int length, int k, bool sorted,
-                           T* output, int* indices) {
+cudaError LaunchTopKKernel(const cudaStream_t& stream, int num_shards,
+                           const T* input, int batch_size, int length, int k,
+                           bool sorted, T* output, int* indices) {
   // This code assumes that k is small enough that the computation
   // fits inside shared memory (hard coded to 48KB).  In practice this
   // means k <= 3072 for T=float/int32 and k <= 2048 for T=double/int64.
@@ -428,7 +429,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
                         typename TTypes<T, 2>::Tensor values,
                         TTypes<int, 2>::Tensor indices) {
   const GPUDevice& d = ctx->eigen_device<GPUDevice>();
-  auto stream = ctx->eigen_gpu_device().stream();
+  const cudaStream_t& cu_stream = GetCudaStream(ctx);
   size_t temp_storage_bytes = -1;
 
   // TODO(ebrevdo): Once cub supports iterators for the ValueT and
@@ -480,7 +481,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
       /* d_end_offsets */ segment_offsets_t.data() + 1,
       /* begin_bit */ 0,
       /* end_bit */ sizeof(T) * 8,
-      /* stream */ stream);
+      /* stream */ cu_stream);
   if (err != cudaSuccess) {
     return errors::Internal(
         ""TopKOp: Could not launch ""
@@ -505,7 +506,7 @@ Status LaunchSortKernel(OpKernelContext* ctx, const T* input, int num_rows,
       /* d_end_offsets */ segment_offsets_t.data() + 1,
       /* begin_bit */ 0,
       /* end_bit */ sizeof(T) * 8,
-      /* stream */ stream);
+      /* stream */ cu_stream);
   if (err != cudaSuccess) {
     return errors::Internal(
         ""TopKOp: Could not launch ""
@@ -545,8 +546,8 @@ struct TopKFunctor<GPUDevice, T> {
       return impl::LaunchSortKernel(context, input.data(), num_rows, num_cols,
                                     k, values, indices);
     } else {
-      auto stream = context->eigen_gpu_device().stream();
-      auto err = impl::LaunchTopKKernel(stream, /* num_shards */ 0,
+      const cudaStream_t& cu_stream = GetCudaStream(context);
+      auto err = impl::LaunchTopKKernel(cu_stream, /* num_shards */ 0,
                                         input.data(), num_rows, num_cols, k,
                                         sorted, values.data(), indices.data());
       if (err != cudaSuccess) {
",0,train
305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel.

Turns out using the StreamInterface objects leads to ""invalid resource handle""
errors, so we have to use the cudaStream_t directly.  This change is based on
similar code in cuda_solvers.cc.

PiperOrigin-RevId: 161261085",where_op_gpu.cu.cc,"@@ -56,8 +56,9 @@ struct NumTrue<GPUDevice, TIndex> {
   EIGEN_ALWAYS_INLINE static Status Compute(
       OpKernelContext* ctx, const GPUDevice& d, TTypes<bool>::ConstFlat input,
       typename TTypes<TIndex>::Scalar num_true) {
-    std::size_t temp_storage_bytes = 0;
+    const cudaStream_t& cu_stream = GetCudaStream(ctx);
 
+    std::size_t temp_storage_bytes = 0;
     const bool* input_data = input.data();
     TIndex* num_true_data = num_true.data();
 
@@ -66,7 +67,7 @@ struct NumTrue<GPUDevice, TIndex> {
                                /*d_in*/ input_data,
                                /*d_out*/ num_true_data,
                                /*num_items*/ input.size(),
-                               /*stream*/ d.stream());
+                               /*stream*/ cu_stream);
 
     if (first_success != cudaSuccess) {
       return errors::Internal(
@@ -85,7 +86,7 @@ struct NumTrue<GPUDevice, TIndex> {
         /*d_in*/ input_data,
         /*d_out*/ num_true_data,
         /*num_items*/ input.size(),
-        /*stream*/ d.stream());
+        /*stream*/ cu_stream);
 
     if (second_success != cudaSuccess) {
       return errors::Internal(
@@ -168,6 +169,8 @@ struct Where<GPUDevice, NDIM, Tindex> {
       return Status::OK();
     }
 
+    const cudaStream_t& cu_stream = GetCudaStream(ctx);
+
     std::size_t temp_storage_bytes = 0;
 
     cub::CountingInputIterator<Tindex> select_counter(0);
@@ -188,7 +191,7 @@ struct Where<GPUDevice, NDIM, Tindex> {
                                    /*d_out*/ output_iterator,
                                    /*d_num_selected_out*/ found_true_device,
                                    /*num_items*/ input.size(),
-                                   /*stream*/ d.stream());
+                                   /*stream*/ cu_stream);
     if (first_success != cudaSuccess) {
       return errors::Internal(
           ""WhereOp: Could not launch cub::DeviceSelect::Flagged to calculate ""
@@ -208,7 +211,7 @@ struct Where<GPUDevice, NDIM, Tindex> {
         /*d_out*/ output_iterator,
         /*d_num_selected_out*/ found_true_device,
         /*num_items*/ input.size(),
-        /*stream*/ d.stream());
+        /*stream*/ cu_stream);
 
     if (second_success != cudaSuccess) {
       return errors::Internal(
",0,train
305712c02e70bc860812e7c151a3842f028cacb1,tensorflow/tensorflow,"More WhereOp/TopK GPU bugfixes: use the direct cuda stream for CUB GPU kernel.

Turns out using the StreamInterface objects leads to ""invalid resource handle""
errors, so we have to use the cudaStream_t directly.  This change is based on
similar code in cuda_solvers.cc.

PiperOrigin-RevId: 161261085",cuda_kernel_helper.h,"@@ -20,9 +20,11 @@ limitations under the License.
 
 #include <algorithm>
 
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
+#include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/core/platform/stream_executor.h""
 #include ""tensorflow/core/platform/types.h""
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 // Usage of GetCudaLaunchConfig, GetCuda2DLaunchConfig, and
 // GetCuda3DLaunchConfig:
@@ -95,7 +97,8 @@ void MyDriverFunc(const GPUDevice &d) {
 }
 
 // See the test for this for more example:
-// https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
+//
+https://github.com/tensorflow/tensorflow/blob/master/tensorflow/core/util/cuda_kernel_helper_test.cu.cc
 
 */
 
@@ -107,7 +110,7 @@ void MyDriverFunc(const GPUDevice &d) {
   for (int i = blockIdx.axis * blockDim.axis + threadIdx.axis; i < n.axis; \
        i += blockDim.axis * gridDim.axis)
 
-#define DIV_UP(a, b) (((a) + (b) - 1) / (b))
+#define DIV_UP(a, b) (((a) + (b)-1) / (b))
 
 namespace tensorflow {
 
@@ -277,7 +280,19 @@ inline Cuda2DLaunchConfig GetCuda2DLaunchConfig(
                                dynamic_shared_memory_size, block_size_limit);
 }
 
-namespace gpu {
+// Returns a raw reference to the current cuda stream.  Required by a
+// number of kernel calls (for which StreamInterface* does not work), i.e.
+// CUB and certain cublas primitives.
+inline const cudaStream_t& GetCudaStream(OpKernelContext* context) {
+  const cudaStream_t* ptr = CHECK_NOTNULL(
+      reinterpret_cast<const cudaStream_t*>(context->op_device_context()
+                                                ->stream()
+                                                ->implementation()
+                                                ->CudaStreamMemberHack()));
+  return *ptr;
+}
+
+namespace cuda_helper {
 
 template <typename IntType>
 __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
@@ -299,7 +314,7 @@ __device__ IntType upper_bound(IntType* first, IntType count, IntType val) {
   return first - orig;
 }
 
-}  // namespace gpu
+}  // namespace cuda_helper
 
 template <typename T>
 __device__ __host__ inline T ldg(const T* address) {
",0,train
8476ba0486bf03a4a622410fdefa62c159fd6235,tensorflow/tensorflow,"[tf.data] Make sure rendezvous is created when running multi-device function.

PiperOrigin-RevId: 254898730",captured_function.cc,"@@ -552,10 +552,7 @@ Status InstantiatedCapturedFunction::Run(IteratorContext* ctx,
       });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU ||
-      captured_func_->is_multi_device_function()) {
-    f_opts.create_rendezvous = true;
-  }
+  f_opts.create_rendezvous = ShouldCreateRendezvous();
   // TODO(mrry): Add cancellation manager support to IteratorContext
   // so that we can cancel running map functions. The local
   // cancellation manager here is created so that we can run kernels
@@ -593,9 +590,7 @@ Status InstantiatedCapturedFunction::RunWithBorrowedArgs(
       });
   f_opts.step_container = &step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU) {
-    f_opts.create_rendezvous = true;
-  }
+  f_opts.create_rendezvous = ShouldCreateRendezvous();
   // TODO(mrry): Add cancellation manager support to IteratorContext
   // so that we can cancel running map functions. The local
   // cancellation manager here is created so that we can run kernels
@@ -633,9 +628,7 @@ Status InstantiatedCapturedFunction::RunInstantiated(
       });
   f_opts.step_container = &step_container;
   f_opts.runner = &captured_runner_;
-  if (lib_->device()->device_type() != DEVICE_CPU) {
-    f_opts.create_rendezvous = true;
-  }
+  f_opts.create_rendezvous = ShouldCreateRendezvous();
   // TODO(mrry): Add cancellation manager support to IteratorContext
   // so that we can cancel running map functions. The local
   // cancellation manager here is created so that we can run kernels
@@ -688,9 +681,7 @@ void InstantiatedCapturedFunction::RunAsync(
       });
   f_opts.step_container = step_container;
   f_opts.runner = ctx->runner();
-  if (lib_->device()->device_type() != DEVICE_CPU) {
-    f_opts.create_rendezvous = true;
-  }
+  f_opts.create_rendezvous = ShouldCreateRendezvous();
   // TODO(mrry): Add cancellation manager support to IteratorContext
   // so that we can cancel running map functions. The local
   // cancellation manager here is created so that we can run kernels
@@ -749,6 +740,11 @@ void InstantiatedCapturedFunction::RunAsync(
   lib_->Run(f_opts, f_handle_, frame, std::move(callback));
 }
 
+bool InstantiatedCapturedFunction::ShouldCreateRendezvous() const {
+  return lib_->device()->device_type() != DEVICE_CPU ||
+         captured_func_->is_multi_device_function();
+}
+
 CapturedFunction::CapturedFunction(
     const std::shared_ptr<const FunctionMetadata> metadata,
     std::vector<Tensor> captured_inputs)
",0,train
8476ba0486bf03a4a622410fdefa62c159fd6235,tensorflow/tensorflow,"[tf.data] Make sure rendezvous is created when running multi-device function.

PiperOrigin-RevId: 254898730",captured_function.h,"@@ -95,6 +95,10 @@ class InstantiatedCapturedFunction {
       std::function<void(std::function<void()>)> runner,
       CapturedFunction* captured_func);
 
+  // Determines whether a rendezvous object should be created when running the
+  // instantiated function.
+  bool ShouldCreateRendezvous() const;
+
   friend class CapturedFunction;
 
   FunctionLibraryRuntime* const lib_;
",0,train
97d7281354af43ed5fd53ebf729cea76de84acdb,tensorflow/tensorflow,"eager: Graceful failure on invalid inputs.

Tests added to pywrap_tfe_test.py would fail
(segmentation fault / infinite loop)
without corresponding fixes to pywrap_tfe.i and pywrap_tfe_src.cc

Other statements that would fail ungracefully without this fix
(and with eager execution enabled) include:
tf.split(value=0, num_or_size_splits=-1)
tf.dynamic_partition(data=0, partitions=0, num_partitions=-1)
tf.split(value=0, num_or_size_splits=1.23, num=-1)
tf.unstack(value=0, num=-1)

PiperOrigin-RevId: 212731927",pywrap_tfe_src.cc,"@@ -2563,13 +2563,18 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
   int num_retvals = 0;
   for (int i = 0; i < op_def->output_arg_size(); i++) {
     const auto& output_arg = op_def->output_arg(i);
+    int delta = 1;
     if (!output_arg.number_attr().empty()) {
-      num_retvals += attr_list_sizes[output_arg.number_attr()];
+      delta = attr_list_sizes[output_arg.number_attr()];
     } else if (!output_arg.type_list_attr().empty()) {
-      num_retvals += attr_list_sizes[output_arg.type_list_attr()];
-    } else {
-      num_retvals++;
+      delta = attr_list_sizes[output_arg.type_list_attr()];
+    }
+    if (delta < 0) {
+      RaiseFallbackException(
+          ""Attributes suggest that the size of an output list is less than 0"");
+      return nullptr;
     }
+    num_retvals += delta;
   }
 
   tensorflow::gtl::InlinedVector<TFE_TensorHandle*, 2> retvals(num_retvals);
",0,train
97d7281354af43ed5fd53ebf729cea76de84acdb,tensorflow/tensorflow,"eager: Graceful failure on invalid inputs.

Tests added to pywrap_tfe_test.py would fail
(segmentation fault / infinite loop)
without corresponding fixes to pywrap_tfe.i and pywrap_tfe_src.cc

Other statements that would fail ungracefully without this fix
(and with eager execution enabled) include:
tf.split(value=0, num_or_size_splits=-1)
tf.dynamic_partition(data=0, partitions=0, num_partitions=-1)
tf.split(value=0, num_or_size_splits=1.23, num=-1)
tf.unstack(value=0, num=-1)

PiperOrigin-RevId: 212731927",pywrap_tfe_test.py,"@@ -21,6 +21,7 @@ from __future__ import print_function
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import context
+from tensorflow.python.eager import core
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
@@ -123,8 +124,8 @@ class Tests(test.TestCase):
   def testFastpathExecute_MixedPrecisionVariableTapeWrite(self):
     ctx = context.context()
     with backprop.GradientTape(persistent=True) as tape:
-      a_2_by_2 = constant_op.constant(
-          [[1.0, 2.0], [3.0, 4.0]], dtype=dtypes.float32)
+      a_2_by_2 = constant_op.constant([[1.0, 2.0], [3.0, 4.0]],
+                                      dtype=dtypes.float32)
       a_2_by_2_fp16 = math_ops.cast(a_2_by_2, dtype=dtypes.float16)
       m1 = resource_variable_ops.ResourceVariable(a_2_by_2)
       m2 = resource_variable_ops._MixedPrecisionVariable(
@@ -233,6 +234,26 @@ class Tests(test.TestCase):
       pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name,
                                                ctx_handle, None, [], a_2_by_2)
 
+  @test_util.assert_no_new_tensors
+  @test_util.assert_no_garbage_created
+  def testFastPathExecute_InvalidAttributes(self):
+    split_dim = constant_op.constant(0, dtype=dtypes.int32)
+    value = constant_op.constant([0, 1, 2, 3], dtype=dtypes.float32)
+    ctx = context.context()
+    ctx_handle = ctx._handle
+    with self.assertRaises(core._FallbackException):
+      pywrap_tensorflow.TFE_Py_FastPathExecute(ctx_handle, ctx.device_name,
+                                               ""Split"", None, None, split_dim,
+                                               value, ""num_split"", -1)
+
+  @test_util.assert_no_new_tensors
+  @test_util.assert_no_garbage_created
+  def testInvalidNumOutputs(self):
+    with self.assertRaisesRegexp(
+        Exception,
+        ""Value for attr 'num_split' of -1 must be at least minimum 1""):
+      array_ops.split(value=[1, 2, 3], num_or_size_splits=-1)
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",build_xla_ops_pass.cc,"@@ -472,6 +472,11 @@ Status ReplaceNodeWithXlaCompileAndXlaRun(
                                /*resources=*/cluster_info.resource_inputs,
                                /*must_compile=*/requires_compilation,
                                cluster_info.function);
+
+  bool has_ref_attr;
+  TF_RETURN_IF_ERROR(
+      GetNodeAttr(n->attrs(), kXlaHasReferenceVarsAttr, &has_ref_attr));
+  xla_compile.operation.node()->AddAttr(kXlaHasReferenceVarsAttr, has_ref_attr);
   TF_RETURN_IF_ERROR(
       CopyIncomingControlEdges(g, /*from=*/n, /*to=*/xla_compile.key.node()));
 
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",build_xla_ops_pass_test.cc,"@@ -149,8 +149,10 @@ TEST_F(BuildXlaOpsTest, ControlDepsPreserved) {
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(fdef_lib));
   Node* call;
   TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call));
+  call->AddAttr(kXlaHasReferenceVarsAttr, false);
   call->set_requested_device(kXlaDeviceName);
   Node* write_op = MakeWrite(root, ""write"");
+  write_op->AddAttr(kXlaHasReferenceVarsAttr, false);
   root.graph()->AddControlEdge(call, write_op);
 
   std::unique_ptr<Graph> graph;
@@ -191,8 +193,10 @@ TEST_F(BuildXlaOpsTest, OnNonXlaDevice) {
   Node* call;
   TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call));
   TF_ASSERT_OK(root.DoShapeInference(call));
+  call->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   Node* write_op = MakeWrite(root, Output(call), ""write_result"");
+  write_op->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   auto xla_compile = NodeWith(Op(""_XlaCompile""), Attr(""must_compile"", false));
   auto predicated_compilation_key =
@@ -226,8 +230,10 @@ TEST_F(BuildXlaOpsTest, OnXlaDevice) {
   TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call));
   call->set_requested_device(kXlaDeviceName);
   TF_ASSERT_OK(root.DoShapeInference(call));
+  call->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   Node* write_op = MakeWrite(root, Output(call), ""write_result"");
+  write_op->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   std::unique_ptr<Graph> graph;
   TF_ASSERT_OK(BuildXlaOps(root, fdef_lib, &graph));
@@ -250,6 +256,7 @@ TEST_F(BuildXlaOpsTest, NoExtraMergeForEdgeToSink) {
   TF_ASSERT_OK(root.graph()->AddFunctionLibrary(fdef_lib));
   Node* call;
   TF_ASSERT_OK(MakeXlaCompiledKernel(root.graph(), ""cluster_0"", ""C"", &call));
+  call->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   std::unique_ptr<Graph> graph;
   TF_ASSERT_OK(BuildXlaOps(root, fdef_lib, &graph));
@@ -278,6 +285,7 @@ TEST_F(BuildXlaOpsTest, NoDeviceToHostCopiesForClustersWithInt32Inputs) {
   TF_ASSERT_OK(
       MakeXlaCompiledKernel(root.graph(), ""cluster_int32"", ""C"", &call));
   call->set_requested_device(kXlaDeviceName);
+  call->AddAttr(kXlaHasReferenceVarsAttr, false);
 
   auto var =
       ops::VarHandleOp(root.WithOpName(""var""), DT_INT32, TensorShape({}));
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass.cc,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/compiler/jit/graphcycles/graphcycles.h""
 #include ""tensorflow/compiler/jit/mark_for_compilation_pass.h""
 #include ""tensorflow/compiler/jit/shape_inference_helpers.h""
+#include ""tensorflow/compiler/jit/xla_cluster_util.h""
 #include ""tensorflow/compiler/tf2xla/const_analysis.h""
 #include ""tensorflow/compiler/xla/status_macros.h""
 #include ""tensorflow/core/common_runtime/device_factory.h""
@@ -61,6 +62,7 @@ const char* const kXlaNumConstantArgsAttr = ""_XlaNumConstantArgs"";
 const char* const kXlaNumResourceArgsAttr = ""_XlaNumResourceArgs"";
 const char* const kXlaHostTransferSequencerAttr =
     ""_xla_host_transfer_sequencer"";
+const char* const kXlaHasReferenceVarsAttr = ""_XlaHasReferenceVars"";
 
 void SortControlInputs(GraphDef* gdef) {
   int64 num_nodes = gdef->node_size();
@@ -1311,6 +1313,14 @@ Status EncapsulateSubgraphsPass::Run(
   }
 
   *options.graph = std::move(graph_out);
+  TF_ASSIGN_OR_RETURN(absl::flat_hash_set<Node*> ref_related_nodes,
+                      GetNodesRelatedToRefVariables(**options.graph, flr));
+  for (Node* node : (*options.graph)->nodes()) {
+    bool has_ref_vars = ref_related_nodes.contains(node);
+    node->AddAttr(kXlaHasReferenceVarsAttr, has_ref_vars);
+    VLOG(3) << ""Has ref vars = "" << has_ref_vars
+            << "", node: "" << node->def().SerializeAsString();
+  }
   return Status::OK();
 }
 
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass.h,"@@ -91,6 +91,9 @@ extern const char* const kXlaNumConstantArgsAttr;
 // Name of the attribute containing the number of resource variable arguments.
 extern const char* const kXlaNumResourceArgsAttr;
 
+// Name of the attribute defining whether the cluster has reference variables.
+extern const char* const kXlaHasReferenceVarsAttr;
+
 // Sorts each node's control inputs by their names. This guarantees that for two
 // structually equivalent GraphDefs, we get the same traversal ordering on
 // node's control input fields.
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",encapsulate_subgraphs_pass_test.cc,"@@ -2581,5 +2581,79 @@ TEST(EncapsulateSubgraphsTest, OutsideCompilationShapeInference) {
   TF_EXPECT_FUNCTIONDEFLIBRARY_EQ(library_expected, library);
 }
 
+void CreateSubgraphTouchingRefVar(const Scope& s) {
+  Output variable =
+      ops::Variable(s.WithOpName(""variable""), PartialTensorShape{}, DT_FLOAT);
+  Output read = ops::Identity(s.WithOpName(""read_ref_var""), variable);
+  Output neg = ops::Negate(s.WithOpName(""negate_ref""), read);
+  Output add = ops::Add(s.WithOpName(""add_ref""), neg, neg);
+
+  Output constant =
+      ops::Const(s.WithOpName(""constant_ref""), Input::Initializer(0.0));
+  s.graph()->AddControlEdge(constant.node(), variable.node());
+}
+
+TEST(EncapsulateSubgraphsTest, RefVariablesMarked) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  CreateSubgraphTouchingRefVar(root);
+
+  auto graph = absl::make_unique<Graph>(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  SessionOptions session_options;
+  session_options.env = Env::Default();
+  GraphOptimizationPassOptions options;
+  options.session_options = &session_options;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  options.flib_def = &library;
+  options.graph = &graph;
+
+  EncapsulateSubgraphsPass pass;
+  TF_ASSERT_OK(pass.Run(options));
+
+  for (const Node* node : graph->nodes()) {
+    bool has_ref_var;
+    TF_ASSERT_OK(
+        GetNodeAttr(node->attrs(), kXlaHasReferenceVarsAttr, &has_ref_var));
+    EXPECT_TRUE(node->IsSink() || node->IsSource() || has_ref_var)
+        << ""All nodes apart from source and sink can access reference variable"";
+  }
+}
+
+void CreateSubgraphNotTouchingRefVar(const Scope& s) {
+  Output constant =
+      ops::Const(s.WithOpName(""constant_normal""), Input::Initializer(0.0));
+  Output neg = ops::Negate(s.WithOpName(""negate_normal""), constant);
+  Output add = ops::Add(s.WithOpName(""add_normal""), neg, neg);
+}
+
+TEST(EncapsulateSubgraphsTest, NoRefVarsNoAttr) {
+  Scope root = Scope::NewRootScope().ExitOnError();
+  CreateSubgraphNotTouchingRefVar(root);
+
+  auto graph = absl::make_unique<Graph>(OpRegistry::Global());
+  TF_ASSERT_OK(root.ToGraph(graph.get()));
+
+  // TODO(cheshire): reduce boilerplate for creating
+  // GraphOptimizationPassOptions here and elsewhere, probably using a macro.
+  SessionOptions session_options;
+  session_options.env = Env::Default();
+  GraphOptimizationPassOptions options;
+  options.session_options = &session_options;
+  FunctionLibraryDefinition library(OpRegistry::Global(), {});
+  options.flib_def = &library;
+  options.graph = &graph;
+
+  EncapsulateSubgraphsPass pass;
+  TF_ASSERT_OK(pass.Run(options));
+
+  for (const Node* node : graph->nodes()) {
+    bool has_ref_var;
+    TF_ASSERT_OK(
+        GetNodeAttr(node->attrs(), kXlaHasReferenceVarsAttr, &has_ref_var));
+    EXPECT_FALSE(has_ref_var) << ""The graph does not have reference variables"";
+  }
+}
+
 }  // namespace
 }  // namespace tensorflow
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",xla_ops.cc,"@@ -18,8 +18,10 @@ limitations under the License.
 #include ""absl/container/flat_hash_map.h""
 #include ""absl/memory/memory.h""
 #include ""tensorflow/compiler/jit/defs.h""
+#include ""tensorflow/compiler/jit/encapsulate_subgraphs_pass.h""
 #include ""tensorflow/compiler/jit/flags.h""
 #include ""tensorflow/compiler/jit/xla_activity_listener.h""
+#include ""tensorflow/compiler/jit/xla_cluster_util.h""
 #include ""tensorflow/compiler/tf2xla/shape_util.h""
 #include ""tensorflow/compiler/tf2xla/tf2xla_util.h""
 #include ""tensorflow/compiler/tf2xla/xla_compiler.h""
@@ -268,7 +270,7 @@ static Status BuildCompilationCache(OpKernelContext* ctx,
 }
 
 static Status CompileToLocalExecutable(
-    OpKernelContext* ctx, const NameAttrList& function,
+    OpKernelContext* ctx, const NameAttrList& function, bool has_ref_vars,
     const XlaPlatformInfo& platform_info, absl::Span<const int> resources,
     absl::Span<const int> constants, bool lazy, xla::LocalClient** client,
     std::map<int, OptionalTensor>* variables,
@@ -313,8 +315,9 @@ static Status CompileToLocalExecutable(
     options.shape_representation_fn =
         platform_info.xla_device_metadata()->shape_representation_fn();
   }
-  // TODO(b/138728225): Set options.alias_passthrough_params for clusters
-  // without ref variables.
+  // If reference variables are not present in the graph, we can safely alias
+  // passthrough parameters without performing a copy.
+  options.alias_passthrough_params = !has_ref_vars;
 
   std::map<int, Tensor> constant_args;
   for (int i : constants) {
@@ -351,8 +354,8 @@ void XlaLocalLaunchBase::Compute(OpKernelContext* ctx) {
 
   {
     Status s = CompileToLocalExecutable(
-        ctx, function_, platform_info_, resources_, constants_, /*lazy=*/false,
-        &client, &variables, &kernel, &executable);
+        ctx, function_, /*has_ref_vars=*/true, platform_info_, resources_,
+        constants_, /*lazy=*/false, &client, &variables, &kernel, &executable);
     if (!s.ok() && (platform_info_.device_type().type_string() == DEVICE_CPU ||
                     platform_info_.device_type().type_string() == DEVICE_GPU)) {
       // Suggest auto jit if the failure was with GPU or CPU.
@@ -451,6 +454,14 @@ bool MustCompileAttr(OpKernelConstruction* ctx) {
                         ctx->GetAttr(""must_compile"", &must_compile));
   return must_compile;
 }
+
+bool HasRefVars(OpKernelConstruction* ctx) {
+  bool has_ref_vars;
+  OP_REQUIRES_OK_RETURN(ctx, false,
+                        ctx->GetAttr(kXlaHasReferenceVarsAttr, &has_ref_vars));
+  return has_ref_vars;
+}
+
 }  // namespace
 
 XlaLocalLaunchOp::XlaLocalLaunchOp(OpKernelConstruction* ctx)
@@ -467,7 +478,8 @@ XlaCompileOp::XlaCompileOp(OpKernelConstruction* ctx)
       resources_(ResourcesVector(ctx)),
       function_(FunctionAttr(ctx)),
       platform_info_(PlatformInfoFromContext(ctx)),
-      must_compile_(MustCompileAttr(ctx)) {}
+      must_compile_(MustCompileAttr(ctx)),
+      has_ref_vars_(HasRefVars(ctx)) {}
 
 void XlaCompileOp::Compute(OpKernelContext* ctx) {
   VLOG(3) << ""XlaCompileOp "" << def().name()
@@ -488,7 +500,7 @@ void XlaCompileOp::Compute(OpKernelContext* ctx) {
     executable = nullptr;
   } else {
     Status status = CompileToLocalExecutable(
-        ctx, function_, platform_info_, resources_, constants_,
+        ctx, function_, has_ref_vars_, platform_info_, resources_, constants_,
         /*lazy=*/!must_compile_, &client, &variables, &kernel, &executable);
     if (must_compile_ || status.code() != error::UNIMPLEMENTED) {
       OP_REQUIRES_OK(ctx, status);
",0,test
6a20128c5b9819cc09b3dc948e240d37e06aba4b,tensorflow/tensorflow,"[TF/XLA Bridge] Alias pass-through parameters when the input graph contains no TF reference variables

Previously, expensive copies were required for pass-through parameters.
Removing those copies is not safe in the presence of TF reference variables in the graph,
so we only remove them for cases when the graph does not contain TF reference variables.

PiperOrigin-RevId: 271241769",xla_ops.h,"@@ -153,6 +153,9 @@ class XlaCompileOp : public OpKernel {
 
   const bool must_compile_;
 
+  // Whether the graph has TF reference variables.
+  const bool has_ref_vars_;
+
   // cannot_compile_cluster_ is set to true if XLA returns an Unimplemented
   // error when compiling the cluster this _XlaCompile is supposed to compile.
   // If `cannot_compile_cluster_` is true then we avoid compiling this cluster
",0,test
eb178237c69f8ce0cea75a42ba181dd0fbbc56a2,tensorflow/tensorflow,"Update GraphDef version to 734.

PiperOrigin-RevId: 367955072
Change-Id: Iff095da3b4fd4e73eee8987938a97118cb9cce45",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 733  // Updated: 2021/4/11
+#define TF_GRAPH_DEF_VERSION 734  // Updated: 2021/4/12
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
c2f231507c0b8b7abb8097323545f5810a208bda,tensorflow/tensorflow,"Add DT_INT8 and DT_UINT8 as supported TPU type.

PiperOrigin-RevId: 322196027
Change-Id: I626d66c587ea0231cf1665b6d3349a13499b57e4",tpu_defs.h,"@@ -51,9 +51,10 @@ extern const char* const kTPUReplicateAttr;
 extern const char* const kOutsideCompilationAttr;
 
 // Supported types for TPUs.
-static constexpr std::array<DataType, 11> kTpuAllTypes = {
+static constexpr std::array<DataType, 13> kTpuAllTypes = {
     {DT_INT32, DT_UINT32, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE, DT_BOOL,
-     DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8}};
+     DT_COMPLEX64, DT_INT64, DT_UINT64, DT_QINT8, DT_QUINT8, DT_INT8,
+     DT_UINT8}};
 
 }  // namespace tensorflow
 
",0,train
7d8316fb85b21546e3df2aef701f1cfa9f92b6ba,tensorflow/tensorflow,"Add additional test cases

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",optimizers_test.py,"@@ -250,7 +250,7 @@ class OptimizersTest(test.TestCase):
       self.assertAlmostEqual(var_value, 6.5, 4)
       self.assertEqual(global_step_value, 1)
 
-  def testGradientMultiplyTensor(self):
+  def testGradientMultiplyInt32Tensor(self):
     with self.cached_session() as session:
       x, var, loss, global_step = _setup_model()
       v = array_ops.placeholder(dtypes.float32, [])
@@ -268,6 +268,24 @@ class OptimizersTest(test.TestCase):
       self.assertAlmostEqual(var_value, 6.5, 4)
       self.assertEqual(global_step_value, 1)
 
+  def testGradientMultiplyInt64Tensor(self):
+    with self.cached_session() as session:
+      x, var, loss, global_step = _setup_model()
+      v = array_ops.placeholder(dtypes.float64, [])
+      train = optimizers_lib.optimize_loss(
+          loss,
+          global_step,
+          learning_rate=0.1,
+          optimizer=""SGD"",
+          gradient_multipliers={var: v})
+      variables.global_variables_initializer().run()
+      session.run(train, feed_dict={x: 5, v: 7.})
+      var_value, global_step_value = session.run([var, global_step])
+      # var(0) = 10, x = 5, var(0)/dx = 5,
+      # var(1) = var(0) - learning_rate * gradient_multiplier * var(0)/dx
+      self.assertAlmostEqual(var_value, 6.5, 4)
+      self.assertEqual(global_step_value, 1)
+
   def testIgnoreVariablesWithNoGradients(self):
     _, _, loss, global_step = _setup_model()
 
",0,test
8412e4920296dd3df0ba1a99e4f3f783f74fcda2,tensorflow/tensorflow,"Add check for correct memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit arm. This will give a reasonable error message at model build time, rather than a SIGBUS later.

PiperOrigin-RevId: 262385650",allocation.cc,"@@ -87,6 +87,22 @@ bool FileCopyAllocation::valid() const { return copied_buffer_ != nullptr; }
 MemoryAllocation::MemoryAllocation(const void* ptr, size_t num_bytes,
                                    ErrorReporter* error_reporter)
     : Allocation(error_reporter, Allocation::Type::kMemory) {
+#ifdef __arm__
+  if ((reinterpret_cast<uintptr_t>(ptr) % 16) != 0) {
+    // The flat buffer schema has alignment requirements of up to 16 bytes to
+    // guarantee that data can be correctly accessed on 32-bit arm. The buffer
+    // we get must also be 16-byte aligned; otherwise the guarantee will not
+    // hold (potentially resulting in a SIGBUS).
+    //
+    // Note that 64-bit ARM may also suffer a performance impact, but no crash -
+    // that case is not checked.
+    error_reporter->Report(""The supplied buffer is not 16-byte aligned"");
+    buffer_ = nullptr;
+    buffer_size_bytes_ = 0;
+    return;
+  }
+#endif  // __arm__
+
   buffer_ = ptr;
   buffer_size_bytes_ = num_bytes;
 }
",0,train
8412e4920296dd3df0ba1a99e4f3f783f74fcda2,tensorflow/tensorflow,"Add check for correct memory alignment to MemoryAllocation::MemoryAllocation() on 32-bit arm. This will give a reasonable error message at model build time, rather than a SIGBUS later.

PiperOrigin-RevId: 262385650",model_test.cc,"@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
+#include ""tensorflow/lite/model.h""
+
 #include <fcntl.h>
 #include <stdint.h>
 #include <stdio.h>
@@ -20,7 +22,8 @@ limitations under the License.
 #include <sys/stat.h>
 #include <sys/types.h>
 
-#include ""tensorflow/lite/model.h""
+#include <fstream>
+#include <iostream>
 
 #include <gtest/gtest.h>
 #include ""tensorflow/lite/core/api/error_reporter.h""
@@ -72,6 +75,44 @@ TEST(BasicFlatBufferModel, TestNonExistantFiles) {
   ASSERT_TRUE(!FlatBufferModel::BuildFromFile(""/tmp/tflite_model_1234""));
 }
 
+TEST(BasicFlatBufferModel, TestBufferAlignment) {
+  // On 32-bit ARM buffers are required to be 16-byte aligned; on other
+  // platforms there is no alignment requirement.
+  const uintptr_t kAlignment = 16;
+  const uintptr_t kAlignmentBits = kAlignment - 1;
+
+  // Use real model data so that we can be sure error is only from the
+  // alignment requirement and not from bad data.
+  std::ifstream fp(""tensorflow/lite/testdata/empty_model.bin"");
+  ASSERT_TRUE(fp.good());
+  std::string empty_model_data((std::istreambuf_iterator<char>(fp)),
+                               std::istreambuf_iterator<char>());
+  auto free_chars = [](char* p) { free(p); };
+  std::unique_ptr<char, decltype(free_chars)> buffer(
+      reinterpret_cast<char*>(malloc(empty_model_data.size() + kAlignment)),
+      free_chars);
+
+  // Check that aligned buffer works (no other errors in the test).
+  char* aligned = reinterpret_cast<char*>(
+      (reinterpret_cast<uintptr_t>(buffer.get()) + kAlignment) &
+      ~kAlignmentBits);
+  memcpy(aligned, empty_model_data.c_str(), empty_model_data.size());
+  EXPECT_TRUE(
+      FlatBufferModel::BuildFromBuffer(aligned, empty_model_data.size()));
+
+  // Check unaligned buffer handling.
+  char* unaligned =
+      reinterpret_cast<char*>(reinterpret_cast<uintptr_t>(buffer.get()) | 0x1);
+  memcpy(unaligned, empty_model_data.c_str(), empty_model_data.size());
+#ifdef __arm__
+  EXPECT_FALSE(
+      FlatBufferModel::BuildFromBuffer(unaligned, empty_model_data.size()));
+#else   // !__arm__
+  EXPECT_TRUE(
+      FlatBufferModel::BuildFromBuffer(unaligned, empty_model_data.size()));
+#endif  // __arm__
+}
+
 // Make sure a model with nothing in it loads properly.
 TEST(BasicFlatBufferModel, TestEmptyModelsAndNullDestination) {
   auto model = FlatBufferModel::BuildFromFile(
@@ -248,15 +289,13 @@ class FakeVerifier : public tflite::TfLiteVerifier {
 TEST(BasicFlatBufferModel, TestWithTrueVerifier) {
   FakeVerifier verifier(true);
   ASSERT_TRUE(FlatBufferModel::VerifyAndBuildFromFile(
-      ""tensorflow/lite/testdata/test_model.bin"",
-      &verifier));
+      ""tensorflow/lite/testdata/test_model.bin"", &verifier));
 }
 
 TEST(BasicFlatBufferModel, TestWithFalseVerifier) {
   FakeVerifier verifier(false);
   ASSERT_FALSE(FlatBufferModel::VerifyAndBuildFromFile(
-      ""tensorflow/lite/testdata/test_model.bin"",
-      &verifier));
+      ""tensorflow/lite/testdata/test_model.bin"", &verifier));
 }
 
 TEST(BasicFlatBufferModel, TestWithNullVerifier) {
@@ -269,8 +308,7 @@ TEST(BasicFlatBufferModel, TestWithNullVerifier) {
 TEST(BasicFlatBufferModel, TestCustomErrorReporter) {
   TestErrorReporter reporter;
   auto model = FlatBufferModel::BuildFromFile(
-      ""tensorflow/lite/testdata/empty_model.bin"",
-      &reporter);
+      ""tensorflow/lite/testdata/empty_model.bin"", &reporter);
   ASSERT_TRUE(model);
 
   std::unique_ptr<Interpreter> interpreter;
",0,train
bf64fc285e88d36bb82f80757c4a1afd722347e0,tensorflow/tensorflow,"Add float16 support for NonMaxSuppressionV{2,3,4}

This fix tries to address the issue raised in 20199 where
there was no float16 support for NonMaxSuppressionV2.
As NonMaxSuppressionV2 is an earlier version of the API
and there are newer versions of NonMaxSuppression
(NonMaxSuppressionV2, NonMaxSuppressionV3, NonMaxSuppressionV4),
this fix exposes the float16 support to all of the above.
(Note in the master the default version used is NonMaxSuppressionV3)

This fix fixes 20199.
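For illustration only, a hedged usage sketch of what half-precision inputs
could look like once these kernels are registered (whether the public
tf.image.non_max_suppression wrapper routes to them is an assumption here,
not something this change documents):

boxes = tf.constant([[0.0, 0.0, 1.0, 1.0], [0.0, 0.1, 1.0, 1.1]],
                    dtype=tf.float16)
scores = tf.constant([0.9, 0.75], dtype=tf.float16)
selected = tf.image.non_max_suppression(boxes, scores, max_output_size=1)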

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",non_max_suppression_op.cc,"@@ -75,28 +75,29 @@ static inline void ParseAndCheckBoxSizes(OpKernelContext* context,
 }
 
 // Return intersection-over-union overlap between boxes i and j
-static inline float IOUGreaterThanThreshold(
-    typename TTypes<float, 2>::ConstTensor boxes, int i, int j,
-    float iou_threshold) {
-  const float ymin_i = std::min<float>(boxes(i, 0), boxes(i, 2));
-  const float xmin_i = std::min<float>(boxes(i, 1), boxes(i, 3));
-  const float ymax_i = std::max<float>(boxes(i, 0), boxes(i, 2));
-  const float xmax_i = std::max<float>(boxes(i, 1), boxes(i, 3));
-  const float ymin_j = std::min<float>(boxes(j, 0), boxes(j, 2));
-  const float xmin_j = std::min<float>(boxes(j, 1), boxes(j, 3));
-  const float ymax_j = std::max<float>(boxes(j, 0), boxes(j, 2));
-  const float xmax_j = std::max<float>(boxes(j, 1), boxes(j, 3));
-  const float area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i);
-  const float area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j);
-  if (area_i <= 0 || area_j <= 0) return 0.0;
-  const float intersection_ymin = std::max<float>(ymin_i, ymin_j);
-  const float intersection_xmin = std::max<float>(xmin_i, xmin_j);
-  const float intersection_ymax = std::min<float>(ymax_i, ymax_j);
-  const float intersection_xmax = std::min<float>(xmax_i, xmax_j);
-  const float intersection_area =
-      std::max<float>(intersection_ymax - intersection_ymin, 0.0) *
-      std::max<float>(intersection_xmax - intersection_xmin, 0.0);
-  const float iou = intersection_area / (area_i + area_j - intersection_area);
+template <typename T>
+static inline bool IOUGreaterThanThreshold(
+    typename TTypes<T, 2>::ConstTensor boxes, int i, int j,
+    T iou_threshold) {
+  const T ymin_i = std::min<T>(boxes(i, 0), boxes(i, 2));
+  const T xmin_i = std::min<T>(boxes(i, 1), boxes(i, 3));
+  const T ymax_i = std::max<T>(boxes(i, 0), boxes(i, 2));
+  const T xmax_i = std::max<T>(boxes(i, 1), boxes(i, 3));
+  const T ymin_j = std::min<T>(boxes(j, 0), boxes(j, 2));
+  const T xmin_j = std::min<T>(boxes(j, 1), boxes(j, 3));
+  const T ymax_j = std::max<T>(boxes(j, 0), boxes(j, 2));
+  const T xmax_j = std::max<T>(boxes(j, 1), boxes(j, 3));
+  const T area_i = (ymax_i - ymin_i) * (xmax_i - xmin_i);
+  const T area_j = (ymax_j - ymin_j) * (xmax_j - xmin_j);
+  if (area_i <= static_cast<T>(0) || area_j <= static_cast<T>(0)) return false;
+  const T intersection_ymin = std::max<T>(ymin_i, ymin_j);
+  const T intersection_xmin = std::max<T>(xmin_i, xmin_j);
+  const T intersection_ymax = std::min<T>(ymax_i, ymax_j);
+  const T intersection_xmax = std::min<T>(xmax_i, xmax_j);
+  const T intersection_area =
+      std::max<T>(intersection_ymax - intersection_ymin, static_cast<T>(0.0)) *
+      std::max<T>(intersection_xmax - intersection_xmin, static_cast<T>(0.0));
+  const T iou = intersection_area / (area_i + area_j - intersection_area);
   return iou > iou_threshold;
 }
 
@@ -106,11 +107,12 @@ static inline bool OverlapsGreaterThanThreshold(
   return overlaps(i, j) > overlap_threshold;
 }
 
+template <typename T>
 static inline std::function<bool(int, int)> CreateIOUSuppressCheckFn(
     const Tensor& boxes, float threshold) {
-  typename TTypes<float, 2>::ConstTensor boxes_data = boxes.tensor<float, 2>();
-  return std::bind(&IOUGreaterThanThreshold, boxes_data, std::placeholders::_1,
-                   std::placeholders::_2, threshold);
+  typename TTypes<T, 2>::ConstTensor boxes_data = boxes.tensor<T, 2>();
+  return std::bind(&IOUGreaterThanThreshold<T>, boxes_data, std::placeholders::_1,
+                   std::placeholders::_2, static_cast<T>(threshold));
 }
 
 static inline std::function<bool(int, int)> CreateOverlapsSuppressCheckFn(
@@ -121,6 +123,7 @@ static inline std::function<bool(int, int)> CreateOverlapsSuppressCheckFn(
                    std::placeholders::_1, std::placeholders::_2, threshold);
 }
 
+template <typename T>
 void DoNonMaxSuppressionOp(
     OpKernelContext* context, const Tensor& scores, int num_boxes,
     const Tensor& max_output_size, const float score_threshold,
@@ -128,13 +131,13 @@ void DoNonMaxSuppressionOp(
     bool pad_to_max_output_size = false, int* ptr_num_valid_outputs = nullptr) {
   const int output_size = max_output_size.scalar<int>()();
 
-  std::vector<float> scores_data(num_boxes);
-  std::copy_n(scores.flat<float>().data(), num_boxes, scores_data.begin());
+  std::vector<T> scores_data(num_boxes);
+  std::copy_n(scores.flat<T>().data(), num_boxes, scores_data.begin());
 
   // Data structure for selection candidate in NMS.
   struct Candidate {
     int box_index;
-    float score;
+    T score;
   };
 
   auto cmp = [](const Candidate bs_i, const Candidate bs_j) {
@@ -143,13 +146,13 @@ void DoNonMaxSuppressionOp(
   std::priority_queue<Candidate, std::deque<Candidate>, decltype(cmp)>
       candidate_priority_queue(cmp);
   for (int i = 0; i < scores_data.size(); ++i) {
-    if (scores_data[i] > score_threshold) {
+    if (scores_data[i] > static_cast<T>(score_threshold)) {
       candidate_priority_queue.emplace(Candidate({i, scores_data[i]}));
     }
   }
 
   std::vector<int> selected;
-  std::vector<float> selected_scores;
+  std::vector<T> selected_scores;
   Candidate next_candidate;
 
   while (selected.size() < output_size && !candidate_priority_queue.empty()) {
@@ -176,7 +179,7 @@ void DoNonMaxSuppressionOp(
   int num_valid_outputs = selected.size();
   if (pad_to_max_output_size) {
     selected.resize(output_size, 0);
-    selected_scores.resize(output_size, 0);
+    selected_scores.resize(output_size, static_cast<T>(0));
   }
   if (ptr_num_valid_outputs) {
     *ptr_num_valid_outputs = num_valid_outputs;
@@ -221,10 +224,10 @@ class NonMaxSuppressionOp : public OpKernel {
     if (!context->status().ok()) {
       return;
     }
-    auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_);
+    auto suppress_check_fn = CreateIOUSuppressCheckFn<float>(boxes, iou_threshold_);
 
     const float score_threshold_val = std::numeric_limits<float>::lowest();
-    DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size,
+    DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
                           score_threshold_val, suppress_check_fn);
   }
 
@@ -232,7 +235,7 @@ class NonMaxSuppressionOp : public OpKernel {
   float iou_threshold_;
 };
 
-template <typename Device>
+template <typename Device, typename T>
 class NonMaxSuppressionV2Op : public OpKernel {
  public:
   explicit NonMaxSuppressionV2Op(OpKernelConstruction* context)
@@ -264,10 +267,10 @@ class NonMaxSuppressionV2Op : public OpKernel {
     if (!context->status().ok()) {
       return;
     }
-    auto suppress_check_fn = CreateIOUSuppressCheckFn(boxes, iou_threshold_val);
+    auto suppress_check_fn = CreateIOUSuppressCheckFn<T>(boxes, iou_threshold_val);
 
     const float score_threshold_val = std::numeric_limits<float>::lowest();
-    DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size,
+    DoNonMaxSuppressionOp<T>(context, scores, num_boxes, max_output_size,
                           score_threshold_val, suppress_check_fn);
   }
 };
@@ -325,7 +328,7 @@ class NonMaxSuppressionV3V4Base : public OpKernel {
   float score_threshold_val_;
 };
 
-template <typename Device>
+template <typename Device, typename T>
 class NonMaxSuppressionV3Op : public NonMaxSuppressionV3V4Base {
  public:
   explicit NonMaxSuppressionV3Op(OpKernelConstruction* context)
@@ -334,14 +337,14 @@ class NonMaxSuppressionV3Op : public NonMaxSuppressionV3V4Base {
  protected:
   void DoComputeAndPostProcess(OpKernelContext* context) override {
     auto suppress_check_fn =
-        CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_);
+        CreateIOUSuppressCheckFn<T>(boxes_, iou_threshold_val_);
 
-    DoNonMaxSuppressionOp(context, scores_, num_boxes_, max_output_size_,
+    DoNonMaxSuppressionOp<T>(context, scores_, num_boxes_, max_output_size_,
                           score_threshold_val_, suppress_check_fn);
   }
 };
 
-template <typename Device>
+template <typename Device, typename T>
 class NonMaxSuppressionV4Op : public NonMaxSuppressionV3V4Base {
  public:
   explicit NonMaxSuppressionV4Op(OpKernelConstruction* context)
@@ -353,10 +356,10 @@ class NonMaxSuppressionV4Op : public NonMaxSuppressionV3V4Base {
  protected:
   void DoComputeAndPostProcess(OpKernelContext* context) override {
     auto suppress_check_fn =
-        CreateIOUSuppressCheckFn(boxes_, iou_threshold_val_);
+        CreateIOUSuppressCheckFn<T>(boxes_, iou_threshold_val_);
     int num_valid_outputs;
 
-    DoNonMaxSuppressionOp(context, scores_, num_boxes_, max_output_size_,
+    DoNonMaxSuppressionOp<T>(context, scores_, num_boxes_, max_output_size_,
                           score_threshold_val_, suppress_check_fn,
                           pad_to_max_output_size_, &num_valid_outputs);
 
@@ -413,7 +416,7 @@ class NonMaxSuppressionWithOverlapsOp : public OpKernel {
     auto suppress_check_fn =
         CreateOverlapsSuppressCheckFn(overlaps, overlap_threshold_val);
 
-    DoNonMaxSuppressionOp(context, scores, num_boxes, max_output_size,
+    DoNonMaxSuppressionOp<float>(context, scores, num_boxes, max_output_size,
                           score_threshold_val, suppress_check_fn);
   }
 };
@@ -421,14 +424,20 @@ class NonMaxSuppressionWithOverlapsOp : public OpKernel {
 REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppression"").Device(DEVICE_CPU),
                         NonMaxSuppressionOp<CPUDevice>);
 
-REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").Device(DEVICE_CPU),
-                        NonMaxSuppressionV2Op<CPUDevice>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").TypeConstraint<float>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV2Op<CPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV2"").TypeConstraint<Eigen::half>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV2Op<CPUDevice, Eigen::half>);
 
-REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").Device(DEVICE_CPU),
-                        NonMaxSuppressionV3Op<CPUDevice>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").TypeConstraint<float>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV3Op<CPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV3"").TypeConstraint<Eigen::half>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV3Op<CPUDevice, Eigen::half>);
 
-REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").Device(DEVICE_CPU),
-                        NonMaxSuppressionV4Op<CPUDevice>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").TypeConstraint<float>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV4Op<CPUDevice, float>);
+REGISTER_KERNEL_BUILDER(Name(""NonMaxSuppressionV4"").TypeConstraint<Eigen::half>(""T"").Device(DEVICE_CPU),
+                        NonMaxSuppressionV4Op<CPUDevice, Eigen::half>);
 
 REGISTER_KERNEL_BUILDER(
     Name(""NonMaxSuppressionWithOverlaps"").Device(DEVICE_CPU),
",0,test
bf64fc285e88d36bb82f80757c4a1afd722347e0,tensorflow/tensorflow,"Add float16 support for NonMaxSuppressionV{2,3,4}

This fix tries to address the issue raised in 20199 where
there was no float16 support for NonMaxSuppressionV2.
As NonMaxSuppressionV2 is an earlier version of the API
and there are newer versions of NonMaxSuppression
(NonMaxSuppressionV2, NonMaxSuppressionV3, NonMaxSuppressionV4),
this fix exposes the float16 support to all of the above.
(Note in the master the default version used is NonMaxSuppressionV3)

This fix fixes 20199.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",image_ops.cc,"@@ -683,11 +683,12 @@ REGISTER_OP(""NonMaxSuppression"")
     });
 
 REGISTER_OP(""NonMaxSuppressionV2"")
-    .Input(""boxes: float"")
-    .Input(""scores: float"")
+    .Input(""boxes: T"")
+    .Input(""scores: T"")
     .Input(""max_output_size: int32"")
     .Input(""iou_threshold: float"")
     .Output(""selected_indices: int32"")
+    .Attr(""T: {half, float}"")
     .SetShapeFn([](InferenceContext* c) {
       // Get inputs and validate ranks.
       ShapeHandle boxes;
@@ -711,22 +712,24 @@ REGISTER_OP(""NonMaxSuppressionV2"")
     });
 
 REGISTER_OP(""NonMaxSuppressionV3"")
-    .Input(""boxes: float"")
-    .Input(""scores: float"")
+    .Input(""boxes: T"")
+    .Input(""scores: T"")
     .Input(""max_output_size: int32"")
     .Input(""iou_threshold: float"")
     .Input(""score_threshold: float"")
     .Output(""selected_indices: int32"")
+    .Attr(""T: {half, float}"")
     .SetShapeFn(NMSShapeFn);
 
 REGISTER_OP(""NonMaxSuppressionV4"")
-    .Input(""boxes: float"")
-    .Input(""scores: float"")
+    .Input(""boxes: T"")
+    .Input(""scores: T"")
     .Input(""max_output_size: int32"")
     .Input(""iou_threshold: float"")
     .Input(""score_threshold: float"")
     .Output(""selected_indices: int32"")
     .Output(""valid_outputs: int32"")
+    .Attr(""T: {half, float}"")
     .Attr(""pad_to_max_output_size: bool = false"")
     .SetShapeFn([](InferenceContext* c) {
       TF_RETURN_IF_ERROR(NMSShapeFn(c));
",0,test
631f2b77dc22cf2e1b9a41d0c7518b32fe02e61b,tensorflow/tensorflow,"Add api documentation for `tf.io.read_file`.

PiperOrigin-RevId: 363707890
Change-Id: I0adfd60983bf38c312042de1d73eae1ec4c737e2",io_ops.py,"@@ -35,6 +35,7 @@ from tensorflow.python.ops import gen_io_ops
 from tensorflow.python.ops.gen_io_ops import *
 # pylint: enable=wildcard-import
 from tensorflow.python.util import deprecation
+from tensorflow.python.util import dispatch as _dispatch
 from tensorflow.python.util.tf_export import tf_export
 
 
@@ -96,6 +97,47 @@ def _restore_slice(file_pattern, tensor_name, shape_and_slice, tensor_type,
       preferred_shard, name=name)
 
 
+@_dispatch.add_dispatch_list
+@tf_export(""io.read_file"", v1=[""io.read_file"", ""read_file""])
+def read_file(filename, name=None):
+  """"""Reads the contents of file.
+
+  This operation returns a tensor with the entire contents of the input
+  filename. It does not do any parsing; it just returns the contents as
+  they are. Usually, this is the first step in the input pipeline.
+
+  Example:
+
+  >>> with open(""/tmp/file.txt"", ""w"") as f:
+  ...   f.write(""asdf"")
+  ...
+  4
+  >>> tf.io.read_file(""/tmp/file.txt"")
+  <tf.Tensor: shape=(), dtype=string, numpy=b'asdf'>
+
+  Example of using the op in a function to read an image, decode it and reshape
+  the tensor containing the pixel data:
+
+  >>> @tf.function
+  ... def load_image(filename):
+  ...   raw = tf.io.read_file(filename)
+  ...   image = tf.image.decode_png(raw, channels=3)
+  ...   # the `print` executes during tracing.
+  ...   print(""Initial shape: "", image.shape)
+  ...   image.set_shape([28, 28, 3])
+  ...   print(""Final shape: "", image.shape)
+  ...   return image
+
+  Args:
+    filename: string. filename to read from.
+    name: string.  Optional name for the op.
+
+  Returns:
+    A tensor of dtype ""string"", with the file contents.
+  """"""
+  return gen_io_ops.read_file(filename, name)
+
+
 @tf_export(v1=[""ReaderBase""])
 class ReaderBase(object):
   """"""Base class for different Reader types, that produce a record every step.
",0,train
fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization.

With this change, we first bufferize only HLO operations, optimize those and
then lower and bufferize shape computations.

PiperOrigin-RevId: 343062899
Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",kernel_creator.cc,"@@ -82,31 +82,19 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
         mlir::kernel_gen::transforms::CreateMaterializeBroadcastsPass());
     pm.addNestedPass<mlir::FuncOp>(
         mlir::kernel_gen::transforms::CreateUnfuseBatchNormPass());
-    pm.addPass(mlir::mhlo::createLegalizeToLhloPass());
-    // Moving `AllocOp`s and inserting missing `DeallocOp`s
-    pm.addNestedPass<mlir::FuncOp>(::mlir::createBufferHoistingPass());
-    pm.addNestedPass<mlir::FuncOp>(::mlir::createBufferDeallocationPass());
-    pm.addNestedPass<mlir::FuncOp>(mlir::createCopyRemovalPass());
-    pm.addPass(mlir::createCanonicalizerPass());
-    pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass());
   } else {
     pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createLegalizeTFPass(
         /*allow_partial_conversion=*/false, /*legalize_chlo=*/false));
     pm.addNestedPass<mlir::FuncOp>(mlir::createTransformUnrankedHloPass());
     pm.addNestedPass<mlir::FuncOp>(mlir::mhlo::createChloLegalizeToHloPass());
     pm.addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
-    pm.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
-    pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass());
-    // Clean up the IR created above. In particular, operations on descriptors
-    // are simplified here.
-    pm.addPass(mlir::createCSEPass());
-    pm.addPass(mlir::kernel_gen::transforms::CreateBufferizePass());
-    pm.addNestedPass<mlir::FuncOp>(
-        mlir::kernel_gen::transforms::CreateParallelLoopsToSequential());
   }
 
+  // Legalize only hlo operations to lhlo, keep the rest as tensors.
+  pm.addPass(mlir::kernel_gen::transforms::CreateHloBufferizePass());
   // Clean up the IR for further processing.
   pm.addPass(mlir::createCanonicalizerPass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
   // We have to anticipate later unrolling in tiling to make sure that we get
   // the requested tiling after unrolling. Compute the new tiling here if
   // needed.
@@ -160,7 +148,23 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
   }
   // Greedily map the remaining loop to GPU hardware dimensions.
   pm.addNestedPass<::mlir::FuncOp>(xla::mlir_gpu::createMapParallelLoopsPass());
-  // Apply the mapping.
+
+  // Now lower the shape computations, bufferize all remaining ops and insert
+  // deallocs.
+  pm.addNestedPass<mlir::FuncOp>(::mlir::createBufferHoistingPass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::createCopyRemovalPass());
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addPass(mlir::kernel_gen::transforms::CreateShapeToDescriptorsPass());
+  pm.addPass(mlir::createCanonicalizerPass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
+  pm.addPass(mlir::kernel_gen::transforms::CreateFinalBufferizePass());
+  pm.addNestedPass<mlir::FuncOp>(mlir::createPromoteBuffersToStackPass(64));
+  // TODO(herhut): Enable this to avoid leaks once fixed.
+  // pm.addNestedPass<mlir::FuncOp>(::mlir::createBufferDeallocationPass());
+
+  // Apply the mapping and go to GPU. We cannot do this earlier due to missing
+  // interfaces on the GPU dialect.
+  // TODO(herhut) Implement interfaces.
   pm.addNestedPass<::mlir::FuncOp>(mlir::createParallelLoopToGpuPass());
 
   // Some basic cleanup.
@@ -190,7 +194,9 @@ Status LowerTFtoGPU(mlir::ModuleOp module, bool gpu_binary_only,
         mlir::kernel_gen::transforms::CreateEmbedMemRefPrintsPass());
   }
   pm.addNestedPass<::mlir::FuncOp>(::mlir::createCanonicalizerPass());
-
+  // TODO(herhut): Remove this pass once the LowerToCFG pass can handle it.
+  pm.addNestedPass<mlir::FuncOp>(
+      mlir::kernel_gen::transforms::CreateParallelLoopsToSequential());
   pm.addPass(::mlir::createLowerToCFGPass());
   // Map allocs, asserts, etc. to the tensorflow framework.
   pm.addPass(mlir::kernel_gen::tf_framework::CreateEmbedTFFrameworkPass());
",0,train
fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization.

With this change, we first bufferize only HLO operations, optimize those and
then lower and bufferize shape computations.

PiperOrigin-RevId: 343062899
Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",bufferize.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
 #include ""mlir/IR/Attributes.h""  // from @llvm-project
 #include ""mlir/IR/BlockAndValueMapping.h""  // from @llvm-project
+#include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/Transforms/DialectConversion.h""  // from @llvm-project
 #include ""tensorflow/compiler/mlir/tools/kernel_gen/transforms/rewriters.h""
 
",0,train
fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization.

With this change, we first bufferize only HLO operations, optimize those and
then lower and bufferize shape computations.

PiperOrigin-RevId: 343062899
Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",bufferize_pass.cc,"@@ -18,6 +18,8 @@ limitations under the License.
 
 #include <memory>
 
+#include ""llvm/ADT/STLExtras.h""
+#include ""mlir/Dialect/Affine/IR/AffineOps.h""  // from @llvm-project
 #include ""mlir/Dialect/SCF/SCF.h""  // from @llvm-project
 #include ""mlir/Dialect/SCF/Transforms.h""  // from @llvm-project
 #include ""mlir/Dialect/Shape/IR/Shape.h""  // from @llvm-project
@@ -49,6 +51,37 @@ namespace {
 #define GEN_PASS_CLASSES
 #include ""tensorflow/compiler/mlir/tools/kernel_gen/transforms/kernel_gen_passes.h.inc""
 
+struct HloBufferizePass : public HloBufferizePassBase<HloBufferizePass> {
+  // TODO(b/173201243): Move to tablegen.
+  void getDependentDialects(DialectRegistry& registry) const override {
+    registry.insert<lmhlo::LmhloDialect>();
+  }
+
+ public:
+  void runOnOperation() override {
+    OwningRewritePatternList patterns;
+    auto& context = getContext();
+    ConversionTarget target(context);
+    target.addLegalDialect<lmhlo::LmhloDialect>();
+    target.addLegalDialect<StandardOpsDialect>();
+    target.addIllegalDialect<mhlo::MhloDialect>();
+
+    BufferizeTypeConverter converter;
+    // Configure bufferize pattern for functions and lhlo.
+    mhlo::populateHLOToLHLOConversionPattern(&context, &converter, &patterns);
+
+    // Configure legality and structural patterns.
+    populateBufferizeMaterializationLegality(target);
+    populateShapeStructuralTypeConversionsAndLegality(&context, converter,
+                                                      patterns, target);
+    scf::populateSCFStructuralTypeConversionsAndLegality(&context, converter,
+                                                         patterns, target);
+    if (failed(applyPartialConversion(getOperation(), target,
+                                      std::move(patterns))))
+      signalPassFailure();
+  }
+};
+
 // TODO(herhut) : This could become a real pattern in bufferize pass. What we
 // would need to do is insert a copy to model the semantics correctly. The same
 // is true for the TensorLoad pattern that is already in there.  Then buffer
@@ -71,28 +104,36 @@ class UnrankedTensorStoreTestOnlyPattern
   }
 };
 
-struct BufferizePass : public BufferizePassBase<BufferizePass> {
+struct FinalBufferizePass : public FinalBufferizePassBase<FinalBufferizePass> {
+  // TODO(b/173201243): Move to tablegen.
   void getDependentDialects(DialectRegistry& registry) const override {
-    registry.insert<lmhlo::LmhloDialect>();
+    registry.insert<AffineDialect, scf::SCFDialect, shape::ShapeDialect,
+                    tf_framework::TFFrameworkDialect>();
   }
 
  public:
   void runOnOperation() override {
     auto& context = getContext();
     ConversionTarget target(context);
-    target.addLegalDialect<lmhlo::LmhloDialect, scf::SCFDialect,
-                           StandardOpsDialect,
-                           tf_framework::TFFrameworkDialect>();
-    target.addLegalOp<ModuleOp, ModuleTerminatorOp, FuncOp>();
+    target.addLegalDialect<scf::SCFDialect, StandardOpsDialect,
+                           tf_framework::TFFrameworkDialect, AffineDialect,
+                           shape::ShapeDialect>();
+    target.addLegalOp<ModuleOp, ModuleTerminatorOp>();
     target.addIllegalDialect<mhlo::MhloDialect>();
     target.addIllegalOp<DynamicTensorFromElementsOp, ExtractElementOp,
                         TensorFromElementsOp, TensorCastOp, TensorLoadOp,
                         TensorToMemrefOp>();
+    // Certain operations are no longer legal on tensors but otherwise are.
+    target.addDynamicallyLegalOp<ConstantOp, SelectOp>([&](Operation* op) {
+      return llvm::none_of(op->getResultTypes(),
+                           [](Type t) { return t.isa<TensorType>(); });
+    });
     target.addDynamicallyLegalOp<TensorStoreOp>([&](TensorStoreOp op) {
       return !op.tensor().getType().isa<UnrankedTensorType>();
     });
 
     BufferizeTypeConverter converter;
+    // TODO(herhut): Move this legality configuration to bufferize itself?
     auto typesAreLegal = [&converter](Operation* op) {
       return converter.isLegal(op->getOperandTypes()) &&
              converter.isLegal(op->getResultTypes());
@@ -111,6 +152,8 @@ struct BufferizePass : public BufferizePassBase<BufferizePass> {
     populateFuncOpTypeConversionPattern(patterns, &context, converter);
     populateCallOpTypeConversionPattern(patterns, &context, converter);
     populateStdBufferizePatterns(&context, converter, patterns);
+    populateEliminateBufferizeMaterializationsPatterns(&context, converter,
+                                                       patterns);
     populateExtraStdBufferizePattern(&context, &converter, &patterns);
     populateShapeStructuralTypeConversionsAndLegality(&context, converter,
                                                       patterns, target);
@@ -127,8 +170,12 @@ struct BufferizePass : public BufferizePassBase<BufferizePass> {
 
 }  // namespace
 
-std::unique_ptr<OperationPass<ModuleOp> > CreateBufferizePass() {
-  return std::make_unique<BufferizePass>();
+std::unique_ptr<OperationPass<ModuleOp> > CreateHloBufferizePass() {
+  return std::make_unique<HloBufferizePass>();
+}
+
+std::unique_ptr<OperationPass<ModuleOp> > CreateFinalBufferizePass() {
+  return std::make_unique<FinalBufferizePass>();
 }
 
 }  // namespace transforms
",0,train
fa4b80602bbeb6f2d9b94f22b537643b9f482a20,tensorflow/tensorflow,"Shuffle passes in kernel generator to do partial bufferization.

With this change, we first bufferize only HLO operations, optimize those and
then lower and bufferize shape computations.

PiperOrigin-RevId: 343062899
Change-Id: Ibc70a1f623f3c71c06478e75ef869fde3eecc741",passes.h,"@@ -47,9 +47,13 @@ std::unique_ptr<OperationPass<ModuleOp> > CreateTFKernelToLLVMPass();
 // using memref descriptors.
 std::unique_ptr<OperationPass<ModuleOp> > CreateShapeToDescriptorsPass();
 
-// Pass to tranform computations on values to their corresponding parts on
-// buffers.
-std::unique_ptr<OperationPass<ModuleOp> > CreateBufferizePass();
+// Pass to transform hlo-level computations on values to their corresponding
+// parts on buffers.
+std::unique_ptr<OperationPass<ModuleOp>> CreateHloBufferizePass();
+
+// Pass to transform late-dialect level computations (essentially all non-hlo
+// dialects) on values to their corresponding parts on buffers.
+std::unique_ptr<OperationPass<ModuleOp>> CreateFinalBufferizePass();
 
 // Pass to materialize broadcasts.
 std::unique_ptr<FunctionPass> CreateMaterializeBroadcastsPass();
",0,train
94e7e37a60e56464cedee82125691f2ba7b9be22,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-10-21

PiperOrigin-RevId: 275799990
Change-Id: I11e805303876cab4e0a47e4427a4fdcfb74706f2",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 20)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 21)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
519189837b77181137505bf83054ddd962600f9b,tensorflow/tensorflow,"Making the tf.name_scope blocks related to the factor and weight vars configurable. By default they will not be scoped.
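A hedged usage sketch (the import path and the constructor arguments other than
use_scoped_vars are assumed from the surrounding code, not part of this change):

from tensorflow.contrib.factorization.python.ops import factorization_ops

model = factorization_ops.WALSModel(
    input_rows=100, input_cols=50, n_components=10,
    use_scoped_vars=True)  # nest factor and weight vars in tf.name_scope blocks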

PiperOrigin-RevId: 198759754",factorization_ops.py,"@@ -197,7 +197,8 @@ class WALSModel(object):
                row_weights=1,
                col_weights=1,
                use_factors_weights_cache=True,
-               use_gramian_cache=True):
+               use_gramian_cache=True,
+               use_scoped_vars=False):
     """"""Creates model for WALS matrix factorization.
 
     Args:
@@ -239,6 +240,8 @@ class WALSModel(object):
         weights cache to take effect.
       use_gramian_cache: When True, the Gramians will be cached on the workers
         before the updates start. Defaults to True.
+      use_scoped_vars: When True, the factor and weight vars will also be nested
+        in a tf.name_scope.
     """"""
     self._input_rows = input_rows
     self._input_cols = input_cols
@@ -251,18 +254,36 @@ class WALSModel(object):
         regularization * linalg_ops.eye(self._n_components)
         if regularization is not None else None)
     assert (row_weights is None) == (col_weights is None)
-    self._row_weights = WALSModel._create_weights(
-        row_weights, self._input_rows, self._num_row_shards, ""row_weights"")
-    self._col_weights = WALSModel._create_weights(
-        col_weights, self._input_cols, self._num_col_shards, ""col_weights"")
     self._use_factors_weights_cache = use_factors_weights_cache
     self._use_gramian_cache = use_gramian_cache
-    self._row_factors = self._create_factors(
-        self._input_rows, self._n_components, self._num_row_shards, row_init,
-        ""row_factors"")
-    self._col_factors = self._create_factors(
-        self._input_cols, self._n_components, self._num_col_shards, col_init,
-        ""col_factors"")
+
+    if use_scoped_vars:
+      with ops.name_scope(""row_weights""):
+        self._row_weights = WALSModel._create_weights(
+            row_weights, self._input_rows, self._num_row_shards, ""row_weights"")
+      with ops.name_scope(""col_weights""):
+        self._col_weights = WALSModel._create_weights(
+            col_weights, self._input_cols, self._num_col_shards, ""col_weights"")
+      with ops.name_scope(""row_factors""):
+        self._row_factors = self._create_factors(
+            self._input_rows, self._n_components, self._num_row_shards,
+            row_init, ""row_factors"")
+      with ops.name_scope(""col_factors""):
+        self._col_factors = self._create_factors(
+            self._input_cols, self._n_components, self._num_col_shards,
+            col_init, ""col_factors"")
+    else:
+      self._row_weights = WALSModel._create_weights(
+          row_weights, self._input_rows, self._num_row_shards, ""row_weights"")
+      self._col_weights = WALSModel._create_weights(
+          col_weights, self._input_cols, self._num_col_shards, ""col_weights"")
+      self._row_factors = self._create_factors(
+          self._input_rows, self._n_components, self._num_row_shards, row_init,
+          ""row_factors"")
+      self._col_factors = self._create_factors(
+          self._input_cols, self._n_components, self._num_col_shards, col_init,
+          ""col_factors"")
+
     self._row_gramian = self._create_gramian(self._n_components, ""row_gramian"")
     self._col_gramian = self._create_gramian(self._n_components, ""col_gramian"")
     with ops.name_scope(""row_prepare_gramian""):
@@ -313,37 +334,36 @@ class WALSModel(object):
   @classmethod
   def _create_factors(cls, rows, cols, num_shards, init, name):
     """"""Helper function to create row and column factors.""""""
-    with ops.name_scope(name):
-      if callable(init):
-        init = init()
-      if isinstance(init, list):
-        assert len(init) == num_shards
-      elif isinstance(init, str) and init == ""random"":
-        pass
-      elif num_shards == 1:
-        init = [init]
-      sharded_matrix = []
-      sizes = cls._shard_sizes(rows, num_shards)
-      assert len(sizes) == num_shards
-
-      def make_initializer(i, size):
-
-        def initializer():
-          if init == ""random"":
-            return random_ops.random_normal([size, cols])
-          else:
-            return init[i]
+    if callable(init):
+      init = init()
+    if isinstance(init, list):
+      assert len(init) == num_shards
+    elif isinstance(init, str) and init == ""random"":
+      pass
+    elif num_shards == 1:
+      init = [init]
+    sharded_matrix = []
+    sizes = cls._shard_sizes(rows, num_shards)
+    assert len(sizes) == num_shards
+
+    def make_initializer(i, size):
 
-        return initializer
+      def initializer():
+        if init == ""random"":
+          return random_ops.random_normal([size, cols])
+        else:
+          return init[i]
 
-      for i, size in enumerate(sizes):
-        var_name = ""%s_shard_%d"" % (name, i)
-        var_init = make_initializer(i, size)
-        sharded_matrix.append(
-            variable_scope.variable(
-                var_init, dtype=dtypes.float32, name=var_name))
+      return initializer
 
-      return sharded_matrix
+    for i, size in enumerate(sizes):
+      var_name = ""%s_shard_%d"" % (name, i)
+      var_init = make_initializer(i, size)
+      sharded_matrix.append(
+          variable_scope.variable(
+              var_init, dtype=dtypes.float32, name=var_name))
+
+    return sharded_matrix
 
   @classmethod
   def _create_weights(cls, wt_init, num_wts, num_shards, name):
@@ -384,26 +404,25 @@ class WALSModel(object):
     sizes = cls._shard_sizes(num_wts, num_shards)
     assert len(sizes) == num_shards
 
-    with ops.name_scope(name):
-      def make_wt_initializer(i, size):
+    def make_wt_initializer(i, size):
 
-        def initializer():
-          if init_mode == ""scalar"":
-            return wt_init * array_ops.ones([size])
-          else:
-            return wt_init[i]
+      def initializer():
+        if init_mode == ""scalar"":
+          return wt_init * array_ops.ones([size])
+        else:
+          return wt_init[i]
 
-        return initializer
+      return initializer
 
-      sharded_weight = []
-      for i, size in enumerate(sizes):
-        var_name = ""%s_shard_%d"" % (name, i)
-        var_init = make_wt_initializer(i, size)
-        sharded_weight.append(
-            variable_scope.variable(
-                var_init, dtype=dtypes.float32, name=var_name))
+    sharded_weight = []
+    for i, size in enumerate(sizes):
+      var_name = ""%s_shard_%d"" % (name, i)
+      var_init = make_wt_initializer(i, size)
+      sharded_weight.append(
+          variable_scope.variable(
+              var_init, dtype=dtypes.float32, name=var_name))
 
-      return sharded_weight
+    return sharded_weight
 
   @staticmethod
   def _create_gramian(n_components, name):
",0,train
afab9ac5103929ad4d3d523021308ca650457ba5,tensorflow/tensorflow,Changed example and output,image_ops_impl.py,"@@ -3252,24 +3252,19 @@ def rgb_to_yuv(images):
   Outputs a tensor of the same shape as the `images` tensor, containing the YUV
   value of the pixels.
   The output is only well defined if the value in images are in [0,1].
-  You need to scale your RGB images if their pixel values are not in the
-  required range. Below given example illustrates preprocessing of each channel
-  of images before feeding them to `rgb_to_yuv`.
 
   Usage Example:
 
-  >>> rgb_images = tf.random.uniform(shape=[100,64,64,3], maxval=255)
-  >>> preprocessed_rgb_images = tf.truediv(
-  ...   tf.subtract(
-  ...     rgb_images,
-  ...     tf.reduce_min(rgb_images)
-  ...   ),
-  ...   tf.subtract(
-  ...     tf.reduce_max(rgb_images),
-  ...     tf.reduce_min(rgb_images)
-  ...   )
-  ... )
-  >>> yub_tensor_images = tf.image.rgb_to_yuv(preprocessed_rgb_images)
+  >>> x = [[[0.1, 0.2, 0.3],
+  ...       [0.4, 0.5, 0.6]],
+  ...     [[0.7, 0.8, 0.9],
+  ...       [0.10, 0.11, 0.12]]]
+  >>> tf.image.rgb_to_yuv(x)
+  <tf.Tensor: shape=(2, 2, 3), dtype=float32, numpy=
+  array([[[ 0.1815    ,  0.05831515, -0.07149857],
+          [ 0.4815    ,  0.05831517, -0.07149856]],
+         [[ 0.7815    ,  0.05831515, -0.07149857],
+          [ 0.10815   ,  0.00583152, -0.00714985]]], dtype=float32)>
 
   Args:
     images: 2-D or higher rank. Image data to convert. Last dimension must be
",0,train
733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos.

PiperOrigin-RevId: 155868794",__init__.py,"@@ -18,6 +18,7 @@
 See @{$python/contrib.util} guide.
 
 @@constant_value
+@@create_example
 @@make_tensor_proto
 @@make_ndarray
 @@ops_used_by_graph_def
@@ -30,11 +31,11 @@ from __future__ import division
 from __future__ import print_function
 
 # pylint: disable=unused-import
+from tensorflow.contrib.util.create_example import create_example
 from tensorflow.python.framework.meta_graph import ops_used_by_graph_def
 from tensorflow.python.framework.meta_graph import stripped_op_list_for_graph
 from tensorflow.python.framework.tensor_util import constant_value
 from tensorflow.python.framework.tensor_util import make_tensor_proto
 from tensorflow.python.framework.tensor_util import MakeNdarray as make_ndarray
-# pylint: disable=unused_import
 from tensorflow.python.util.all_util import remove_undocumented
 remove_undocumented(__name__)
",0,train
733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos.

PiperOrigin-RevId: 155868794",create_example.py,"@@ -0,0 +1,61 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Utilities for constructing Example protos.
+
+Takes ndarrays, lists, or tuples for each feature.
+
+@@create_example
+""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.core.example import example_pb2
+
+
+def create_example(**features):
+  """"""Constructs a `tf.train.Example` from the given features.
+
+  Args:
+    **features: Maps feature name to an integer, float, or string ndarray, or
+        another object convertible to an ndarray (list, tuple, etc).
+
+  Returns:
+    A `tf.train.Example` with the features.
+
+  Raises:
+    ValueError: if a feature is not integer, float, or string.
+  """"""
+  example = example_pb2.Example()
+  for name in features:
+    feature = example.features.feature[name]
+    values = np.asarray(features[name])
+    # Encode unicode using UTF-8.
+    if values.dtype.kind == 'U':
+      values = np.vectorize(lambda string: string.encode('utf-8'))(values)
+
+    if values.dtype.kind == 'i':
+      feature.int64_list.value.extend(values.astype(np.int64).ravel())
+    elif values.dtype.kind == 'f':
+      feature.float_list.value.extend(values.astype(np.float32).ravel())
+    elif values.dtype.kind == 'S':
+      feature.bytes_list.value.extend(values.ravel())
+    else:
+      raise ValueError('Feature ""%s"" has unexpected dtype: %s' % (name,
+                                                                  values.dtype))
+  return example
",0,train
733bff53926717bb9583d4833ba062c58f27960f,tensorflow/tensorflow,"Add a tf.contrib.util.create_example utility for building Example protos.

PiperOrigin-RevId: 155868794",create_example_test.py,"@@ -0,0 +1,86 @@
+# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Tests for the Example creation utilities.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+
+from tensorflow.contrib import util
+from tensorflow.core.example import example_pb2
+from tensorflow.python.platform import googletest
+
+
+class CreateExampleTest(googletest.TestCase):
+
+  def testCreateExample_empty(self):
+    self.assertEqual(util.create_example(), example_pb2.Example())
+
+    # np.asarray([]) == np.array([], dtype=np.float64), but the dtype should not
+    # matter here.
+    actual = util.create_example(foo=[], bar=())
+    expected = example_pb2.Example()
+    expected.features.feature['foo'].float_list.value.extend([])
+    expected.features.feature['bar'].float_list.value.extend([])
+    self.assertEqual(actual, expected)
+
+  def testCreateExample_scalars(self):
+    actual = util.create_example(foo=3, bar=4.2, baz='x', qux=b'y')
+    expected = example_pb2.Example()
+    expected.features.feature['foo'].int64_list.value.append(3)
+    # 4.2 cannot be represented exactly in floating point.
+    expected.features.feature['bar'].float_list.value.append(np.float32(4.2))
+    expected.features.feature['baz'].bytes_list.value.append(b'x')
+    expected.features.feature['qux'].bytes_list.value.append(b'y')
+    self.assertEqual(actual, expected)
+
+  def testCreateExample_listContainingString(self):
+    actual = util.create_example(foo=[3, 4.2, 'foo'])
+    # np.asarray([3, 4.2, 'foo']) == np.array(['3', '4.2', 'foo'])
+    expected = example_pb2.Example()
+    expected.features.feature['foo'].bytes_list.value.extend(
+        [b'3', b'4.2', b'foo'])
+    self.assertEqual(actual, expected)
+
+  def testCreateExample_lists_tuples_ranges(self):
+    actual = util.create_example(
+        foo=[1, 2, 3, 4, 5], bar=(0.5, 0.25, 0.125), baz=range(3))
+    expected = example_pb2.Example()
+    expected.features.feature['foo'].int64_list.value.extend([1, 2, 3, 4, 5])
+    expected.features.feature['bar'].float_list.value.extend([0.5, 0.25, 0.125])
+    expected.features.feature['baz'].int64_list.value.extend([0, 1, 2])
+    self.assertEqual(actual, expected)
+
+  def testCreateExample_ndarrays(self):
+    a = np.random.random((3, 4, 5)).astype(np.float32)
+    b = np.random.randint(low=1, high=10, size=(6, 5, 4))
+    actual = util.create_example(A=a, B=b)
+    expected = example_pb2.Example()
+    expected.features.feature['A'].float_list.value.extend(a.ravel())
+    expected.features.feature['B'].int64_list.value.extend(b.ravel())
+    self.assertEqual(actual, expected)
+
+  def testCreateExample_unicode(self):
+    actual = util.create_example(A=[u'\u4242', u'\u5555'])
+    expected = example_pb2.Example()
+    expected.features.feature['A'].bytes_list.value.extend(
+        [u'\u4242'.encode('utf-8'), u'\u5555'.encode('utf-8')])
+    self.assertEqual(actual, expected)
+
+
+if __name__ == '__main__':
+  googletest.main()
",0,train
5ae2d41e7a1daf4b00b24dda683fabf7c283df7c,tensorflow/tensorflow,"Checkpointable: Fix device placement when restoring name-based checkpoints.

Just need to put the restore ops on a CPU.

PiperOrigin-RevId: 188248198",checkpointable_utils.py,"@@ -493,8 +493,9 @@ class NameBasedSaverStatus(_LoadStatus):
     """"""Load the name-based training checkpoint using a new `tf.train.Saver`.""""""
     if session is None and not context.executing_eagerly():
       session = ops.get_default_session()
-    saver_lib.Saver(self._object_saver._global_variable_names()).restore(  # pylint: disable=protected-access
-        sess=session, save_path=self._save_path)
+    with ops.device(""/cpu:0""):
+      saver_lib.Saver(self._object_saver._global_variable_names()).restore(  # pylint: disable=protected-access
+          sess=session, save_path=self._save_path)
 
   def initialize_or_restore(self, session=None):
     """"""Alias for `run_restore_ops`.""""""
",0,train
5ae2d41e7a1daf4b00b24dda683fabf7c283df7c,tensorflow/tensorflow,"Checkpointable: Fix device placement when restoring name-based checkpoints.

Just need to put the restore ops on a CPU.

PiperOrigin-RevId: 188248198",checkpointable_utils_test.py,"@@ -993,20 +993,21 @@ class CheckpointCompatibilityTests(test.TestCase):
   @test_util.run_in_graph_and_eager_modes()
   def testLoadFromNameBasedSaver(self):
     """"""Save a name-based checkpoint, load it using the object-based API.""""""
-    save_path = self._write_name_based_checkpoint()
-    root = self._initialized_model()
-    self._set_sentinels(root)
-    with self.assertRaises(AssertionError):
+    with test_util.device(use_gpu=True):
+      save_path = self._write_name_based_checkpoint()
+      root = self._initialized_model()
+      self._set_sentinels(root)
+      with self.assertRaises(AssertionError):
+        self._check_sentinels(root)
+      object_saver = checkpointable_utils.CheckpointableSaver(root)
+      status = object_saver.restore(save_path)
+      with self.assertRaises(AssertionError):
+        status.assert_consumed()
+      status.run_restore_ops()
+      self._check_sentinels(root)
+      self._set_sentinels(root)
+      status.initialize_or_restore()
       self._check_sentinels(root)
-    object_saver = checkpointable_utils.CheckpointableSaver(root)
-    status = object_saver.restore(save_path)
-    with self.assertRaises(AssertionError):
-      status.assert_consumed()
-    status.run_restore_ops()
-    self._check_sentinels(root)
-    self._set_sentinels(root)
-    status.initialize_or_restore()
-    self._check_sentinels(root)
 
   # TODO(allenl): Test for the core name-based saver loading object-based
   # checkpoints once object-based checkpointing is in core.
",0,train
091e8500c02ac69c3f1eced6f923598ebfcc354c,tensorflow/tensorflow,"Mention workarounds for load_weights not loading name-based checkpoints in 1.x

PiperOrigin-RevId: 247697625",util.py,"@@ -616,8 +616,10 @@ def streaming_restore(status, session=None):
     session = keras_backend.get_session()
   if isinstance(status, NameBasedSaverStatus):
     raise NotImplementedError(
-        ""Streaming restore not supported from name-based checkpoints. File a ""
-        ""feature request if this limitation bothers you."")
+        ""Streaming restore not supported from name-based checkpoints when ""
+        ""graph building. File a feature request if this limitation bothers ""
+        ""you. As a workaround, consider either using tf.train.Checkpoint to ""
+        ""load name-based checkpoints or enabling eager execution."")
   status.run_restore_ops(session=session)
   # pylint: disable=protected-access
   status._checkpoint.new_restore_ops_callback = (
",0,test
d1cde76080ee52e3b2fb99966d44b4af515a9846,tensorflow/tensorflow,"[OpenCL] Fixes SYCL profiler tests (#141)

The profiler relies heavily on the canonical device being listed in the
TFProf nodes, which is only set for those devices which return True from
CountAsCPUTime, so we need this to return True for SYCL device nodes
too. The check for whether the node will run on an Accelerator comes
from IsPlacedOnAccelerator.",tfprof_node.cc,"@@ -25,7 +25,7 @@ bool CountAsAcceleratorTime(const string& device) {
 }
 
 bool CountAsCPUTime(const string& device) {
-  return RE2::FullMatch(device, "".*/(gpu|cpu):\\d+"");
+  return RE2::FullMatch(device, "".*/(gpu|cpu|device:sycl):\\d+"");
 }
 
 bool IsCanonicalDevice(const string& device) { return CountAsCPUTime(device); }
@@ -133,7 +133,7 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
   // See run_metadata_test.py
   // It can be /job:0/replica:0/xxxx/gpu:0, or simply /gpu:0.
   // It can have some ad-hoc suffix, such as /stream:xx or /memcpy:xx.
-  if (IsCanonicalDevice(device)) {
+  if (IsCanonicalDevice(dev)) {
     if (!canonical_device_.empty()) {
       if (canonical_device_ != dev) {
         fprintf(stderr, ""Unexpected: graph node changed device: %s->%s.\n"",
@@ -143,7 +143,11 @@ void TFGraphNode::AddStepStat(int64 step, const string& device,
     } else {
       canonical_device_ = dev;
       // TODO(xpan): Support things other than gpu?
-      host_device_ = StringReplace(dev, ""gpu:\\d+"", ""cpu:0"");
+      if (dev.find(""sycl"") != dev.npos) {
+        host_device_ = StringReplace(dev, ""device:sycl:\\d+"", ""cpu:0"");
+      } else {
+        host_device_ = StringReplace(dev, ""gpu:\\d+"", ""cpu:0"");
+      }
       AddOpType(canonical_device_);
     }
   }
@@ -217,7 +221,8 @@ TensorShapeProto VecToShapeProto(const std::vector<int64> shape_vec) {
 }
 
 bool IsPlacedOnAccelerator(const string& device) {
-  return device.find(""gpu"") != device.npos;
+  return device.find(""gpu"") != device.npos ||
+         device.find(""sycl"") != device.npos;
 }
 }  // namespace tfprof
 }  // namespace tensorflow
",0,train
74f306e3cdf653338ed40a08c38b50aed8ed810b,tensorflow/tensorflow,"Add more details in the tfl.pack error string.

PiperOrigin-RevId: 283113195
Change-Id: I7a69145252792b742b9d1d66152aa3d6eff713e8",tfl_ops.cc,"@@ -720,7 +720,8 @@ static LogicalResult Verify(PackOp op) {
   for (Value *operand : op.getOperands()) {
     auto other_type = operand->getType().cast<ShapedType>();
     if (input_type != other_type)
-      return op.emitOpError(""operands should be of the same type"");
+      return op.emitOpError(""operands should be of the same type. got "")
+             << input_type << "", "" << other_type;
   }
 
   return success();
",0,train
759125bceae56152b1060b0aa416d7dc6dad1fb2,tensorflow/tensorflow,Added utility methods to insert allocs and deallocs.,buffer_assignment.h,"@@ -94,6 +94,24 @@ struct BufferAssignmentPositions {
   /// inserted.
   Operation* getDeallocPosition() const { return deallocPosition; }
 
+  /// Inserts a new dialect-specific alloc operation that will be constructed in
+  /// the right place using the arguments provided.
+  template <typename AllocOpT, typename... Args>
+  AllocOpT insertAlloc(Value value, Args... args) const {
+    OpBuilder allocBuilder(value.getDefiningOp());
+    allocBuilder.setInsertionPoint(allocPosition);
+    return allocBuilder.create<AllocOpT>(args...);
+  }
+
+  /// Inserts a new dialect-specific dealloc operation that will be constructed
+  /// in the right place using the arguments provided.
+  template <typename DeallocOpT, typename... Args>
+  DeallocOpT insertDealloc(Value value, Args... args) const {
+    OpBuilder deallocBuilder(value.getDefiningOp());
+    deallocBuilder.setInsertionPointAfter(deallocPosition);
+    return deallocBuilder.create<DeallocOpT>(args...);
+  }
+
  private:
   Operation* allocPosition;
   Operation* deallocPosition;
",0,train
5d2c4009987a6b33a683d6cbf1ade560e1f5b59b,tensorflow/tensorflow,"[tf.data] Use a more efficient source in MapBenchmark.

Currently, we use `Dataset.from_tensors(0).repeat(None)` as the source of dummy
data in MapBenchmark. Consuming this dataset involves repeatedly creating and
destroying a TensorDataset iterator, and the cost of doing this dominates the
MapDataset execution time (for small chains). Switching to a
`Dataset.range(num_elements)` has much lower overhead per element.

From running the benchmark on my workstation (with increased num_elements), the
execution time of ""MapBenchmark.chain_length_1_single_threaded"" reduces by more
than 50%:

Before:

entry {
  name: ""MapBenchmark.chain_length_1_single_threaded""
  iters: 5
  wall_time: 1.71906495094e-06
  extras {
    key: ""num_elements""
    value {
      double_value: 1000000.0
    }
  }
}

After:

entry {
  name: ""MapBenchmark.chain_length_1_single_threaded""
  iters: 5
  wall_time: 8.35798978806e-07
  extras {
    key: ""num_elements""
    value {
      double_value: 1000000.0
    }
  }
}

PiperOrigin-RevId: 282434351
Change-Id: I7f726be65af35c5401c8c9a54c0b84bf27b9fa0f",map_benchmark.py,"@@ -28,7 +28,7 @@ class MapBenchmark(benchmark_base.DatasetBenchmarkBase):
   def benchmark_chain_of_maps(self):
 
     def benchmark_helper(chain_length, map_fn, use_inter_op_parallelism, label):
-      dataset = dataset_ops.Dataset.from_tensors(0).repeat(None)
+      dataset = dataset_ops.Dataset.range(10000)
       for _ in range(chain_length):
         dataset = dataset_ops.MapDataset(
             dataset, map_fn, use_inter_op_parallelism=use_inter_op_parallelism)
",0,test
cea9f19ebf1ff74177d91c9d18926af0f3e2ce13,tensorflow/tensorflow,"Sink standard dialect constants in sink_constants_to_control_flow pass

This is required before exporting HLO dialect ops with standard dialect constants to XLA.

Also sink constants for the sort op. Added a TODO to generalize this pass to handle more ops and non-const values defined outside.

PiperOrigin-RevId: 324301911
Change-Id: I2a67a2cc5d1f58dc5fad11a319a2f4ca63a8f434",sink_constants_to_control_flow.cc,"@@ -21,6 +21,7 @@ limitations under the License.
 #include ""mlir/Pass/PassManager.h""
 #include ""mlir/Support/LLVM.h""
 #include ""mlir/Transforms/RegionUtils.h""
+#include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
 
 namespace mlir {
 namespace mhlo {
@@ -29,6 +30,13 @@ namespace {
 
 // A pass that sinks constants implicitly captured in control flow regions. This
 // is necessary to export to XLA.
+// TODO(hinsu): Generalize this pass to handle all the ops with regions. Any
+// value used within the region that is defined outside of op's region should
+// be sunk into the region, not just the constants. Ops such as If and While,
+// whose computations don't require a fixed signature (unlike Sort or Reduce),
+// have an option to pass outside values as operands of the op to avoid
+// recomputing them internally. Note that doing so is the only option in case
+// of BlockArguments.
 class SinkConstantsToControlFlowPass
     : public mlir::PassWrapper<SinkConstantsToControlFlowPass, FunctionPass> {
   void runOnFunction() override {
@@ -39,6 +47,8 @@ class SinkConstantsToControlFlowPass
       } else if (auto if_op = llvm::dyn_cast<IfOp>(op)) {
         SinkToRegion(&if_op.true_branch());
         SinkToRegion(&if_op.false_branch());
+      } else if (auto sort_op = llvm::dyn_cast<SortOp>(op)) {
+        SinkToRegion(&sort_op.comparator());
       }
     });
   }
@@ -46,26 +56,26 @@ class SinkConstantsToControlFlowPass
  private:
   // Performs constant sinking into a region.
   static void SinkToRegion(Region* region) {
-    llvm::DenseMap<Value, ConstOp> sunk_constant;
+    llvm::DenseMap<Value, Operation*> sunk_constant;
     visitUsedValuesDefinedAbove({*region}, [&](OpOperand* use) {
       Value constant = use->get();
-      auto const_op = dyn_cast_or_null<ConstOp>(constant.getDefiningOp());
-      if (!const_op) return;
+      auto op = constant.getDefiningOp();
+      if (!op || !isa<ConstOp, ConstantOp>(op)) return;
       auto map_entry = sunk_constant.try_emplace(constant, nullptr);
       if (!map_entry.second) {
         // This constant has already been cloned into the region, reuse it.
-        use->set(map_entry.first->getSecond().getResult());
-        if (constant.use_empty()) const_op.erase();
+        use->set(map_entry.first->getSecond()->getResult(0));
+        if (op->use_empty()) op->erase();
         return;
       }
       if (constant.hasOneUse()) {
-        const_op.getOperation()->moveBefore(&region->front().front());
+        op->moveBefore(&region->front().front());
         return;
       }
-      map_entry.first->getSecond() = const_op.clone();
+      map_entry.first->getSecond() = op->clone();
       region->front().getOperations().insert(region->front().begin(),
                                              map_entry.first->getSecond());
-      use->set(map_entry.first->getSecond().getResult());
+      use->set(map_entry.first->getSecond()->getResult(0));
     });
   }
 };
",0,train
bcb5a132684424ff678e9d64fc291f49ce7fcc4c,tensorflow/tensorflow,"Explicitly cast the types of a few variables in VLOG statements to avoid an issue where the compiler isn't sure of the type when building for arm64 computers.

PiperOrigin-RevId: 207151595",virtual_scheduler.cc,"@@ -859,9 +859,10 @@ Costs VirtualScheduler::Summary() const {
     const auto& memory_cost = op_cost_pair.second.memory_time.count();
     const bool is_op_cost_accurate = !op_cost_pair.second.inaccurate;
     if (cost) {  // Skip printing out zero-cost ops.
-      VLOG(1) << strings::Printf("" + %30s : %c %10ld / %10ld / %10ld"",
-                                 op.c_str(), (is_op_cost_accurate ? ' ' : '~'),
-                                 cost, compute_cost, memory_cost);
+      VLOG(1) << strings::Printf(
+          "" + %30s : %c %10lld / %10lld / %10lld"", op.c_str(),
+          (is_op_cost_accurate ? ' ' : '~'), static_cast<int64>(cost),
+          static_cast<int64>(compute_cost), static_cast<int64>(memory_cost));
     }
   }
 
@@ -936,10 +937,12 @@ Costs VirtualScheduler::Summary() const {
                                : 0.0;
       if (cost || mem_usage_percent > 1.0) {
         // Print out only non-zero cost ops or ops with > 1% memory usage.
-        VLOG(1) << strings::Printf("" + %30s : %c %10ld / %10ld / %10ld"",
+        VLOG(1) << strings::Printf("" + %30s : %c %10lld / %10lld / %10lld"",
                                    op.c_str(),
-                                   (is_op_cost_accurate ? ' ' : '~'), cost,
-                                   compute_cost, memory_cost)
+                                   (is_op_cost_accurate ? ' ' : '~'),
+                                   static_cast<int64>(cost),
+                                   static_cast<int64>(compute_cost),
+                                   static_cast<int64>(memory_cost))
                 << "" ("" << strings::HumanReadableNumBytes(op_mem_usage) << "" [""
                 << mem_usage_percent << ""%] ""
                 << (persisent_ops.count(op) > 0 ? "": persistent op)"" : "")"");
",0,train
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_batch_matmul_op.cc,"@@ -174,122 +174,105 @@ class BatchMatMulMkl : public OpKernel {
       }
     }
 
-    MklCblasGemmBatch(CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array,
-                      &a_array[0], lda_array, &b_array[0], ldb_array,
-                      &c_array[0], ldc_array, 1, group_size);
+    MklCblasGemmBatch<Scalar>(
+        CblasRowMajor, adj_x_, adj_y_, m_array, n_array, k_array,
+        reinterpret_cast<const void**>(&a_array[0]), lda_array,
+        reinterpret_cast<const void**>(&b_array[0]), ldb_array,
+        reinterpret_cast<void**>(&c_array[0]), ldc_array, 1, group_size);
   }
 
  private:
   bool adj_x_;
   bool adj_y_;
 
+  template <typename T,
+            typename std::enable_if<(std::is_same<T, float>::value ||
+                                     std::is_same<T, double>::value),
+                                    int>::type = 0>
   void MklCblasGemmBatch(
       const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,
       const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,
-      const std::vector<MKL_INT>& K_Array, const float** A_Array,
-      const std::vector<MKL_INT>& lda_Array, const float** B_Array,
-      const std::vector<MKL_INT>& ldb_Array, float** C_Array,
+      const std::vector<MKL_INT>& K_Array, const void** A_Array,
+      const std::vector<MKL_INT>& lda_Array, const void** B_Array,
+      const std::vector<MKL_INT>& ldb_Array, void** C_Array,
       const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
       const std::vector<MKL_INT>& group_size) {
     std::vector<CBLAS_TRANSPOSE> TransA_Array(
         group_size[0], TransA ? CblasTrans : CblasNoTrans);
     std::vector<CBLAS_TRANSPOSE> TransB_Array(
         group_size[0], TransB ? CblasTrans : CblasNoTrans);
-    std::vector<float> alpha_Array(group_size[0], 1.0);
-    std::vector<float> beta_Array(group_size[0], 0.0);
-    cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0],
-                      &N_Array[0], &K_Array[0], &alpha_Array[0], A_Array,
-                      &lda_Array[0], B_Array, &ldb_Array[0], &beta_Array[0],
-                      C_Array, &ldc_Array[0], group_count, &group_size[0]);
-  }
-
-#ifdef ENABLE_MKLDNN_V1_2
-  void MklCblasGemmBatch(
-      const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,
-      const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,
-      const std::vector<MKL_INT>& K_Array, const bfloat16** A_Array,
-      const std::vector<MKL_INT>& lda_Array, const bfloat16** B_Array,
-      const std::vector<MKL_INT>& ldb_Array, bfloat16** C_Array,
-      const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
-      const std::vector<MKL_INT>& group_size) {
-    std::vector<CBLAS_TRANSPOSE> TransA_Array(group_size[0], TransA);
-    std::vector<CBLAS_TRANSPOSE> TransB_Array(group_size[0], TransB);
-    std::vector<float> alpha_Array(group_size[0], 1.0);
-    std::vector<float> beta_Array(group_size[0], 0.0);
-    dnnl_gemm_batch<bfloat16>(Layout, TransA_Array, TransB_Array, M_Array,
-                              N_Array, K_Array, alpha_Array, A_Array, lda_Array,
-                              B_Array, ldb_Array, beta_Array, C_Array,
-                              ldc_Array, group_count, group_size);
-  }
-#endif  // ENABLE_MKLDNN_V1_2
-
-  void MklCblasGemmBatch(
-      const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,
-      const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,
-      const std::vector<MKL_INT>& K_Array, const double** A_Array,
-      const std::vector<MKL_INT>& lda_Array, const double** B_Array,
-      const std::vector<MKL_INT>& ldb_Array, double** C_Array,
-      const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
-      const std::vector<MKL_INT>& group_size) {
-    std::vector<CBLAS_TRANSPOSE> TransA_array(
-        group_size[0], TransA ? CblasTrans : CblasNoTrans);
-    std::vector<CBLAS_TRANSPOSE> TransB_array(
-        group_size[0], TransB ? CblasTrans : CblasNoTrans);
-    std::vector<double> alpha_Array(group_size[0], 1.0);
-    std::vector<double> beta_Array(group_size[0], 0.0);
-    cblas_dgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0],
-                      &N_Array[0], &K_Array[0], &alpha_Array[0], A_Array,
-                      &lda_Array[0], B_Array, &ldb_Array[0], &beta_Array[0],
-                      C_Array, &ldc_Array[0], group_count, &group_size[0]);
+    if (std::is_same<T, float>::value) {
+      std::vector<float> alpha_Array(group_size[0], 1.0);
+      std::vector<float> beta_Array(group_size[0], 0.0);
+      cblas_sgemm_batch(Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0],
+                        &N_Array[0], &K_Array[0], &alpha_Array[0],
+                        reinterpret_cast<const float**>(A_Array), &lda_Array[0],
+                        reinterpret_cast<const float**>(B_Array), &ldb_Array[0],
+                        &beta_Array[0], reinterpret_cast<float**>(C_Array),
+                        &ldc_Array[0], group_count, &group_size[0]);
+    } else {
+      std::vector<double> alpha_Array(group_size[0], 1.0);
+      std::vector<double> beta_Array(group_size[0], 0.0);
+      cblas_dgemm_batch(
+          Layout, &TransA_Array[0], &TransB_Array[0], &M_Array[0], &N_Array[0],
+          &K_Array[0], &alpha_Array[0],
+          reinterpret_cast<const double**>(A_Array), &lda_Array[0],
+          reinterpret_cast<const double**>(B_Array), &ldb_Array[0],
+          &beta_Array[0], reinterpret_cast<double**>(C_Array), &ldc_Array[0],
+          group_count, &group_size[0]);
+    }
   }
 
+  template <typename T,
+            typename std::enable_if<(std::is_same<T, complex64>::value ||
+                                     std::is_same<T, complex128>::value),
+                                    int>::type = 0>
   void MklCblasGemmBatch(
       const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,
       const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,
-      const std::vector<MKL_INT>& K_Array, const complex64** A_Array,
-      const std::vector<MKL_INT>& lda_Array, const complex64** B_Array,
-      const std::vector<MKL_INT>& ldb_Array, complex64** C_Array,
+      const std::vector<MKL_INT>& K_Array, const void** A_Array,
+      const std::vector<MKL_INT>& lda_Array, const void** B_Array,
+      const std::vector<MKL_INT>& ldb_Array, void** C_Array,
       const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
       const std::vector<MKL_INT>& group_size) {
     std::vector<CBLAS_TRANSPOSE> TransA_array(
         group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
     std::vector<CBLAS_TRANSPOSE> TransB_array(
         group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
-    std::vector<complex64> alpha_Array(group_size[0], {1.0f, 0.0f});
-    std::vector<complex64> beta_Array(group_size[0], {0.0f, 0.0f});
-    cblas_cgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0],
-                      &N_Array[0], &K_Array[0],
-                      static_cast<const void*>(&alpha_Array[0]),
-                      reinterpret_cast<const void**>(A_Array), &lda_Array[0],
-                      reinterpret_cast<const void**>(B_Array), &ldb_Array[0],
-                      static_cast<const void*>(&beta_Array[0]),
-                      reinterpret_cast<void**>(C_Array), &ldc_Array[0],
-                      group_count, &group_size[0]);
+    std::vector<T> alpha_Array(group_size[0], {1.0f, 0.0f});
+    std::vector<T> beta_Array(group_size[0], {0.0f, 0.0f});
+    auto gemm_fn = (std::is_same<T, complex64>::value) ? cblas_cgemm_batch
+                                                       : cblas_zgemm_batch;
+    gemm_fn(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0],
+            &N_Array[0], &K_Array[0], static_cast<const void*>(&alpha_Array[0]),
+            reinterpret_cast<const void**>(A_Array), &lda_Array[0],
+            reinterpret_cast<const void**>(B_Array), &ldb_Array[0],
+            static_cast<const void*>(&beta_Array[0]),
+            reinterpret_cast<void**>(C_Array), &ldc_Array[0], group_count,
+            &group_size[0]);
   }
 
-  void MklCblasGemmBatch(
+#ifdef ENABLE_MKLDNN_V1_2
+  void MklCblasGemmBatch<bfloat16>(
       const CBLAS_LAYOUT Layout, const bool TransA, const bool TransB,
       const std::vector<MKL_INT>& M_Array, const std::vector<MKL_INT>& N_Array,
-      const std::vector<MKL_INT>& K_Array, const complex128** A_Array,
-      const std::vector<MKL_INT>& lda_Array, const complex128** B_Array,
-      const std::vector<MKL_INT>& ldb_Array, complex128** C_Array,
+      const std::vector<MKL_INT>& K_Array, const void** A_Array,
+      const std::vector<MKL_INT>& lda_Array, const void** B_Array,
+      const std::vector<MKL_INT>& ldb_Array, void** C_Array,
       const std::vector<MKL_INT>& ldc_Array, const MKL_INT group_count,
       const std::vector<MKL_INT>& group_size) {
-    std::vector<CBLAS_TRANSPOSE> TransA_array(
-        group_size[0], TransA ? CblasConjTrans : CblasNoTrans);
-    std::vector<CBLAS_TRANSPOSE> TransB_array(
-        group_size[0], TransB ? CblasConjTrans : CblasNoTrans);
-    std::vector<complex128> alpha_Array(group_size[0], {1.0f, 0.0f});
-    std::vector<complex128> beta_Array(group_size[0], {0.0f, 0.0f});
-    cblas_zgemm_batch(Layout, &TransA_array[0], &TransB_array[0], &M_Array[0],
-                      &N_Array[0], &K_Array[0],
-                      static_cast<const void*>(&alpha_Array[0]),
-                      reinterpret_cast<const void**>(A_Array), &lda_Array[0],
-                      reinterpret_cast<const void**>(B_Array), &ldb_Array[0],
-                      static_cast<const void*>(&beta_Array[0]),
-                      reinterpret_cast<void**>(C_Array), &ldc_Array[0],
-                      group_count, &group_size[0]);
+    std::vector<CBLAS_TRANSPOSE> TransA_Array(group_size[0], TransA);
+    std::vector<CBLAS_TRANSPOSE> TransB_Array(group_size[0], TransB);
+    std::vector<float> alpha_Array(group_size[0], 1.0);
+    std::vector<float> beta_Array(group_size[0], 0.0);
+    dnnl_gemm_batch<bfloat16>(
+        Layout, TransA_Array, TransB_Array, M_Array, N_Array, K_Array,
+        alpha_Array, reinterpret_cast<const bfloat16**>(A_Array), lda_Array,
+        reinterpret_cast<const bfloat16**>(B_Array), ldb_Array, beta_Array,
+        reinterpret_cast<bfloat16**>(C_Array), ldc_Array, group_count,
+        group_size);
   }
+#endif  // ENABLE_MKLDNN_V1_2
 };
 
 #define REGISTER_BATCH_MATMUL_MKL(TYPE)                                       \
",0,test
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_op.cc,"@@ -268,10 +268,10 @@ class MklMatMulOp : public OpKernel {
 // TODO(inteltf) Consider template specialization when adding/removing
 // additional types
 TF_CALL_float(REGISTER_CPU);
-#ifndef ENABLE_MKLDNN_V1
+#if !defined(ENABLE_MKLDNN_V1) || defined(ENABLE_MKLDNN_V1_2)
 // MKLDNNv1 does not have support for bfloat16 GEMM. Only V1.2 has that support.
 TF_CALL_bfloat16(REGISTER_CPU);
-#endif  // ENABLE_MKLDNN_V1
+#endif  // !defined(ENABLE_MKLDNN_V1) || defined(ENABLE_MKLDNN_V1_2)
 
 #ifndef INTEL_MKL_DNN_ONLY
 TF_CALL_double(REGISTER_CPU);
",0,test
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_op_fused.cc,"@@ -187,7 +187,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T> {
 
   void ExtendMklDnnMatMulFwdParams(OpKernelContext* ctx,
                                    MklDnnMatMulFwdParams& params) {
-#ifndef ENABLE_MKL_DNN_V1
+#ifndef ENABLE_MKLDNN_V1
     if (fused_ops_.size() == 2) {
       string post_op = fused_ops_[1];
 
@@ -203,7 +203,7 @@ class MklFusedMatMulOp : public MklDnnMatMulOpBase<T> {
                      ""Unsupported post-argument in MklFusedMatMul: "", post_op));
       }
     }
-#endif
+#endif  // !ENABLE_MKLDNN_V1
   }
 
  private:
",0,test
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_matmul_ops_common.h,"@@ -97,11 +97,8 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
     context_.dst_mem->set_data_handle(static_cast<void*>(dst_data));
 
 #ifdef ENABLE_MKLDNN_V1
-    DCHECK_EQ(context_.fwd_primitives.size(), context_.net_args.size());
-    for (size_t i = 0; i < context_.fwd_primitives.size(); ++i) {
-      context_.fwd_primitives.at(i).execute(*context_.fwd_stream,
-                                            context_.net_args.at(i));
-    }
+    execute_primitives(context_.fwd_primitives, context_.fwd_stream,
+                       context_.net_args);
 #else
     context_.fwd_stream->submit(context_.fwd_primitives);
 #endif  // ENABLE_MKLDNN_V1
@@ -117,7 +114,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
   // In MKL-DNN v1.x, memory format tags only provide a partial description
   // of the memory layout. Hence, these functions are disabled for v1.x.
   memory::format GetSrcMemoryFormat() const { return context_.src_fmt; }
-  memory::format GetweightMemoryFormat() const { return context_.weight_fmt; }
+  memory::format GetWeightMemoryFormat() const { return context_.weight_fmt; }
 #endif  // ENABLE_MKLDNN_V1
 
   std::shared_ptr<mkldnn::inner_product_forward::primitive_desc>
@@ -132,7 +129,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
     // Expected memory format for this primitive instance
     MEMORY_FORMAT src_fmt;
     MEMORY_FORMAT weight_fmt;
-#endif  // ENABLE_MKLDNN_V1
+#endif  // !ENABLE_MKLDNN_V1
 
     // MKL-DNN memory.
     std::shared_ptr<mkldnn::memory> src_mem;
@@ -164,7 +161,7 @@ class MklDnnMatMulFwdPrimitive : public MklPrimitive {
 #ifndef ENABLE_MKLDNN_V1
           src_fmt(MEMORY_FORMAT::any),
           weight_fmt(MEMORY_FORMAT::any),
-#endif  // ENABLE_MKLDNN_V1
+#endif  // !ENABLE_MKLDNN_V1
           src_mem(nullptr),
           weight_mem(nullptr),
           bias_mem(nullptr),
",0,test
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_qmatmul_op.cc,"@@ -243,11 +243,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Toutput> {
 
       // Check if src and weight data need to be reordered.
       Tinput* src_data = nullptr;
-#ifdef ENABLE_MKLDNN_V1
       if (IS_SRC_REORDER_NEEDED(src_md, matmul_fwd_pd, matmul_fwd)) {
-#else
-      if (src_md.data.format != matmul_fwd->GetSrcMemoryFormat()) {
-#endif
         src.SetUsrMem(src_md, &src_tensor);
         src.CheckReorderToOpMem(MEMORY_PD_WITHOUT_DATA(
             matmul_fwd_pd.get()->PRIMITIVE_DESC_SRC, this->cpu_engine_));
@@ -258,11 +254,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Toutput> {
       }
 
       Tweight* weight_data = nullptr;
-#ifdef ENABLE_MKLDNN_V1
       if (IS_WEIGHTS_REORDER_NEEDED(weight_md, matmul_fwd_pd, matmul_fwd)) {
-#else
-      if (weight_md.data.format != matmul_fwd->GetweightMemoryFormat()) {
-#endif
         bool is_weight_cached = false;
         // For batch size 1, MKL-DNN expects that weight format is OI whereas
         // TF default format is IO. So in that case convert weight from IO
@@ -280,7 +272,7 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Toutput> {
               context, static_cast<int32>(weight_mkl_shape.GetTfDataFormat()));
 #else
           weight_data = GetCachedWeight(
-              context, static_cast<int32>(matmul_fwd->GetweightMemoryFormat()));
+              context, static_cast<int32>(matmul_fwd->GetWeightMemoryFormat()));
 #endif
           is_weight_cached = (weight_data != nullptr);
         }
@@ -554,14 +546,10 @@ class MklDnnQuantizedMatMulOp : public MklDnnMatMulOpBase<Toutput> {
     OP_REQUIRES_OK(context, context->allocate_persistent(
                                 DT_INT32, weight_mkl_format, &weight_oi_md,
                                 &weight_md_tensor_ptr));
-#ifdef ENABLE_MKLDNN_V1
-    // Using the logic from filter caching in mkl_conv_ops.cc
-    weight_md_tensor_ptr->scalar<int32>()() =
-        static_cast<int32>(weight_mkl_shape.GetTfDataFormat());
-#else
     weight_md_tensor_ptr->scalar<int32>()() =
-        matmul_fwd_pd.get()->weights_primitive_desc().desc().data.format;
-#endif  // ENABLE_MKLDNN_V1
+        static_cast<int32>(GET_TF_DATA_FORMAT(
+            weight_mkl_shape,
+            matmul_fwd_pd.get()->weights_primitive_desc().desc()));
   }
 
   Tweight* GetCachedWeight(OpKernelContext* context, int32 weight_mf)
",0,test
b15bccccbcddef2fa576e14b7e67a06e10f11690,tensorflow/tensorflow,Addressing comments,mkl_types.h,"@@ -149,7 +149,7 @@ namespace tensorflow {
 #define IS_SRC_REORDER_NEEDED(src_md, op_pd, op) \
   src_md.data.format != op->GetSrcMemoryFormat()
 #define IS_WEIGHTS_REORDER_NEEDED(weights_md, op_pd, op) \
-  weights_md.data.format != op->GetWeightsMemoryFormat()
+  weights_md.data.format != op->GetWeightMemoryFormat()
 #define GET_MEMORY_DESC_FROM_MEM_PTR(mem_ptr) \
   mem_ptr->get_primitive_desc().desc()
 #define GET_MEMORY_PRIMITIVE_DESC_FROM_MEM_PTR(mem_ptr) \
",0,test
6f0f757c808266a184da4004faf6f66b66f36014,tensorflow/tensorflow,Replace TEST_F with XLA_TEST_F to allow for disabling (#12520),copy_test.cc,"@@ -56,30 +56,30 @@ class CopyOpTest : public HloTestBase {
                                 tensorflow::gtl::ArraySlice<int64> permutation);
 };
 
-TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0<bool>(true)); }
+XLA_TEST_F(CopyOpTest, CopyR0Bool) { TestCopyOp(*Literal::CreateR0<bool>(true)); }
 
-TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1<uint32>({})); }
+XLA_TEST_F(CopyOpTest, CopyR1S0U32) { TestCopyOp(*Literal::CreateR1<uint32>({})); }
 
-TEST_F(CopyOpTest, CopyR1S3U32) {
+XLA_TEST_F(CopyOpTest, CopyR1S3U32) {
   TestCopyOp(*Literal::CreateR1<uint32>({1, 2, 3}));
 }
 
-TEST_F(CopyOpTest, CopyR3F32_2x2x3) {
+XLA_TEST_F(CopyOpTest, CopyR3F32_2x2x3) {
   TestCopyOp(*Literal::CreateR3({{{1.0f, 2.0f, 3.0f}, {4.0f, 5.0f, 6.0f}},
                                  {{1.1f, 2.1f, 3.1f}, {6.1f, 3.5f, 2.8f}}}));
 }
 
-TEST_F(CopyOpTest, CopyR4S32_2x2x3x2) {
+XLA_TEST_F(CopyOpTest, CopyR4S32_2x2x3x2) {
   TestCopyOp(*Literal::CreateR4(
       {{{{1, -2}, {-4, 5}, {6, 7}}, {{8, 9}, {10, 11}, {12, 13}}},
        {{{10, 3}, {7, -2}, {3, 6}}, {{2, 5}, {-11, 5}, {-2, -5}}}}));
 }
 
-TEST_F(CopyOpTest, CopyR4S32_0x2x3x2) {
+XLA_TEST_F(CopyOpTest, CopyR4S32_0x2x3x2) {
   TestCopyOp(*Literal::CreateR4FromArray4D(Array4D<int32>(0, 2, 3, 2)));
 }
 
-TEST_F(CopyOpTest, CopyParameterScalar) {
+XLA_TEST_F(CopyOpTest, CopyParameterScalar) {
   auto builder = HloComputation::Builder(TestName());
 
   // Copy literal to device to use as parameter.
@@ -102,7 +102,7 @@ TEST_F(CopyOpTest, CopyParameterScalar) {
   LiteralTestUtil::ExpectR0Near<float>(42.0f, *result, error_spec_);
 }
 
-TEST_F(CopyOpTest, CopyConstantR2Twice) {
+XLA_TEST_F(CopyOpTest, CopyConstantR2Twice) {
   auto builder = HloComputation::Builder(TestName());
 
   auto literal = Literal::CreateR2<float>({{1.0, 2.0}, {3.0, 4.0}});
@@ -123,7 +123,7 @@ TEST_F(CopyOpTest, CopyConstantR2Twice) {
                                        error_spec_);
 }
 
-TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) {
+XLA_TEST_F(CopyOpTest, CopyConstantR2DifferentLayouts) {
   HloComputation::Builder builder(TestName());
 
   std::unique_ptr<Literal> literal =
",0,train
ffb40e2bf2a47f299955041faf6bd5740baa69ef,tensorflow/tensorflow,"Ensure that PyFunc arrays are C-ordered.
Change: 120625852",py_func_test.py,"@@ -153,5 +153,11 @@ class PyOpTest(tf.test.TestCase):
       self.assertEqual(sess.run(x), 1)
       self.assertEqual(sess.run(x), 2)
 
+  def testCOrder(self):
+    with self.test_session():
+      val = [[1, 2], [3, 4]]
+      x, = tf.py_func(lambda: np.array(val, order=""F""), [], [tf.int64])
+      self.assertAllEqual(val, x.eval())
+
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
ffb40e2bf2a47f299955041faf6bd5740baa69ef,tensorflow/tensorflow,"Ensure that PyFunc arrays are C-ordered.
Change: 120625852",script_ops.py,"@@ -62,9 +62,9 @@ class FuncRegistry(object):
     # Ensures that we return either a single numpy array or a list of numpy
     # arrays.
     if isinstance(ret, (tuple, list)):
-      ret = [np.array(x) for x in ret]
+      ret = [np.array(x, order=""C"") for x in ret]
     else:
-      ret = np.array(ret)
+      ret = np.array(ret, order=""C"")
     return ret
 
   def size(self):
",0,train
3ed1e3029e68ca8cb6306c8f31182306741dcf0c,tensorflow/tensorflow,"Remove static_assert for type checking in FlatBufferVectorToTfLiteTypeArray.

It turns out that std::is_same() has dropped the non-string argument in c++17. This breaks internal users that are building against qualcomm.

PiperOrigin-RevId: 317790812
Change-Id: If56a61d20426670251b55f370a6b5fa886a49e21",micro_allocator.cc,"@@ -401,12 +401,9 @@ TfLiteStatus FlatBufferVectorToTfLiteTypeArray(
     kTfLiteArrayType** result) {
   TFLITE_DCHECK(error_reporter != nullptr);
   TFLITE_DCHECK(flatbuffer_array != nullptr);
-  // Only two conversions are supported - float and int32 - ensure that these
-  // match at compile time instead of duplicating functions here:
-  static_assert((std::is_same<kFlatBufferVectorType, int32_t>() &&
-                 std::is_same<kTfLiteArrayType, TfLiteIntArray>()) ||
-                (std::is_same<kFlatBufferVectorType, float>() &&
-                 std::is_same<kTfLiteArrayType, TfLiteFloatArray>()));
+  // TODO(b/159668691): Consider adding type assertion or breaking this function
+  // into multiple functions for each type. std::is_same is c++11 and has a
+  // special updated constructor in c++17 that requires a string argument.
   if (FLATBUFFERS_LITTLEENDIAN) {
     // On little-endian machines, TfLite*Array happens to have the same memory
     // layout as flatbuffers:Vector<kFlatBufferVectorType>, so we can
",0,train
f4b06261c900c3217891eea6285d603fcf11776b,tensorflow/tensorflow,"Use self.handle inside ResourceVariable to allow tf.distribute to customize
handle behavior

I'm working on a new version of DistributedVariable which directly inherits from BaseResourceVariable. Its handle would return different resource tensors under different contexts, e.g. self.handle would be a replicated tensor under a tpu context. This can avoid the need to use raw variable operations for special resource handles like tpu replicate handle or parallel device handle.

PiperOrigin-RevId: 355663353
Change-Id: I16201f94ef27a0dc7ac1491c616d7bd68397123a",packed_distributed_variable.py,"@@ -252,7 +252,8 @@ class PackedVarAndDevice(object):
     self._device = device
 
   def __getattr__(self, name):
-    return getattr(self._var, name)
+    with ops.device(self._device):
+      return getattr(self._var, name)
 
   def var(self):
     return self._var
",0,train
f4b06261c900c3217891eea6285d603fcf11776b,tensorflow/tensorflow,"Use self.handle inside ResourceVariable to allow tf.distribute to customize
handle behavior

I'm working on a new version of DistributedVariable which directly inherits from BaseResourceVariable. Its handle would return different resource tensors under different contexts, e.g. self.handle would be a replicated tensor under a tpu context. This can avoid the need to use raw variable operations for special resource handles like tpu replicate handle or parallel device handle.

PiperOrigin-RevId: 355663353
Change-Id: I16201f94ef27a0dc7ac1491c616d7bd68397123a",resource_variable_ops.py,"@@ -516,12 +516,12 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
   @property
   def device(self):
     """"""The device this variable is on.""""""
-    return self._handle.device
+    return self.handle.device
 
   @property
   def graph(self):
     """"""The `Graph` of this variable.""""""
-    return self._handle.graph
+    return self.handle.graph
 
   @property
   def name(self):
@@ -596,7 +596,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
   @property
   def op(self):
     """"""The op for this variable.""""""
-    return self._handle.op
+    return self.handle.op
 
   @property
   def trainable(self):
@@ -655,7 +655,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
     else:
       new_variable = copy_to_graph_uninitialized(self)
     obj_map = {self: new_variable}
-    resource_map = {self._handle: new_variable.handle}
+    resource_map = {self.handle: new_variable.handle}
     return obj_map, resource_map
 
   def _read_variable_op(self):
@@ -663,8 +663,8 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
 
     def read_and_set_handle():
       result = gen_resource_variable_ops.read_variable_op(
-          self._handle, self._dtype)
-      _maybe_set_handle_data(self._dtype, self._handle, result)
+          self.handle, self._dtype)
+      _maybe_set_handle_data(self._dtype, self.handle, result)
       return result
 
     if getattr(self, ""_caching_device"", None) is not None:
@@ -678,7 +678,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
       # Note that if a control flow context is active the input of the read op
       # might not actually be the handle. This line bypasses it.
       tape.record_operation(
-          ""ReadVariableOp"", [result], [self._handle],
+          ""ReadVariableOp"", [result], [self.handle],
           backward_function=lambda x: [x],
           forward_function=lambda x: [x])
     return result
@@ -703,12 +703,12 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
     with ops.name_scope(""Gather"" if name is None else name) as name:
       variable_accessed(self)
       value = gen_resource_variable_ops.resource_gather(
-          self._handle, indices, dtype=self._dtype, name=name)
+          self.handle, indices, dtype=self._dtype, name=name)
 
       if self._dtype == dtypes.variant:
         # For DT_VARIANT types, the handle's shape_and_type[1:] stores the
         # variant's handle data.  Extract it.
-        handle_data = get_eager_safe_handle_data(self._handle)
+        handle_data = get_eager_safe_handle_data(self.handle)
         if handle_data.is_set and len(handle_data.shape_and_type) > 1:
           value._handle_data = (  # pylint: disable=protected-access
               cpp_shape_inference_pb2.CppShapeInferenceResult.HandleData(
@@ -722,7 +722,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
       if self.trainable:
         variable_accessed(self)
       value = gen_resource_variable_ops.resource_gather_nd(
-          self._handle, indices, dtype=self._dtype, name=name)
+          self.handle, indices, dtype=self._dtype, name=name)
 
     return array_ops.identity(value)
 
@@ -855,7 +855,7 @@ class BaseResourceVariable(variables.VariableV1, core.Tensor):
   def _lazy_read(self, op):
     variable_accessed(self)
     return _UnreadVariable(
-        handle=self._handle,
+        handle=self.handle,
         dtype=self.dtype,
         shape=self._shape,
         in_graph_mode=self._in_graph_mode,
",0,train
bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python

PiperOrigin-RevId: 392819614
Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",interpreter.py,"@@ -269,6 +269,50 @@ class SignatureRunner(object):
           output_index, self._subgraph_index)
     return result
 
+  def get_input_details(self):
+    """"""Gets input tensor details.
+
+    Returns:
+      A dictionary from input name to tensor details where each item is a
+      dictionary with details about an input tensor. Each dictionary contains
+      the following fields that describe the tensor:
+
+      + `name`: The tensor name.
+      + `index`: The tensor index in the interpreter.
+      + `shape`: The shape of the tensor.
+      + `shape_signature`: Same as `shape` for models with known/fixed shapes.
+        If any dimension sizes are unknown, they are indicated with `-1`.
+      + `dtype`: The numpy data type (such as `np.int32` or `np.uint8`).
+      + `quantization`: Deprecated, use `quantization_parameters`. This field
+        only works for per-tensor quantization, whereas
+        `quantization_parameters` works in all cases.
+      + `quantization_parameters`: A dictionary of parameters used to quantize
+        the tensor:
+        ~ `scales`: List of scales (one if per-tensor quantization).
+        ~ `zero_points`: List of zero_points (one if per-tensor quantization).
+        ~ `quantized_dimension`: Specifies the dimension of per-axis
+        quantization, in the case of multiple scales/zero_points.
+      + `sparsity_parameters`: A dictionary of parameters used to encode a
+        sparse tensor. This is empty if the tensor is dense.
+    """"""
+    result = {}
+    for input_name, tensor_index in self._inputs.items():
+      result[input_name] = self._interpreter._get_tensor_details(tensor_index)  # pylint: disable=protected-access
+    return result
+
+  def get_output_details(self):
+    """"""Gets output tensor details.
+
+    Returns:
+      A dictionary from input name to tensor details where each item is a
+      dictionary with details about an output tensor. The dictionary contains
+      the same fields as described for `get_input_details()`.
+    """"""
+    result = {}
+    for output_name, tensor_index in self._outputs:
+      result[output_name] = self._interpreter._get_tensor_details(tensor_index)  # pylint: disable=protected-access
+    return result
+
 
 @_tf_export('lite.experimental.OpResolverType')
 @enum.unique
",0,test
bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python

PiperOrigin-RevId: 392819614
Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",lite_v2_test.py,"@@ -178,10 +178,23 @@ class FromConcreteFunctionTest(lite_v2_test_util.ModelTest):
     add_signature_runner = interpreter.get_signature_runner('add')
     add_output = add_signature_runner(x=input_data)
     self.assertEqual(add_output['output_0'], 3)
+    input_details = add_signature_runner.get_input_details()
+    self.assertEqual(1, len(input_details))
+    self.assertEqual('add_x:0', input_details['x']['name'])
+    self.assertEqual(np.float32, input_details['x']['dtype'])
+    self.assertTrue(([1] == input_details['x']['shape']).all())
+    self.assertEqual((0.0, 0), input_details['x']['quantization'])
 
     sub_signature_runner = interpreter.get_signature_runner('sub')
     sub_output = sub_signature_runner(x=input_data)
     self.assertEqual(sub_output['output_0'], -2)
+    output_details = sub_signature_runner.get_output_details()
+    self.assertEqual(1, len(output_details))
+    self.assertEqual('StatefulPartitionedCall:0',
+                     output_details['output_0']['name'])
+    self.assertEqual(np.float32, output_details['output_0']['dtype'])
+    self.assertTrue(([1] == output_details['output_0']['shape']).all())
+    self.assertEqual((0.0, 0), output_details['output_0']['quantization'])
 
   def _getIntegerQuantizeModel(self, num_filters=16):
     np.random.seed(0)
",0,test
bf31a56af7748b2a022ade234b98b41140d68248,tensorflow/tensorflow,"Add the {input, output}_details method to the Signature Runner in Python

PiperOrigin-RevId: 392819614
Change-Id: I4ec5d522b172405f0db1c7ed04d273ecfe4e340e",calibrator.py,"@@ -87,11 +87,13 @@ class Calibrator(object):
         # Convert signature based inputs to the tensor index based data.
         if not hasattr(self, ""_interpreter""):
           self._interpreter = Interpreter(model_content=self._model_content)
-        input_array = [None] * len(sample)
+        input_array = []
         signature_runner = self._interpreter.get_signature_runner()
-        for input_name, value in sample.items():
-          tensor_index = signature_runner._inputs[input_name]  # pylint: disable=protected-access
-          input_array[tensor_index] = value
+        input_details = sorted(
+            signature_runner.get_input_details().items(),
+            key=lambda item: item[1][""index""])
+        for input_name, input_detail in input_details:
+          input_array.append(sample[input_name])
       elif isinstance(sample, list):
         input_array = sample
       else:
",0,test
6598d11e8b8ea3f33c65091d5ffdfacbbc98cfad,tensorflow/tensorflow,"Whitelist XlaBroadcastHelperOp and enable tests XlaDynamicUpdateSliceOp

PiperOrigin-RevId: 311049106
Change-Id: I6f47f784f744ba3e60f3f377fa90412b1114d3b5",legalize_tf_with_tf2xla.cc,"@@ -165,6 +165,7 @@ static bool IsOpWhitelisted(Operation* op) {
     TypeID::get<TF::TruncateModOp>(),
     TypeID::get<TF::UnpackOp>(),
     TypeID::get<TF::XdivyOp>(),
+    TypeID::get<TF::XlaBroadcastHelperOp>(),
     TypeID::get<TF::XlaConvOp>(),
     TypeID::get<TF::XlaDotOp>(),
     TypeID::get<TF::XlaPadOp>(),
",0,train
6598d11e8b8ea3f33c65091d5ffdfacbbc98cfad,tensorflow/tensorflow,"Whitelist XlaBroadcastHelperOp and enable tests XlaDynamicUpdateSliceOp

PiperOrigin-RevId: 311049106
Change-Id: I6f47f784f744ba3e60f3f377fa90412b1114d3b5",xla_ops_test.py,"@@ -51,7 +51,6 @@ class XlaOpsNumericalTest(xla_test.XLATestCase, parameterized.TestCase):
         equality_fn = self.assertAllClose
       equality_fn(result, expected, rtol=1e-3)
 
-  @test_util.disable_mlir_bridge('Not supported yet')
   def testAdd(self):
     for dtype in self.numeric_types:
       self._assertOpOutputMatchesExpected(
",0,train
a3dfb6f36692c5a887c2ae10713f408772b00d2f,tensorflow/tensorflow,"Change ThenBlasLtMatmul implem to a template

- This is required to ensure that the template version of
  DoBlasLtMatmul is called (which is important because it performs
  additional type checks).",stream.cc,"@@ -4809,28 +4809,79 @@ Stream &Stream::ThenBlasGemmStridedBatched(
               c, ldc, stride_c, batch_count);
 }
 
-Stream& Stream::ThenBlasLtMatmul(const blas::IBlasLtMatmulPlan* plan,
-                                 const HostOrDeviceScalar<void>& alpha,
-                                 DeviceMemoryBase a, DeviceMemoryBase b,
-                                 const HostOrDeviceScalar<void>& beta,
-                                 DeviceMemoryBase c,
-                                 ScratchAllocator* scratch_allocator,
-                                 const blas::IBlasLtMatmulAlgorithm* algorithm,
-                                 DeviceMemoryBase bias,
-                                 blas::ProfileResult* output_profile_result) {
+template <typename ABType, typename CType>
+Stream& Stream::ThenBlasLtMatmulImpl(
+    const blas::IBlasLtMatmulPlan* plan, const HostOrDeviceScalar<CType>& alpha,
+    const DeviceMemory<ABType>& a, const DeviceMemory<ABType>& b,
+    const HostOrDeviceScalar<CType>& beta, DeviceMemory<CType>* c,
+    ScratchAllocator* scratch_allocator,
+    const blas::IBlasLtMatmulAlgorithm* algorithm,
+    const DeviceMemory<CType>& bias,
+    blas::ProfileResult* output_profile_result) {
   VLOG_CALL(PARAM(plan), PARAM(alpha), PARAM(a), PARAM(b), PARAM(beta),
             PARAM(c), PARAM(algorithm), PARAM(bias));
 
-  ThenBlasWithProfileImpl<const blas::IBlasLtMatmulPlan*,
-                          const HostOrDeviceScalar<void>&, DeviceMemoryBase,
-                          DeviceMemoryBase, const HostOrDeviceScalar<void>&,
-                          DeviceMemoryBase, ScratchAllocator*,
-                          const blas::IBlasLtMatmulAlgorithm*, DeviceMemoryBase>
+  ThenBlasWithProfileImpl<
+      const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar<CType>&,
+      const DeviceMemory<ABType>&, const DeviceMemory<ABType>&,
+      const HostOrDeviceScalar<CType>&, DeviceMemory<CType>*, ScratchAllocator*,
+      const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory<CType>&>
       impl;
   return impl(this, &blas::BlasSupport::DoBlasLtMatmul, plan, alpha, a, b, beta,
               c, scratch_allocator, algorithm, bias, output_profile_result);
 }
 
+// Explicit template instantiations for each supported type combination.
+template Stream& Stream::ThenBlasLtMatmulImpl<int8, int32>(
+    const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar<int32>&,
+    const DeviceMemory<int8>&, const DeviceMemory<int8>&,
+    const HostOrDeviceScalar<int32>&, DeviceMemory<int32>*, ScratchAllocator*,
+    const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory<int32>&,
+    blas::ProfileResult*);
+
+template Stream& Stream::ThenBlasLtMatmulImpl<Eigen::half, Eigen::half>(
+    const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar<Eigen::half>&,
+    const DeviceMemory<Eigen::half>&, const DeviceMemory<Eigen::half>&,
+    const HostOrDeviceScalar<Eigen::half>&, DeviceMemory<Eigen::half>*,
+    ScratchAllocator*, const blas::IBlasLtMatmulAlgorithm*,
+    const DeviceMemory<Eigen::half>&, blas::ProfileResult*);
+
+template Stream& Stream::ThenBlasLtMatmulImpl<float, float>(
+    const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar<float>&,
+    const DeviceMemory<float>&, const DeviceMemory<float>&,
+    const HostOrDeviceScalar<float>&, DeviceMemory<float>*, ScratchAllocator*,
+    const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory<float>&,
+    blas::ProfileResult*);
+
+template Stream& Stream::ThenBlasLtMatmulImpl<double, double>(
+    const blas::IBlasLtMatmulPlan*, const HostOrDeviceScalar<double>&,
+    const DeviceMemory<double>&, const DeviceMemory<double>&,
+    const HostOrDeviceScalar<double>&, DeviceMemory<double>*, ScratchAllocator*,
+    const blas::IBlasLtMatmulAlgorithm*, const DeviceMemory<double>&,
+    blas::ProfileResult*);
+
+template Stream&
+Stream::ThenBlasLtMatmulImpl<std::complex<float>, std::complex<float>>(
+    const blas::IBlasLtMatmulPlan*,
+    const HostOrDeviceScalar<std::complex<float>>&,
+    const DeviceMemory<std::complex<float>>&,
+    const DeviceMemory<std::complex<float>>&,
+    const HostOrDeviceScalar<std::complex<float>>&,
+    DeviceMemory<std::complex<float>>*, ScratchAllocator*,
+    const blas::IBlasLtMatmulAlgorithm*,
+    const DeviceMemory<std::complex<float>>&, blas::ProfileResult*);
+
+template Stream&
+Stream::ThenBlasLtMatmulImpl<std::complex<double>, std::complex<double>>(
+    const blas::IBlasLtMatmulPlan*,
+    const HostOrDeviceScalar<std::complex<double>>&,
+    const DeviceMemory<std::complex<double>>&,
+    const DeviceMemory<std::complex<double>>&,
+    const HostOrDeviceScalar<std::complex<double>>&,
+    DeviceMemory<std::complex<double>>*, ScratchAllocator*,
+    const blas::IBlasLtMatmulAlgorithm*,
+    const DeviceMemory<std::complex<double>>&, blas::ProfileResult*);
+
 Stream &Stream::ThenSetRngSeed(const uint8 *seed, uint64 seed_bytes) {
   VLOG_CALL(PARAM(seed), PARAM(seed_bytes));
 
",0,train
a3dfb6f36692c5a887c2ae10713f408772b00d2f,tensorflow/tensorflow,"Change ThenBlasLtMatmul implem to a template

- This is required to ensure that the template version of
  DoBlasLtMatmul is called (which is important because it performs
  additional type checks).",stream.h,"@@ -1679,16 +1679,6 @@ class Stream {
                        DeviceMemory<std::complex<double>> *b, int ldb);
 
   // See BlasSupport::DoBlasLtMatmul.
-  Stream& ThenBlasLtMatmul(const blas::IBlasLtMatmulPlan* plan,
-                           const HostOrDeviceScalar<void>& alpha,
-                           DeviceMemoryBase a, DeviceMemoryBase b,
-                           const HostOrDeviceScalar<void>& beta,
-                           DeviceMemoryBase c,
-                           ScratchAllocator* scratch_allocator,
-                           const blas::IBlasLtMatmulAlgorithm* algorithm,
-                           DeviceMemoryBase bias,
-                           blas::ProfileResult* output_profile_result);
-
   // Note that we prevent alpha and beta from being used to deduce CType so that
   // they can be constructed implicitly from values of type CType. Without this,
   // type deduction would fail when this function is called with a value of type
@@ -1703,8 +1693,8 @@ class Stream {
       const blas::IBlasLtMatmulAlgorithm* algorithm,
       const DeviceMemory<CType>& bias = {},
       blas::ProfileResult* output_profile_result = nullptr) {
-    return ThenBlasLtMatmul(plan, alpha, a, b, beta, *c, scratch_allocator,
-                            algorithm, bias, output_profile_result);
+    return ThenBlasLtMatmulImpl(plan, alpha, a, b, beta, c, scratch_allocator,
+                                algorithm, bias, output_profile_result);
   }
 
   // See FftSupport::DoFft.
@@ -2139,6 +2129,19 @@ class Stream {
       const dnn::BatchDescriptor &bias_descriptor,
       DeviceMemory<T> *backward_bias_data);
 
+  // Implementation of ThenBlasLtMatmul that is shared by all types.
+  template <typename ABType, typename CType>
+  Stream& ThenBlasLtMatmulImpl(const blas::IBlasLtMatmulPlan* plan,
+                               const HostOrDeviceScalar<CType>& alpha,
+                               const DeviceMemory<ABType>& a,
+                               const DeviceMemory<ABType>& b,
+                               const HostOrDeviceScalar<CType>& beta,
+                               DeviceMemory<CType>* c,
+                               ScratchAllocator* scratch_allocator,
+                               const blas::IBlasLtMatmulAlgorithm* algorithm,
+                               const DeviceMemory<CType>& bias,
+                               blas::ProfileResult* output_profile_result);
+
   SE_DISALLOW_COPY_AND_ASSIGN(Stream);
 };
 
",0,train
3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate
Change: 150085654",estimator.py,"@@ -36,6 +36,7 @@ from tensorflow.python.estimator import run_config
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import random_seed
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import metrics as metrics_lib
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import gfile
 from tensorflow.python.platform import tf_logging as logging
@@ -602,13 +603,15 @@ class Estimator(object):
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL)
 
+      self._verify_default_metric_key(model_fn_lib.MetricKeys.LOSS,
+                                      estimator_spec.eval_metric_ops)
+      estimator_spec.eval_metric_ops[
+          model_fn_lib.MetricKeys.LOSS] = metrics_lib.mean(estimator_spec.loss)
+
       update_op, eval_dict = _extract_metric_update_ops(
           estimator_spec.eval_metric_ops)
 
-      if ops.GraphKeys.GLOBAL_STEP in six.iterkeys(eval_dict):
-        raise ValueError(
-            'Metric with name `global_step` is not allowed, because Estimator '
-            'already defines a default metric with the same name.')
+      self._verify_default_metric_key(ops.GraphKeys.GLOBAL_STEP, eval_dict)
       eval_dict[ops.GraphKeys.GLOBAL_STEP] = global_step_tensor
 
       eval_results = evaluation._evaluate_once(  # pylint: disable=protected-access
@@ -627,6 +630,12 @@ class Estimator(object):
 
     return eval_results
 
+  def _verify_default_metric_key(self, metric_key, eval_dict):
+    if metric_key in six.iterkeys(eval_dict):
+      raise ValueError(
+          'Metric with name `%s` is not allowed, because Estimator '
+          'already defines a default metric with the same name.' % metric_key)
+
 
 def _get_replica_device_setter(config):
   """"""Creates a replica device setter if required as a default device_fn.
",0,test
3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate
Change: 150085654",estimator_test.py,"@@ -401,10 +401,8 @@ class EstimatorEvaluateTest(test.TestCase):
             'metric_value': 2.})
     est.train(dummy_input_fn, steps=5)
     scores = est.evaluate(dummy_input_fn, steps=1)
-    self.assertDictEqual(
-        {'metric': 2.,
-         'global_step': 5},
-        scores)
+    self.assertIn('metric', scores)
+    self.assertAlmostEqual(2., scores['metric'])
 
   def test_steps0_raises_error(self):
     est = estimator.Estimator(
@@ -431,6 +429,36 @@ class EstimatorEvaluateTest(test.TestCase):
         ValueError, 'Metric with name `global_step` is not allowed'):
       est.evaluate(dummy_input_fn, steps=1)
 
+  def test_global_step_is_reported(self):
+    est = estimator.Estimator(
+        model_fn=_model_fn_with_eval_metric_ops,
+        params={'metric_name': 'metric',
+                'metric_value': 2.})
+    est.train(dummy_input_fn, steps=5)
+    scores = est.evaluate(dummy_input_fn, steps=1)
+    self.assertIn('global_step', scores)
+    self.assertEqual(5, scores['global_step'])
+
+  def test_loss_metric_is_reported(self):
+
+    def _model_fn_with_incremental_loss(features, labels, mode):
+      _, _ = features, labels
+      local_weight = variables.Variable(
+          0., name='local_weight', collections=[ops.GraphKeys.LOCAL_VARIABLES])
+      # Loss will be 2, 4, 6, ...
+      loss = 2 * state_ops.assign_add(local_weight, 1.)
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=loss,
+          train_op=state_ops.assign_add(training.get_global_step(), 1))
+
+    est = estimator.Estimator(model_fn=_model_fn_with_incremental_loss)
+    est.train(dummy_input_fn, steps=1)
+    scores = est.evaluate(dummy_input_fn, steps=5)
+    self.assertIn(model_fn_lib.MetricKeys.LOSS, scores)
+    # Average loss will be (2 + 4 + 6 + 8 + 10)/5=6
+    self.assertAlmostEqual(6., scores[model_fn_lib.MetricKeys.LOSS])
+
   def test_hooks_are_used(self):
     step_counter_hook = _StepCounterHook()
 
@@ -454,10 +482,7 @@ class EstimatorEvaluateTest(test.TestCase):
         dummy_input_fn,
         steps=1,
         checkpoint_path=saver.latest_checkpoint(est1.model_dir))
-    self.assertDictEqual(
-        {'metric': 2.,
-         'global_step': 5},
-        scores)
+    self.assertEqual(5, scores['global_step'])
 
   def test_scaffold_is_used(self):
 
",0,test
3db982b63419fec84084fc606bbaa0de3277b996,tensorflow/tensorflow,"Added mean 'Loss' to Estimator.Evaluate
Change: 150085654",model_fn.py,"@@ -47,6 +47,13 @@ class ModeKeys(object):
   PREDICT = 'infer'
 
 
+class MetricKeys(object):
+  """"""Metric key strings.""""""
+  LOSS = 'loss'
+  AUC = 'auc'
+  ACCURACY = 'accuracy'
+
+
 class EstimatorSpec(
     collections.namedtuple('EstimatorSpec', [
         'predictions', 'loss', 'train_op', 'eval_metric_ops',
",0,test
e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type.

PiperOrigin-RevId: 423338351
Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",check_ops_test.py,"@@ -34,6 +34,7 @@ from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import gradients
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import random_ops
+from tensorflow.python.ops.ragged import ragged_factory_ops
 from tensorflow.python.platform import test
 
 
@@ -1534,6 +1535,14 @@ class AssertTypeTest(test.TestCase):
                                        sparse_float.dense_shape)
     self.evaluate(out)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_raggedtensor_doesnt_raise_when_correct_type(self):
+    x = ragged_factory_ops.constant([[1., 2.], [3.]])
+    with ops.control_dependencies(
+        [check_ops.assert_type(x, dtypes.float32)]):
+      y = array_ops.identity(x)
+    self.assertAllEqual(x, y)
+
   @test_util.run_in_graph_and_eager_modes
   def test_raises_when_wrong_type(self):
     floats = constant_op.constant([1.0, 2.0], dtype=dtypes.float16)
@@ -1549,6 +1558,12 @@ class AssertTypeTest(test.TestCase):
     with self.assertRaisesRegexp(TypeError, ""must be of type.*float32""):
       check_ops.assert_type(sparse_float16, dtypes.float32)
 
+  @test_util.run_in_graph_and_eager_modes
+  def test_raggedtensor_raises_when_wrong_type(self):
+    x = ragged_factory_ops.constant([[1, 2], [3]])
+    with self.assertRaisesRegex(TypeError, ""must be of type.*float32""):
+      check_ops.assert_type(x, dtypes.float32)
+
   def test_raise_when_tf_type_is_not_dtype(self):
     # Test case for GitHub issue:
     # https://github.com/tensorflow/tensorflow/issues/45975
",0,train
e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type.

PiperOrigin-RevId: 423338351
Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_check_ops.py,"@@ -0,0 +1,27 @@
+# Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Asserts and Boolean Checks for RaggedTensors.""""""
+
+from tensorflow.python.ops import check_ops
+from tensorflow.python.ops.ragged import ragged_tensor
+from tensorflow.python.util import dispatch
+
+
+@dispatch.dispatch_for_api(check_ops.assert_type)
+def assert_type(tensor: ragged_tensor.Ragged, tf_type, message=None, name=None):
+  return check_ops.assert_type(tensor.flat_values, tf_type,
+                               message=message, name=name)
+
+
",0,train
e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type.

PiperOrigin-RevId: 423338351
Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_dispatch_test.py,"@@ -26,6 +26,7 @@ from tensorflow.python.framework import random_seed
 from tensorflow.python.framework import sparse_tensor
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import data_flow_ops
 from tensorflow.python.ops import image_ops_impl
@@ -903,8 +904,7 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
                   np.exp(5) / (np.exp(4) + np.exp(5)),
               ],
           ]),
-          rtol=1e-6,
-      ),
+          rtol=1e-6),
   ])
   def testRaggedDispatch(self,
                          op,
@@ -1036,6 +1036,13 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       self.evaluate(variables.global_variables_initializer())
     self.assertAllEqual(math_ops.add(x, v), [[11, 12], [13, 14, 15]])
 
+  def testAssertType(self):
+    x = ragged_factory_ops.constant([[1., 2.], [3.]])
+    with ops.control_dependencies(
+        [check_ops.assert_type(x, dtypes.float32)]):
+      y = array_ops.identity(x)
+    self.assertAllEqual(x, y)
+
 
 if __name__ == '__main__':
   googletest.main()
",0,train
e383266c782e4bfaf6c18ffb91b94eb5f44155d1,tensorflow/tensorflow,"Add RaggedTensor dispatch for tf.debugging.assert_type.

PiperOrigin-RevId: 423338351
Change-Id: I44c9423d0170a21d682f7cb20157247a26c18f4c",ragged_ops.py,"@@ -27,6 +27,7 @@ circular dependencies.
 from tensorflow.python.ops.ragged import ragged_array_ops
 from tensorflow.python.ops.ragged import ragged_batch_gather_ops
 from tensorflow.python.ops.ragged import ragged_batch_gather_with_default_op
+from tensorflow.python.ops.ragged import ragged_check_ops
 from tensorflow.python.ops.ragged import ragged_concat_ops
 from tensorflow.python.ops.ragged import ragged_conversion_ops
 from tensorflow.python.ops.ragged import ragged_dispatch
",0,train
e3a4aa36027b779ca1011c6331f173cc15d16135,tensorflow/tensorflow,"Remove xw_plus_b from the API for TF 2.0.

PiperOrigin-RevId: 221732670",nn_ops.py,"@@ -2299,7 +2299,7 @@ def _calc_bias_add_flops(graph, node):
   return ops.OpStats(""flops"", input_count)
 
 
-@tf_export(""nn.xw_plus_b"")
+@tf_export(v1=[""nn.xw_plus_b""])
 def xw_plus_b(x, weights, biases, name=None):  # pylint: disable=invalid-name
   """"""Computes matmul(x, weights) + biases.
 
",0,test
23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same

Just removed a couple of obvious ones. Larger/more uniform update coming later.

PiperOrigin-RevId: 354637425
Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",lower_static_tensor_list.cc,"@@ -216,14 +216,8 @@ struct ConvertConst : public OpConversionPattern<TF::ConstOp> {
     // If the list is empty, directly create the final result instead of
     // creating the tf.Pack op. tf.Pack op requires at least one operand.
     if (tensors.empty()) {
-      absl::InlinedVector<tensorflow::int64, 4> tf_shape;
-      tf_shape.reserve(result_shape.size());
-      for (int64_t dim : result_shape) {
-        tf_shape.push_back(dim);
-      }
-
       tensorflow::Tensor tensor(list->element_dtype,
-                                tensorflow::TensorShape(tf_shape));
+                                tensorflow::TensorShape(result_shape));
       auto attr_or = tensorflow::ConvertTensor(tensor, &rewriter);
       if (!attr_or.ok()) return failure();
       rewriter.replaceOpWithNewOp<TF::ConstOp>(op, attr_or.ValueOrDie());
",0,test
23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same

Just removed a couple of obvious ones. Larger/more uniform update coming later.

PiperOrigin-RevId: 354637425
Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",shape_inference_utils.cc,"@@ -56,7 +56,6 @@ limitations under the License.
 
 #define DEBUG_TYPE ""tf-shape-inference-utils""
 
-using ::tensorflow::int64;
 using tensorflow::shape_inference::DimensionHandle;
 using tensorflow::shape_inference::InferenceContext;
 using tensorflow::shape_inference::ShapeHandle;
@@ -83,12 +82,7 @@ NamedAttrList GetAllAttributesFromOperation(Operation* op) {
 // Extracts a PartialTensorShape from the MLIR type.
 Optional<tensorflow::PartialTensorShape> GetShapeFromMlirType(Type t) {
   if (auto ranked_type = t.dyn_cast<RankedTensorType>()) {
-    // Convert the MLIR shape indices (int64_t) to TensorFlow indices
-    // (int64).
-    ArrayRef<int64_t> shape = ranked_type.getShape();
-    SmallVector<int64, 8> tf_shape(shape.begin(), shape.end());
-    return tensorflow::PartialTensorShape(
-        MutableArrayRefToSpan<int64>(tf_shape));
+    return tensorflow::PartialTensorShape(ranked_type.getShape());
   }
   return None;
 }
",0,test
23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same

Just removed a couple of obvious ones. Larger/more uniform update coming later.

PiperOrigin-RevId: 354637425
Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",array_container_utils.h,"@@ -41,11 +41,6 @@ inline absl::Span<const T> ArrayRefToSpan(llvm::ArrayRef<T> ref) {
   return absl::Span<const T>(ref.data(), ref.size());
 }
 
-template <typename T>
-inline absl::Span<T> MutableArrayRefToSpan(llvm::MutableArrayRef<T> ref) {
-  return absl::Span<T>(ref.data(), ref.size());
-}
-
 }  // namespace mlir
 
 #endif  // TENSORFLOW_COMPILER_MLIR_UTILS_ARRAY_CONTAINER_UTILS_H_
",0,test
23d9a2b49d12ced9d8291f1097af92ae067bd4a6,tensorflow/tensorflow,"Remove unneeded copies now that int64 and int64_t are the same

Just removed a couple of obvious ones. Larger/more uniform update coming later.

PiperOrigin-RevId: 354637425
Change-Id: If7ae27acf47c81f1a39eb9120ddbfd0bae828a15",mlir_hlo_builder.cc,"@@ -49,16 +49,12 @@ static mlir::DenseIntElementsAttr GetI64ElementsAttr(
     absl::Span<const int64> values, mlir::Builder* builder) {
   auto ty = mlir::RankedTensorType::get({static_cast<int64_t>(values.size())},
                                         builder->getIntegerType(64));
-  llvm::SmallVector<int64_t, 4> mlir_values;
-  mlir_values.reserve(values.size());
-  for (const auto& value : values) {
-    mlir_values.push_back(value);
-  }
-  return mlir::DenseIntElementsAttr::get(ty, mlir_values);
+  return mlir::DenseIntElementsAttr::get(
+      ty, llvm::makeArrayRef(values.data(), values.size()));
 }
 
 static mlir::DenseIntElementsAttr ConvertPadding(
-    absl::Span<const std::pair<tensorflow::int64, tensorflow::int64>> padding,
+    absl::Span<const std::pair<int64_t, int64_t>> padding,
     mlir::Builder* builder) {
   llvm::SmallVector<int64_t, 8> elements;
   elements.reserve(padding.size() * 2);
@@ -80,7 +76,7 @@ StatusOr<XlaOp> MlirHloBuilder::MakeXlaOp(mlir::Value val) {
     return InvalidArgument(""unsupported type: %s"", ToString(ty).c_str());
   }
 
-  int64 handle = reinterpret_cast<int64>(val.getAsOpaquePointer());
+  int64_t handle = reinterpret_cast<int64_t>(val.getAsOpaquePointer());
   handle_to_shape_[handle] = std::move(shape);
   return XlaOp(handle, this);
 }
",0,test
101d46ab716931f27c76b86c2f4d1e5780b43e64,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-05-13

PiperOrigin-RevId: 311289765
Change-Id: I6167b9a3d737248f831fbd4405339a9e59220944",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 12)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 5, 13)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,test
136ccd3d83f1dbc27dbe80cab6fd8662964062dc,tensorflow/tensorflow,"Remove the check for optimizer in model_to_estimator.

PiperOrigin-RevId: 247287177",distribute_strategy_test.py,"@@ -35,6 +35,7 @@ from tensorflow.python.framework import test_util
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.distribute import distributed_training_utils
 from tensorflow.python.keras.optimizer_v2 import gradient_descent as gradient_descent_keras
+from tensorflow.python.keras.optimizer_v2 import rmsprop as rmsprop_keras
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.parsing_ops import gen_parsing_ops
@@ -115,7 +116,7 @@ def multi_inputs_multi_outputs_model():
       inputs=[input_a, input_b, input_m], outputs=[output_c, output_d])
   model.compile(
       loss='categorical_crossentropy',
-      optimizer=gradient_descent.GradientDescentOptimizer(0.001),
+      optimizer=gradient_descent_keras.SGD(learning_rate=0.001),
       metrics={
           'dense_2': 'categorical_accuracy',
           'dense_3': 'categorical_accuracy'
@@ -371,7 +372,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
     keras_model.compile(
         loss='categorical_crossentropy',
         metrics=[keras.metrics.CategoricalAccuracy()],
-        optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01),
+        optimizer=rmsprop_keras.RMSprop(learning_rate=0.01),
         cloning=cloning)
     config = run_config_lib.RunConfig(
         tf_random_seed=_RANDOM_SEED,
@@ -405,7 +406,7 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
     keras_model.compile(
         loss='categorical_crossentropy',
         metrics=[keras.metrics.CategoricalAccuracy()],
-        optimizer=rmsprop.RMSPropOptimizer(learning_rate=0.01),
+        optimizer=rmsprop_keras.RMSprop(learning_rate=0.01),
         cloning=cloning)
     config = run_config_lib.RunConfig(
         tf_random_seed=_RANDOM_SEED,
@@ -477,36 +478,6 @@ class TestEstimatorDistributionStrategy(test_util.TensorFlowTestCase,
       eval_results = est_keras.evaluate(input_fn=eval_input_fn, steps=1)
       self.assertLess(eval_results['loss'], baseline_eval_results['loss'])
 
-  @combinations.generate(
-      combinations.combine(
-          distribution=[
-              strategy_combinations.mirrored_strategy_with_gpu_and_cpu
-          ],
-          mode=['graph'],
-          cloning=[True, False]))
-  def test_keras_optimizer_with_distribution_strategy(self, distribution,
-                                                      cloning):
-    keras_model = simple_sequential_model()
-    keras_model.compile(
-        loss='categorical_crossentropy',
-        optimizer=keras.optimizers.rmsprop(lr=0.01),
-        cloning=cloning)
-
-    config = run_config_lib.RunConfig(
-        tf_random_seed=_RANDOM_SEED,
-        model_dir=self._base_dir,
-        train_distribute=distribution)
-    with self.cached_session():
-      est_keras = keras_lib.model_to_estimator(
-          keras_model=keras_model, config=config)
-      with self.assertRaisesRegexp(ValueError,
-                                   'Only TensorFlow native optimizers are '
-                                   'supported with DistributionStrategy.'):
-        est_keras.train(input_fn=get_ds_train_input_fn, steps=_TRAIN_SIZE / 16)
-
-    writer_cache.FileWriterCache.clear()
-    gfile.DeleteRecursively(self._config.model_dir)
-
 
 class TestDistributionStrategyWithNumpyArrays(test.TestCase,
                                               parameterized.TestCase):
",0,test
5ff365a4cebce6f50f4cfeeab7490992f6089961,tensorflow/tensorflow,"Add user-defined initializers to the IndyLSTMCell.

PiperOrigin-RevId: 204137901",rnn_cell.py,"@@ -3153,8 +3153,8 @@ class IndyGRUCell(rnn_cell_impl.LayerRNNCell):
     reuse: (optional) Python boolean describing whether to reuse variables
      in an existing scope.  If not `True`, and the existing scope already has
      the given variables, an error is raised.
-    kernel_initializer: (optional) The initializer to use for the weight and
-    projection matrices.
+    kernel_initializer: (optional) The initializer to use for the weight
+      matrices applied to the input.
     bias_initializer: (optional) The initializer to use for the bias.
     name: String, the name of the layer. Layers with the same name will
       share weights, but to avoid mistakes we require reuse=True in such
@@ -3287,6 +3287,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
                forget_bias=1.0,
                activation=None,
                reuse=None,
+               kernel_initializer=None,
+               bias_initializer=None,
                name=None,
                dtype=None):
     """"""Initialize the IndyLSTM cell.
@@ -3300,6 +3302,9 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
       reuse: (optional) Python boolean describing whether to reuse variables
         in an existing scope.  If not `True`, and the existing scope already has
         the given variables, an error is raised.
+      kernel_initializer: (optional) The initializer to use for the weight
+        matrix applied to the inputs.
+      bias_initializer: (optional) The initializer to use for the bias.
       name: String, the name of the layer. Layers with the same name will
         share weights, but to avoid mistakes we require reuse=True in such
         cases.
@@ -3314,6 +3319,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
     self._num_units = num_units
     self._forget_bias = forget_bias
     self._activation = activation or math_ops.tanh
+    self._kernel_initializer = kernel_initializer
+    self._bias_initializer = bias_initializer
 
   @property
   def state_size(self):
@@ -3332,7 +3339,8 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
     # pylint: disable=protected-access
     self._kernel_w = self.add_variable(
         ""%s_w"" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
-        shape=[input_depth, 4 * self._num_units])
+        shape=[input_depth, 4 * self._num_units],
+        initializer=self._kernel_initializer)
     self._kernel_u = self.add_variable(
         ""%s_u"" % rnn_cell_impl._WEIGHTS_VARIABLE_NAME,
         shape=[1, 4 * self._num_units],
@@ -3341,7 +3349,9 @@ class IndyLSTMCell(rnn_cell_impl.LayerRNNCell):
     self._bias = self.add_variable(
         rnn_cell_impl._BIAS_VARIABLE_NAME,
         shape=[4 * self._num_units],
-        initializer=init_ops.zeros_initializer(dtype=self.dtype))
+        initializer=(self._bias_initializer
+                     if self._bias_initializer is not None else
+                     init_ops.zeros_initializer(dtype=self.dtype)))
     # pylint: enable=protected-access
 
     self.built = True
",0,train
7ea9843501d8938b7d58c5a95eacc3158b5784ec,tensorflow/tensorflow,"Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known.

This avoids cross-device transfers of shape metadata, which is often
statically known at graph construction time. As a result, the load on
the parameter servers is reduced.
Change: 117135698",array_grad.py,"@@ -174,10 +174,14 @@ ops.NoGradient(""ZerosLike"")
 
 @ops.RegisterGradient(""Gather"")
 def _GatherGrad(op, grad):
-  # op.inputs[0] can be large, so colocate the shape calculation with it.
-  with ops.colocate_with(op.inputs[0]):
-    dense_shape = array_ops.shape(op.inputs[0])
-    values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
+  if op.inputs[0].get_shape().is_fully_defined():
+    dense_shape = constant_op.constant(op.inputs[0].get_shape().as_list())
+    values_shape = [-1] + op.inputs[0].get_shape()[1:].as_list()
+  else:
+    # op.inputs[0] can be large, so colocate the shape calculation with it.
+    with ops.colocate_with(op.inputs[0]):
+      dense_shape = array_ops.shape(op.inputs[0])
+      values_shape = array_ops.concat(0, [[-1], dense_shape[1:]])
 
   values = array_ops.reshape(grad, values_shape)
   indices = array_ops.reshape(op.inputs[1], [-1])
",0,train
7ea9843501d8938b7d58c5a95eacc3158b5784ec,tensorflow/tensorflow,"Optimize `tf.nn.embedding_lookup()` and `tf.gather()` when shapes are known.

This avoids cross-device transfers of shape metadata, which is often
statically known at graph construction time. As a result, the load on
the parameter servers is reduced.
Change: 117135698",embedding_ops.py,"@@ -105,8 +105,11 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None,
         else:
           dim_0_sizes = []
           for p in xrange(np):
-            with ops.colocate_with(params[p]):
-              dim_0_sizes.append(array_ops.shape(params[p])[0])
+            if params[p].get_shape()[0].value is not None:
+              dim_0_sizes.append(params[p].get_shape()[0].value)
+            else:
+              with ops.colocate_with(params[p]):
+                dim_0_sizes.append(array_ops.shape(params[p])[0])
           num_total_ids = math_ops.reduce_sum(
               math_ops.cast(array_ops.pack(dim_0_sizes), flat_ids.dtype))
         ids_per_partition = num_total_ids // np
@@ -147,18 +150,22 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None,
       ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                          name=name)
       # Reshape to reverse the flattening of ids.
-      # It's important that we compute params[0].shape on the right device
-      # to avoid data motion.
-      with ops.colocate_with(params[0]):
-        params_shape = array_ops.shape(params[0])
-      ret = array_ops.reshape(ret, array_ops.concat(0, [
-          array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
-      # output shape = ids.shape + params[*].shape[1:]
-      # Normally the reshape is sufficient, but setting shape explicitly
-      # teaches shape inference that params[1:].get_shape() matters.
       element_shape = params[0].get_shape()[1:]
       for p in params[1:]:
         element_shape = element_shape.merge_with(p.get_shape()[1:])
+      if element_shape.is_fully_defined():
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), element_shape]))
+      else:
+        # It's important that we compute params[0].shape on the right device
+        # to avoid data motion.
+        with ops.colocate_with(params[0]):
+          params_shape = array_ops.shape(params[0])
+        ret = array_ops.reshape(ret, array_ops.concat(0, [
+            array_ops.shape(ids), array_ops.slice(params_shape, [1], [-1])]))
+      # output shape = ids.shape + params[*].shape[1:]
+      # Normally the reshape is sufficient, but setting shape explicitly
+      # teaches shape inference that params[1:].get_shape() matters.
       ret.set_shape(ids.get_shape().concatenate(element_shape))
       return ret
 
",0,train
19852793cecb4434791bba42d6e76dbb4b107e99,tensorflow/tensorflow,"Add complex data type support to tf.sparse.to_dense

This PR tries to fix the issue raised in 53653 where
tf.sparse.to_dense does not support complex64 or complex128
(tf.sparse.from_dense supports complex dtypes).

This PR adds complex64/complex128 support for tf.sparse.to_dense.

This PR fixes 53653.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",sparse_to_dense_op.cc,"@@ -187,6 +187,8 @@ class SparseToDense : public OpKernel {
 TF_CALL_REAL_NUMBER_TYPES(REGISTER_KERNELS_ALL);
 REGISTER_KERNELS_ALL(bool);
 REGISTER_KERNELS_ALL(tstring);
+REGISTER_KERNELS_ALL(complex64);
+REGISTER_KERNELS_ALL(complex128);
 
 #undef REGISTER_KERNELS_ALL
 #undef REGISTER_KERNELS
",0,train
b3cdbcf13659b8a6bfe83b61248ab0e429fd9273,tensorflow/tensorflow,Minor typo fix. (#2889),strip_unused.py,"@@ -57,7 +57,7 @@ tf.app.flags.DEFINE_boolean(""input_binary"", False,
 tf.app.flags.DEFINE_string(""output_graph"", """",
                            """"""Output 'GraphDef' file name."""""")
 tf.app.flags.DEFINE_string(""input_node_names"", """",
-                           """"""The name of the output nodes, comma separated."""""")
+                           """"""The name of the input nodes, comma separated."""""")
 tf.app.flags.DEFINE_string(""output_node_names"", """",
                            """"""The name of the output nodes, comma separated."""""")
 tf.app.flags.DEFINE_integer(""placeholder_type_enum"",
",0,train
308bb3c69b850535a49d49a63ca74d0a7ba61fc1,tensorflow/tensorflow,"Handle zero batch input in BatchNorm correctly if inside a DistributionStrategy scope.

PiperOrigin-RevId: 240643242",zero_batch_test.py,"@@ -0,0 +1,109 @@
+# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""Test DistributionStrategy in the zero batch case.""""""
+
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+from absl.testing import parameterized
+import numpy as np
+
+from tensorflow.python.distribute import combinations
+from tensorflow.python.distribute import strategy_combinations
+from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import ops
+from tensorflow.python.layers import normalization
+from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import variables
+from tensorflow.python.ops.losses import losses
+from tensorflow.python.platform import test
+from tensorflow.python.training import gradient_descent
+
+
+all_combinations = combinations.combine(
+    distribution=[
+        strategy_combinations.one_device_strategy,
+    ],
+    mode=[""graph""])
+
+
+class NormalizationTest(test.TestCase, parameterized.TestCase):
+
+  @combinations.generate(
+      combinations.times(all_combinations,
+                         combinations.combine(fused=[True, False])))
+  def testBNWithZeroBatchInput(self, distribution, fused):
+    with distribution.scope(), self.cached_session() as sess:
+      bn_list = []
+      inputs = ops.convert_to_tensor(
+          np.random.random((0, 4, 4, 3)) + 100, dtype=dtypes.float32)
+      targets = ops.convert_to_tensor(
+          np.random.random((0, 4, 4, 3)), dtype=dtypes.float32)
+
+      def step_fn(is_training, inputs, targets=None):
+        bn = normalization.BatchNormalization(
+            axis=3, epsilon=1e-3, momentum=0.9, fused=fused)
+        bn_list.append(bn)
+        outputs = bn.apply(inputs, training=is_training)
+        if not is_training:
+          return outputs
+
+        loss = losses.mean_squared_error(targets, outputs)
+        optimizer = gradient_descent.GradientDescentOptimizer(0.01)
+        train_op = optimizer.minimize(loss)
+        with ops.control_dependencies([train_op]):
+          return array_ops.identity(loss)
+
+      train_op = distribution.extended.call_for_each_replica(
+          step_fn, args=(True, inputs, targets))
+      predict_op = distribution.extended.call_for_each_replica(
+          step_fn, args=(False, inputs))
+      bn = bn_list[0]
+
+      self.evaluate(variables.global_variables_initializer())
+
+      # Check for initial statistics and weights.
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      self.assertAllEqual([0, 0, 0], moving_mean)
+      self.assertAllEqual([1, 1, 1], moving_var)
+
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
+      self.assertAllEqual([1, 1, 1], np_gamma)
+      self.assertAllEqual([0, 0, 0], np_beta)
+
+      for _ in range(100):
+        np_output, _, _ = sess.run([train_op] + bn.updates)
+        self.assertEqual(0.0, np_output)
+
+      # Verify that the statistics and weights are not changed after training.
+      moving_mean, moving_var = self.evaluate(
+          [bn.moving_mean, bn.moving_variance])
+      self.assertAllEqual([0, 0, 0], moving_mean)
+      self.assertAllEqual([1, 1, 1], moving_var)
+
+      np_gamma, np_beta = self.evaluate([bn.gamma, bn.beta])
+      self.assertAllEqual([1, 1, 1], np_gamma)
+      self.assertAllEqual([0, 0, 0], np_beta)
+
+      # Test inference.
+      np_output = sess.run(predict_op)
+      self.assertEqual([], np_output.tolist())
+
+
+if __name__ == ""__main__"":
+  test.main()
+
",0,train
308bb3c69b850535a49d49a63ca74d0a7ba61fc1,tensorflow/tensorflow,"Handle zero batch input in BatchNorm correctly if inside a DistributionStrategy scope.

PiperOrigin-RevId: 240643242",normalization.py,"@@ -424,7 +424,7 @@ class BatchNormalizationBase(Layer):
         self._scope.set_partitioner(partitioner)
     self.built = True
 
-  def _assign_moving_average(self, variable, value, momentum):
+  def _assign_moving_average(self, variable, value, momentum, inputs_size):
     with ops.name_scope(None, 'AssignMovingAvg',
                         [variable, value, momentum]) as scope:
       with ops.colocate_with(variable):
@@ -433,12 +433,19 @@ class BatchNormalizationBase(Layer):
           decay = math_ops.cast(decay, variable.dtype.base_dtype)
         update_delta = (
             variable - math_ops.cast(value, variable.dtype)) * decay
+        # TODO(b/129279393): Support zero batch input in non
+        # DistributionStrategy code as well.
+        if distribution_strategy_context.has_strategy():
+          update_delta = tf_utils.smart_cond(
+              inputs_size > 0,
+              lambda: update_delta, lambda: K.zeros_like(update_delta))
         return state_ops.assign_sub(variable, update_delta, name=scope)
 
   def _fused_batch_norm(self, inputs, training):
     """"""Returns the output of fused batch norm.""""""
     beta = self.beta if self.center else self._beta_const
     gamma = self.gamma if self.scale else self._gamma_const
+    inputs_size = array_ops.size(inputs)
 
     def _fused_batch_norm_training():
       return nn.fused_batch_norm(
@@ -482,21 +489,22 @@ class BatchNormalizationBase(Layer):
         strategy = distribution_strategy_context.get_strategy()
         mean_update = strategy.extended.update(
             self.moving_mean, self._assign_moving_average,
-            (mean, self.momentum))
+            (mean, self.momentum, inputs_size))
         variance_update = strategy.extended.update(
             self.moving_variance, self._assign_moving_average,
-            (variance, self.momentum))
+            (variance, self.momentum, inputs_size))
       else:
         mean_update = self._assign_moving_average(self.moving_mean, mean,
-                                                  momentum)
-        variance_update = self._assign_moving_average(self.moving_variance,
-                                                      variance, momentum)
+                                                  momentum, inputs_size)
+        variance_update = self._assign_moving_average(
+            self.moving_variance, variance, momentum, inputs_size)
       self.add_update(mean_update, inputs=True)
       self.add_update(variance_update, inputs=True)
 
     return output
 
-  def _renorm_correction_and_moments(self, mean, variance, training):
+  def _renorm_correction_and_moments(self, mean, variance, training,
+                                     inputs_size):
     """"""Returns the correction and update values for renorm.""""""
     stddev = math_ops.sqrt(variance + self.epsilon)
     # Compute the average mean and standard deviation, as if they were
@@ -527,7 +535,7 @@ class BatchNormalizationBase(Layer):
                             lambda: d,
                             lambda: array_ops.zeros_like(d))
 
-    def _update_renorm_variable(var, weight, value):
+    def _update_renorm_variable(var, weight, value, inputs_size):
       """"""Updates a moving average and weight, returns the unbiased value.""""""
       value = array_ops.identity(value)
       def _do_update():
@@ -540,9 +548,10 @@ class BatchNormalizationBase(Layer):
         # Make sure the weight is not updated until before r and d computation.
         with ops.control_dependencies([value]):
           weight_value = array_ops.constant(1., dtype=weight.dtype)
-        new_var = self._assign_moving_average(var, value, self.renorm_momentum)
-        new_weight = self._assign_moving_average(weight, weight_value,
-                                                 self.renorm_momentum)
+        new_var = self._assign_moving_average(var, value, self.renorm_momentum,
+                                              inputs_size)
+        new_weight = self._assign_moving_average(
+            weight, weight_value, self.renorm_momentum, inputs_size)
         # TODO(yuefengz): the updates to var and weighted can not be batched
         # together if we fetch their updated values here. Consider calculating
         # new values and delaying the updates.
@@ -553,17 +562,26 @@ class BatchNormalizationBase(Layer):
       return tf_utils.smart_cond(training, _do_update, _fake_update)
 
     # TODO(yuefengz): colocate the operations
-    new_mean = _update_renorm_variable(self.renorm_mean,
-                                       self.renorm_mean_weight, mean)
-    new_stddev = _update_renorm_variable(self.renorm_stddev,
-                                         self.renorm_stddev_weight, stddev)
+    new_mean = _update_renorm_variable(
+        self.renorm_mean, self.renorm_mean_weight, mean, inputs_size)
+    new_stddev = _update_renorm_variable(
+        self.renorm_stddev, self.renorm_stddev_weight, stddev, inputs_size)
     # Make sqrt(moving_variance + epsilon) = new_stddev.
     new_variance = math_ops.square(new_stddev) - self.epsilon
 
     return (r, d, new_mean, new_variance)
 
   def _moments(self, inputs, reduction_axes, keep_dims):
-    return nn.moments(inputs, reduction_axes, keep_dims=keep_dims)
+    mean, variance = nn.moments(inputs, reduction_axes, keep_dims=keep_dims)
+    # TODO(b/129279393): Support zero batch input in non DistributionStrategy
+    # code as well.
+    if distribution_strategy_context.has_strategy():
+      inputs_size = array_ops.size(inputs)
+      mean = tf_utils.smart_cond(
+          inputs_size > 0, lambda: mean, lambda: K.zeros_like(mean))
+      variance = tf_utils.smart_cond(
+          inputs_size > 0, lambda: variance, lambda: K.zeros_like(variance))
+    return mean, variance
 
   def call(self, inputs, training=None):
     if training is None:
@@ -661,9 +679,10 @@ class BatchNormalizationBase(Layer):
       else:
         new_mean, new_variance = mean, variance
 
+      inputs_size = array_ops.size(inputs)
       if self.renorm:
         r, d, new_mean, new_variance = self._renorm_correction_and_moments(
-            new_mean, new_variance, training)
+            new_mean, new_variance, training, inputs_size)
         # When training, the normalized values (say, x) will be transformed as
         # x * gamma + beta without renorm, and (x * r + d) * gamma + beta
         # = x * (r * gamma) + (d * gamma + beta) with renorm.
@@ -679,8 +698,8 @@ class BatchNormalizationBase(Layer):
           if in_eager_mode and not self.trainable:
             return
           return strategy.extended.update(
-              var, self._assign_moving_average, (value, self.momentum),
-              group=False)
+              var, self._assign_moving_average,
+              (value, self.momentum, inputs_size), group=False)
         # We need to unwrap the moving_mean or moving_variance in the case of
         # training being false to match the output of true_fn and false_fn
         # in the smart cond.
@@ -697,7 +716,9 @@ class BatchNormalizationBase(Layer):
           """"""Compute the updates for mean and variance.""""""
           if in_eager_mode and not self.trainable:
             return
-          return self._assign_moving_average(var, value, self.momentum)
+          return self._assign_moving_average(var, value, self.momentum,
+                                             inputs_size)
+
         mean_update = tf_utils.smart_cond(
             training,
             lambda: _do_update(self.moving_mean, new_mean),
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",executor.cc,"@@ -1737,6 +1737,7 @@ void ExecutorState::Process(TaggedNode tagged_node, int64 scheduled_nsec) {
   params.inputs = &inputs;
   params.input_alloc_attrs = &input_alloc_attrs;
   params.runner = &runner_;
+  params.run_all_kernels_inline = run_all_kernels_inline_;
   params.stats_collector = stats_collector_;
   params.inc_num_deferred_ops_function = [this]() {
     mutex_lock lock(num_deferred_ops_mu_);
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function.cc,"@@ -532,6 +532,7 @@ class CallOp : public AsyncOpKernel {
     opts.step_container = ctx->step_container();
     opts.stats_collector = ctx->stats_collector();
     opts.runner = ctx->runner();
+    opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
     opts.collective_executor = ctx->collective_executor();
     std::vector<Tensor> args;
     args.reserve(ctx->num_inputs());
@@ -1021,6 +1022,7 @@ void FunctionLibraryRuntimeImpl::ExecutorArgsFromOptions(
   }
   exec_args->collective_executor = run_opts.collective_executor;
   exec_args->call_frame = frame;
+  exec_args->run_all_kernels_inline = run_opts.run_all_kernels_inline;
 }
 
 void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function_test.cc,"@@ -1872,6 +1872,67 @@ TEST_F(FunctionLibraryRuntimeTest, CrossDevice) {
                               TensorShape({})));
 }
 
+class AreAllKernelsInlineOp : public OpKernel {
+ public:
+  using OpKernel::OpKernel;
+
+  void Compute(OpKernelContext* ctx) override {
+    Tensor* output;
+    OP_REQUIRES_OK(ctx, ctx->allocate_output(0, {}, &output));
+    output->scalar<bool>()() = ctx->run_all_kernels_inline();
+  }
+};
+
+REGISTER_OP(""AreAllKernelsInline"").Output(""result : bool"").SetIsStateful();
+REGISTER_KERNEL_BUILDER(Name(""AreAllKernelsInline"").Device(DEVICE_CPU),
+                        AreAllKernelsInlineOp);
+
+TEST_F(FunctionLibraryRuntimeTest, RunAllKernelsInline) {
+  // Create a function ""F"" that includes an AreAllKernelsInline op, and a
+  // function ""G"" that calls ""F"".
+  auto f = FDH::Create(
+      // Name
+      ""F"",
+      // Args
+      {},
+      // Return values
+      {""ret: bool""},
+      // Attrs
+      {},
+      // Nodes
+      {// y = AreAllKernelsInline()
+       {{""y""}, ""AreAllKernelsInline"", {}, {}}},
+      {{""ret"", ""y:result:0""}});
+
+  auto g = FDH::Create(
+      // Name
+      ""G"",
+      // Args
+      {},
+      // Return values
+      {""ret: bool""},
+      // Attrs
+      {},
+      // Nodes
+      {// y = F()
+       {{""y""}, ""F"", {}, {}}},
+      {{""ret"", ""y:ret:0""}});
+
+  Init({f, g});
+  FunctionLibraryRuntime::Handle handle;
+  TF_CHECK_OK(Instantiate(flr0_, ""G"", {}, &handle));
+
+  // Test that the `run_all_kernels_inline` flag is inherited by the kernel
+  // running inside the called function.
+  for (bool inline_option : {false, true}) {
+    FunctionLibraryRuntime::Options opts;
+    opts.run_all_kernels_inline = inline_option;
+    Tensor result;
+    TF_CHECK_OK(Run(flr0_, handle, opts, {}, {&result}, true));
+    EXPECT_EQ(result.scalar<bool>()(), inline_option);
+  }
+}
+
 namespace {
 
 bool DoNothing(Graph* g) { return false; }
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function.h,"@@ -712,6 +712,10 @@ class FunctionLibraryRuntime {
     // If True, allow returning dead tensors.
     bool allow_dead_tensors = false;
 
+    // If True, hint that all kernels should be treated as ""inexpensive"", and
+    // hence executed on the scheduling thread.
+    bool run_all_kernels_inline = false;
+
     // Returns a human readable representation of this.
     string DebugString() const;
   };
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",op_kernel.h,"@@ -732,6 +732,7 @@ class OpKernelContext {
     std::function<void(std::function<void()>)>* runner = nullptr;
     StepStatsCollectorInterface* stats_collector = nullptr;
     GraphCollector* graph_collector = nullptr;
+    bool run_all_kernels_inline = false;
 
     // TensorSliceReaderCache support.
     checkpoint::TensorSliceReaderCacheWrapper* slice_reader_cache = nullptr;
@@ -867,6 +868,12 @@ class OpKernelContext {
   // If non-null, kernels should populate with any partition subgraphs created.
   GraphCollector* graph_collector() { return params_->graph_collector; }
 
+  // If True, hint that all kernels in functions called by this kernel, should
+  // be treated as ""inexpensive"", and hence executed on the scheduling thread.
+  bool run_all_kernels_inline() const {
+    return params_->run_all_kernels_inline;
+  }
+
   // Input to output forwarding.
 
   // Set the output Ref Tensor at output_index to be an alias of the
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",batch_kernels.cc,"@@ -515,6 +515,7 @@ class BatchResource : public ResourceBase {
     opts.stats_collector = last_task_context->stats_collector();
     opts.rendezvous = last_task_context->rendezvous();
     opts.runner = last_task_context->runner();
+    opts.run_all_kernels_inline = last_task_context->run_all_kernels_inline();
 
     auto* flib = last_task_context->function_library();
     std::vector<Tensor> combined_outputs;
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",iterator_ops.cc,"@@ -835,6 +835,7 @@ class OneShotIteratorOp : public AsyncOpKernel {
     });
     opts.step_container = &step_container;
     opts.runner = ctx->runner();
+    opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
     Notification n;
     Status factory_status;
     std::vector<Tensor> return_values;
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",map_defun_op.cc,"@@ -239,6 +239,7 @@ void MapDefunOp::SetRunOptions(OpKernelContext* ctx,
   } else {
     opts->runner = ctx->runner();
   }
+  opts->run_all_kernels_inline = ctx->run_all_kernels_inline();
 }
 
 Status MapDefunOp::SetupArgs(OpKernelContext* ctx,
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",single_threaded_executor.cc,"@@ -259,6 +259,7 @@ class SingleThreadedExecutorImpl : public Executor {
 
     Args::Runner runner_copy = args.runner;
     params.runner = &runner_copy;
+    params.run_all_kernels_inline = args.run_all_kernels_inline;
     params.stats_collector = args.stats_collector;
 
     // NOTE(mrry): We are assuming that the graph is loopless and condless.
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",function_ops.cc,"@@ -253,6 +253,7 @@ class SymbolicGradientOp : public AsyncOpKernel {
     opts.rendezvous = ctx->rendezvous();
     opts.cancellation_manager = ctx->cancellation_manager();
     opts.runner = ctx->runner();
+    opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
     opts.stats_collector = ctx->stats_collector();
     opts.step_container = ctx->step_container();
     opts.collective_executor = ctx->collective_executor();
@@ -365,6 +366,7 @@ void RemoteCallOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
 
   FunctionLibraryRuntime::Options opts;
   opts.runner = ctx->runner();
+  opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
   opts.source_device = source_device;
   if (opts.source_device != target_device) {
     opts.remote_execution = true;
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",functional_ops.cc,"@@ -107,6 +107,7 @@ void SetRunOptions(OpKernelContext* ctx, FunctionLibraryRuntime::Options* opts,
     opts->stats_collector = ctx->stats_collector();
   }
   opts->runner = ctx->runner();
+  opts->run_all_kernels_inline = ctx->run_all_kernels_inline();
   opts->step_container = ctx->step_container();
 }
 
",0,train
9a924476f3cfce7db5c07613d74730296dfc4537,tensorflow/tensorflow,"Function calls inherit `run_all_kernels_inline` from their parent context.

The `Executor::Args::run_all_kernels_inline` flag optimizes the execution of graphs with many small kernels, and avoids potentially unbounded stack growth. This change enables functions called by a kernel to inherit this flag, which extends support for the option to larger and more complicated graphs containing function calls. It adds the flag to `OpKernelContext::Params` and `FunctionLibraryRuntime::Options`, and updates function-calling kernels to propagate it.

PiperOrigin-RevId: 298636976
Change-Id: I28263aa5a17ce7d94b84f6bb42657ce3f4b88cfa",partitioned_function_ops.cc,"@@ -241,6 +241,7 @@ void PartitionedCallOp::RunFunction(FunctionLibraryRuntime::Handle handle,
   // TODO(akshayka): Consider selecting a runner on a per-device basis,
   // i.e., using device-specific threadpools when available.
   run_opts.runner = ctx->runner();
+  run_opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
   run_opts.source_device =
       lib->device() == nullptr ? """" : lib->device()->name();
   run_opts.allow_dead_tensors = true;
",0,train
48f2893491fdc7352590a7d93b9ec317f2850c64,tensorflow/tensorflow,"Pass -v when invoking FileCheck.

FileCheck has learned to output the debugging information we were printing, plus other info.  See https://reviews.llvm.org/rL349418.

PiperOrigin-RevId: 226941909",filecheck.cc,"@@ -48,7 +48,7 @@ StatusOr<bool> RunFileCheck(const string& input, const string& pattern) {
 
   tensorflow::SubProcess file_check_process;
   file_check_process.SetProgram(file_check_path,
-                                {file_check_path, pattern_path});
+                                {file_check_path, ""-v"", pattern_path});
   file_check_process.SetChannelAction(tensorflow::CHAN_STDIN,
                                       tensorflow::ACTION_PIPE);
   file_check_process.SetChannelAction(tensorflow::CHAN_STDERR,
@@ -71,9 +71,7 @@ StatusOr<bool> RunFileCheck(const string& input, const string& pattern) {
       LOG(WARNING) << ""NOTE: FileCheck binary does not exist!"";
     }
 
-    LOG(WARNING) << ""FileCheck error: "" << standard_error;
-    LOG(WARNING) << ""FileCheck input was:"";
-    XLA_LOG_LINES(tensorflow::WARNING, input);
+    LOG(WARNING) << ""FileCheck error:\n"" << standard_error;
     LOG(WARNING) << ""FileCheck pattern was:"";
     XLA_LOG_LINES(tensorflow::WARNING, pattern);
   } else if (!standard_error.empty()) {
",0,train
1fa7b63eb3a45541551f398f2d076d5ef99e33dd,tensorflow/tensorflow,"Temporarily disable two tests due to overflow

PiperOrigin-RevId: 387638876
Change-Id: If9293e2a806db6d70cc4fe2ed5d9bf47230ec52a",tensor_array_ops_test.py,"@@ -259,10 +259,11 @@ class TensorArrayTest(xla_test.XLATestCase):
       self.assertAllEqual(convert([[2.0, 201.0]]), d1)
       self.assertAllEqual(convert([[3.0, 301.0]]), d2)
 
-  @test_util.disable_control_flow_v2(""b/122315872 (split)"")
-  def testTensorArraySplitRead(self):
-    for dtype in self.numeric_tf_types:
-      self._testTensorArraySplitRead(dtype)
+  # Disable temporarily due to b/195023333
+  # @test_util.disable_control_flow_v2(""b/122315872 (split)"")
+  # def testTensorArraySplitRead(self):
+  #   for dtype in self.numeric_tf_types:
+  #     self._testTensorArraySplitRead(dtype)
 
   @test_util.disable_control_flow_v2(""TensorArray.grad is not supported in v2"")
   def testTensorGradArrayWriteRead(self):
@@ -1046,11 +1047,12 @@ class TensorArrayTest(xla_test.XLATestCase):
       self.assertAllEqual(convert([1.0, -1.0]), read_vals[0])
       self.assertAllEqual(convert([10.0, -10.0]), read_vals[1])
 
-  @test_util.disable_control_flow_v2(""b/122315734 (scatter)"")
-  def testTensorArrayScatterRead(self):
-    for dtype in self.numeric_tf_types:
-      self._testTensorArrayScatterRead(dtype)
-    self._testTensorArrayScatterRead(dtypes.bool)
+  # Disable temporarily due to b/195023333
+  # @test_util.disable_control_flow_v2(""b/122315734 (scatter)"")
+  # def testTensorArrayScatterRead(self):
+  #   for dtype in self.numeric_tf_types:
+  #     self._testTensorArrayScatterRead(dtype)
+  #   self._testTensorArrayScatterRead(dtypes.bool)
 
   @test_util.disable_control_flow_v2(""b/122315734 (scatter)"")
   def testTensorArrayScatterReadAndGradients(self):
",0,test
4d80b9758fc2ef64cb05ebd80d5fd1f21584413e,tensorflow/tensorflow,"[tf.data] Fixes RangeDataset cardinality when start_ == stop_.

PiperOrigin-RevId: 396700532
Change-Id: Ie282649a1518a477208618170cb65dc744f38341",range_dataset_op.cc,"@@ -164,7 +164,9 @@ class RangeDatasetOp::Dataset : public DatasetBase {
   }
 
   int64_t Cardinality() const override {
-    if (step_ > 0) {
+    if (start_ == stop_) {
+      return 0;
+    } else if (step_ > 0) {
       return std::max(int64_t{0}, (stop_ - start_ - 1) / step_ + 1);
     } else {
       return std::max(int64_t{0}, (start_ - stop_ - 1) / -step_ + 1);
",0,train
4d80b9758fc2ef64cb05ebd80d5fd1f21584413e,tensorflow/tensorflow,"[tf.data] Fixes RangeDataset cardinality when start_ == stop_.

PiperOrigin-RevId: 396700532
Change-Id: Ie282649a1518a477208618170cb65dc744f38341",cardinality_test.py,"@@ -115,6 +115,9 @@ def _test_combinations():
       (""Range4"", lambda: dataset_ops.Dataset.range(10, 5), 0),
       (""Range5"", lambda: dataset_ops.Dataset.range(5, 10, 2), 3),
       (""Range6"", lambda: dataset_ops.Dataset.range(10, 5, -2), 3),
+      (""Range7"", lambda: dataset_ops.Dataset.range(0, 0, -2), 0),
+      (""Range8"", lambda: dataset_ops.Dataset.range(3, 3, 1), 0),
+      (""Range9"", lambda: dataset_ops.Dataset.range(-4, -4, 2), 0),
       (""Repeat1"", lambda: dataset_ops.Dataset.range(0).repeat(0), 0),
       (""Repeat2"", lambda: dataset_ops.Dataset.range(1).repeat(0), 0),
       (""Repeat3"", lambda: dataset_ops.Dataset.range(0).repeat(5), 0),
",0,train
82eafde35fce5aa7cbe57b6af49cb990f5edaf2c,tensorflow/tensorflow,"Fix negative axis issue with ragged tensor and reduce_sum

This fix addresses the issue raised in 27497, where
`tf.reduce_sum` with multiple negative axes on a ragged tensor
does not produce the correct result.

The issue is that during the reduce op, the ragged tensor is reduced
one axis at a time. However, for negative axes the sort result is
reversed, so the axes are processed in a different order.

This fix converts the negative axes to positive ones before the sort
to ensure the correct order.

This fix fixes 27497.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",ragged_math_ops.py,"@@ -461,6 +461,12 @@ def _ragged_reduce_aggregate(reduce_op,
       elif len(axis) == 1:
         axis = axis[0]
       else:
+        # When reducing multiple axes, as we reduce one at a time (see below),
+        # the negative axis has to be converted to positive at the first run
+        # as the sort with negative axis will have different orders.
+        # See GitHub issue 27497.
+        axis = [ragged_util.get_positive_axis(
+            a, rt_input.shape.ndims) for a in axis]
         # When reducing multiple axes, just reduce one at a time.  This is less
         # efficient, and only works for associative ops.  (In particular, it
         # does not work for reduce_mean.)  However, reducing multiple axes at
",0,train
8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile.

In some cases TensorFlow reads the data via RandomAccessFile in really small
chunks, which doesn't work very efficiently with HTTP requests. Adding a
read-ahead cache significantly boosts the performance.
Change: 125691397",gcs_file_system.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/cloud/gcs_file_system.h""
 #include <stdio.h>
 #include <unistd.h>
+#include <algorithm>
 #include <cstdio>
 #include <cstdlib>
 #include <cstring>
@@ -80,19 +81,58 @@ Status ParseGcsPath(const string& fname, string* bucket, string* object) {
   return Status::OK();
 }
 
-/// GCS-based implementation of a random access file.
+/// A GCS-based implementation of a random access file with a read-ahead buffer.
 class GcsRandomAccessFile : public RandomAccessFile {
  public:
   GcsRandomAccessFile(const string& bucket, const string& object,
                       AuthProvider* auth_provider,
-                      HttpRequest::Factory* http_request_factory)
+                      HttpRequest::Factory* http_request_factory,
+                      size_t read_ahead_bytes)
       : bucket_(bucket),
         object_(object),
         auth_provider_(auth_provider),
-        http_request_factory_(std::move(http_request_factory)) {}
+        http_request_factory_(std::move(http_request_factory)),
+        read_ahead_bytes_(read_ahead_bytes) {}
 
+  /// The implementation of reads with a read-ahead buffer.
   Status Read(uint64 offset, size_t n, StringPiece* result,
               char* scratch) const override {
+    if (offset >= buffer_start_offset_ &&
+        offset + n <= buffer_start_offset_ + buffer_content_size_) {
+      // If the requested range is fully in the buffer, just return it.
+      std::memcpy(scratch, buffer_.get() + offset - buffer_start_offset_, n);
+      *result = StringPiece(scratch, n);
+      return Status::OK();
+    }
+
+    // Update the buffer content based on the new requested range.
+    auto buffer_size = n + read_ahead_bytes_;
+    buffer_.reset(new char[buffer_size]);
+    buffer_start_offset_ = offset;
+    buffer_content_size_ = 0;
+    StringPiece buffer_content;
+    TF_RETURN_IF_ERROR(
+        ReadFromGCS(offset, buffer_size, &buffer_content, buffer_.get()));
+    buffer_content_size_ = buffer_content.size();
+
+    // Set the results.
+    *result = StringPiece(scratch, std::min(buffer_content_size_, n));
+    std::memcpy(scratch, buffer_.get(), result->size());
+
+    if (result->size() < n) {
+      // This is not an error per se. The RandomAccessFile interface expects
+      // that Read returns OutOfRange if fewer bytes were read than requested.
+      return errors::OutOfRange(strings::StrCat(""EOF reached, "", result->size(),
+                                                "" bytes were read out of "", n,
+                                                "" bytes requested.""));
+    }
+    return Status::OK();
+  }
+
+ private:
+  /// A helper function to actually read the data from GCS.
+  Status ReadFromGCS(uint64 offset, size_t n, StringPiece* result,
+                     char* scratch) const {
     string auth_token;
     TF_RETURN_IF_ERROR(AuthProvider::GetToken(auth_provider_, &auth_token));
 
@@ -105,22 +145,21 @@ class GcsRandomAccessFile : public RandomAccessFile {
     TF_RETURN_IF_ERROR(request->SetRange(offset, offset + n - 1));
     TF_RETURN_IF_ERROR(request->SetResultBuffer(scratch, n, result));
     TF_RETURN_IF_ERROR(request->Send());
-
-    if (result->size() < n) {
-      // This is not an error per se. The RandomAccessFile interface expects
-      // that Read returns OutOfRange if fewer bytes were read than requested.
-      return errors::OutOfRange(strings::StrCat(""EOF reached, "", result->size(),
-                                                "" bytes were read out of "", n,
-                                                "" bytes requested.""));
-    }
     return Status::OK();
   }
 
- private:
   string bucket_;
   string object_;
   AuthProvider* auth_provider_;
   HttpRequest::Factory* http_request_factory_;
+  const size_t read_ahead_bytes_;
+
+  // The buffer-related members need to be mutable, because they are modified
+  // by the const Read() method.
+  mutable std::unique_ptr<char[]> buffer_;
+  // The original file offset of the first byte in the buffer.
+  mutable size_t buffer_start_offset_ = 0;
+  mutable size_t buffer_content_size_ = 0;
 };
 
 /// \brief GCS-based implementation of a writeable file.
@@ -233,16 +272,19 @@ GcsFileSystem::GcsFileSystem()
 
 GcsFileSystem::GcsFileSystem(
     std::unique_ptr<AuthProvider> auth_provider,
-    std::unique_ptr<HttpRequest::Factory> http_request_factory)
+    std::unique_ptr<HttpRequest::Factory> http_request_factory,
+    size_t read_ahead_bytes)
     : auth_provider_(std::move(auth_provider)),
-      http_request_factory_(std::move(http_request_factory)) {}
+      http_request_factory_(std::move(http_request_factory)),
+      read_ahead_bytes_(read_ahead_bytes) {}
 
 Status GcsFileSystem::NewRandomAccessFile(
     const string& fname, std::unique_ptr<RandomAccessFile>* result) {
   string bucket, object;
   TF_RETURN_IF_ERROR(ParseGcsPath(fname, &bucket, &object));
   result->reset(new GcsRandomAccessFile(bucket, object, auth_provider_.get(),
-                                        http_request_factory_.get()));
+                                        http_request_factory_.get(),
+                                        read_ahead_bytes_));
   return Status::OK();
 }
 
",0,train
8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile.

In some cases TensorFlow reads the data via RandomAccessFile in really small
chunks, which doesn't work very efficiently with HTTP requests. Adding a
read-ahead cache significantly boosts the performance.
Change: 125691397",gcs_file_system.h,"@@ -30,7 +30,8 @@ class GcsFileSystem : public FileSystem {
  public:
   GcsFileSystem();
   GcsFileSystem(std::unique_ptr<AuthProvider> auth_provider,
-                std::unique_ptr<HttpRequest::Factory> http_request_factory);
+                std::unique_ptr<HttpRequest::Factory> http_request_factory,
+                size_t read_ahead_bytes);
 
   Status NewRandomAccessFile(
       const string& filename,
@@ -63,6 +64,11 @@ class GcsFileSystem : public FileSystem {
  private:
   std::unique_ptr<AuthProvider> auth_provider_;
   std::unique_ptr<HttpRequest::Factory> http_request_factory_;
+
+  // The number of bytes to read ahead for buffering purposes in the
+  // RandomAccessFile implementation. Defaults to 256Mb.
+  const size_t read_ahead_bytes_ = 256 * 1024 * 1024;
+
   TF_DISALLOW_COPY_AND_ASSIGN(GcsFileSystem);
 };
 
",0,train
8bf25a491b60d223bba11233de9e62f4b0db17e8,tensorflow/tensorflow,"Add a read-ahead cache to the GCS implementation of RandomAccessFile.

In some cases TensorFlow reads the data via RandomAccessFile in really small
chunks, which doesn't work very efficiently with HTTP requests. Adding a
read-ahead cache significantly boosts the performance.
Change: 125691397",gcs_file_system_test.cc,"@@ -49,7 +49,7 @@ class FakeAuthProvider : public AuthProvider {
   }
 };
 
-TEST(GcsFileSystemTest, NewRandomAccessFile) {
+TEST(GcsFileSystemTest, NewRandomAccessFile_NoReadAhead) {
   std::vector<HttpRequest*> requests(
       {new FakeHttpRequest(
            ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n""
@@ -63,7 +63,8 @@ TEST(GcsFileSystemTest, NewRandomAccessFile) {
            ""6789"")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::unique_ptr<RandomAccessFile> file;
   TF_EXPECT_OK(fs.NewRandomAccessFile(""gs://bucket/random_access.txt"", &file));
@@ -82,6 +83,65 @@ TEST(GcsFileSystemTest, NewRandomAccessFile) {
   EXPECT_EQ(""6789"", result);
 }
 
+TEST(GcsFileSystemTest, NewRandomAccessFile_WithReadAhead) {
+  std::vector<HttpRequest*> requests(
+      {new FakeHttpRequest(
+           ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n""
+           ""Auth Token: fake_token\n""
+           ""Range: 0-8\n"",
+           ""01234567""),
+       new FakeHttpRequest(
+           ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n""
+           ""Auth Token: fake_token\n""
+           ""Range: 6-15\n"",
+           ""6789abcd""),
+       new FakeHttpRequest(
+           ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n""
+           ""Auth Token: fake_token\n""
+           ""Range: 6-20\n"",
+           ""6789abcd""),
+       new FakeHttpRequest(
+           ""Uri: https://bucket.storage.googleapis.com/random_access.txt\n""
+           ""Auth Token: fake_token\n""
+           ""Range: 15-29\n"",
+           """")});
+  GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
+                   std::unique_ptr<HttpRequest::Factory>(
+                       new FakeHttpRequestFactory(&requests)),
+                   5 /* read ahead bytes */);
+
+  std::unique_ptr<RandomAccessFile> file;
+  TF_EXPECT_OK(fs.NewRandomAccessFile(""gs://bucket/random_access.txt"", &file));
+
+  char scratch[100];
+  StringPiece result;
+
+  // Read the first chunk. The cache will be updated with 4 + 5 = 9 bytes.
+  TF_EXPECT_OK(file->Read(0, 4, &result, scratch));
+  EXPECT_EQ(""0123"", result);
+
+  // The second chunk will be fully loaded from the cache, no requests are made.
+  TF_EXPECT_OK(file->Read(4, 4, &result, scratch));
+  EXPECT_EQ(""4567"", result);
+
+  // The chunk is only partially cached -- the request will be made to
+  // reload the cache. 5 + 5 = 10 bytes will be requested.
+  TF_EXPECT_OK(file->Read(6, 5, &result, scratch));
+  EXPECT_EQ(""6789a"", result);
+
+  // The range can only be partially satisfied. An attempt to fill the cache
+  // with 10 + 5 = 15 bytes will be made.
+  EXPECT_EQ(errors::Code::OUT_OF_RANGE,
+            file->Read(6, 10, &result, scratch).code());
+  EXPECT_EQ(""6789abcd"", result);
+
+  // The range cannot be satisfied. An attempt to fill the cache
+  // with 10 + 5 = 15 bytes will be made.
+  EXPECT_EQ(errors::Code::OUT_OF_RANGE,
+            file->Read(15, 10, &result, scratch).code());
+  EXPECT_TRUE(result.empty());
+}
+
 TEST(GcsFileSystemTest, NewWritableFile) {
   std::vector<HttpRequest*> requests({new FakeHttpRequest(
       ""Uri: https://www.googleapis.com/upload/storage/v1/b/bucket/o?""
@@ -91,7 +151,8 @@ TEST(GcsFileSystemTest, NewWritableFile) {
       """")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewWritableFile(""gs://bucket/path/writeable.txt"", &file));
@@ -116,7 +177,8 @@ TEST(GcsFileSystemTest, NewAppendableFile) {
            """")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::unique_ptr<WritableFile> file;
   TF_EXPECT_OK(fs.NewAppendableFile(""gs://bucket/path/appendable.txt"", &file));
@@ -142,7 +204,8 @@ TEST(GcsFileSystemTest, NewReadOnlyMemoryRegionFromFile) {
            content)});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::unique_ptr<ReadOnlyMemoryRegion> region;
   TF_EXPECT_OK(fs.NewReadOnlyMemoryRegionFromFile(
@@ -166,7 +229,8 @@ TEST(GcsFileSystemTest, FileExists) {
            """", errors::NotFound(""404""))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   EXPECT_TRUE(fs.FileExists(""gs://bucket/path/file1.txt""));
   EXPECT_FALSE(fs.FileExists(""gs://bucket/path/file2.txt""));
@@ -176,7 +240,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles) {
   auto requests = CreateGetThreeChildrenRequest();
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path/"", &children));
@@ -188,7 +253,8 @@ TEST(GcsFileSystemTest, GetChildren_ThreeFiles_NoSlash) {
   auto requests = CreateGetThreeChildrenRequest();
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path"", &children));
@@ -204,7 +270,8 @@ TEST(GcsFileSystemTest, GetChildren_Empty) {
       ""{}"")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   std::vector<string> children;
   TF_EXPECT_OK(fs.GetChildren(""gs://bucket/path/"", &children));
@@ -221,7 +288,8 @@ TEST(GcsFileSystemTest, DeleteFile) {
                            """")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   TF_EXPECT_OK(fs.DeleteFile(""gs://bucket/path/file1.txt""));
 }
@@ -234,7 +302,8 @@ TEST(GcsFileSystemTest, DeleteDir_Empty) {
       ""{}"")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   TF_EXPECT_OK(fs.DeleteDir(""gs://bucket/path/""));
 }
@@ -248,7 +317,8 @@ TEST(GcsFileSystemTest, DeleteDir_NonEmpty) {
       ""  { \""name\"": \""path/file1.txt\"" }]}"")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   EXPECT_FALSE(fs.DeleteDir(""gs://bucket/path/"").ok());
 }
@@ -261,7 +331,8 @@ TEST(GcsFileSystemTest, GetFileSize) {
       strings::StrCat(""{\""size\"": \""1010\""}""))});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   uint64 size;
   TF_EXPECT_OK(fs.GetFileSize(""gs://bucket/file.txt"", &size));
@@ -284,7 +355,8 @@ TEST(GcsFileSystemTest, RenameFile) {
            """")});
   GcsFileSystem fs(std::unique_ptr<AuthProvider>(new FakeAuthProvider),
                    std::unique_ptr<HttpRequest::Factory>(
-                       new FakeHttpRequestFactory(&requests)));
+                       new FakeHttpRequestFactory(&requests)),
+                   0 /* read ahead bytes */);
 
   TF_EXPECT_OK(
       fs.RenameFile(""gs://bucket/path/src.txt"", ""gs://bucket/path/dst.txt""));
",0,train
2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends.

PiperOrigin-RevId: 370531014
Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",pjrt_client.h,"@@ -57,6 +57,14 @@ static const PjRtPlatformId kGpuId = tensorflow::Fingerprint64(kGpuName);
 static const PjRtPlatformId kTpuId = tensorflow::Fingerprint64(kTpuName);
 
 enum PjRtRuntimeType { kStreamExecutor, kTfrt };
+static constexpr absl::string_view PjRtRuntimeTypeString(PjRtRuntimeType type) {
+  switch (type) {
+    case kStreamExecutor:
+      return ""stream_executor"";
+    case kTfrt:
+      return ""tfrt"";
+  }
+}
 
 class PjRtClient;
 
@@ -184,6 +192,8 @@ class PjRtClient {
   // Returns a string that identifies the platform (CPU/GPU/TPU).
   virtual absl::string_view platform_name() const = 0;
 
+  // Returns a string containing human-readable, platform-specific version info
+  // (e.g. the CUDA version on GPU or libtpu version on Cloud TPU).
   virtual absl::string_view platform_version() const = 0;
 
   // Returns an enum that identifies the type of runtime being used under this
",0,train
2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends.

PiperOrigin-RevId: 370531014
Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",py_client.h,"@@ -100,6 +100,9 @@ class PyClient : public std::enable_shared_from_this<PyClient> {
   absl::string_view platform_version() const {
     return pjrt_client_->platform_version();
   }
+  absl::string_view runtime_type() const {
+    return PjRtRuntimeTypeString(pjrt_client_->runtime_type());
+  }
   int addressable_device_count() const {
     return pjrt_client_->addressable_device_count();
   }
",0,train
2147ea442a0f514ef81c91ba569de89bab5753ca,tensorflow/tensorflow,"[PJRT] Expose PyClient::runtime_type() to distinguish between ""stream_executor"" and ""tfrt"" backends.

PiperOrigin-RevId: 370531014
Change-Id: I498827583a21e51de390e8d0d34ebe175b323a5a",xla.cc,"@@ -206,6 +206,7 @@ PYBIND11_MODULE(xla_extension, m) {
   py::class_<PyClient, std::shared_ptr<PyClient>> py_local_client(m, ""Client"");
   py_local_client.def_property_readonly(""platform"", &PyClient::platform_name)
       .def_property_readonly(""platform_version"", &PyClient::platform_version)
+      .def_property_readonly(""runtime_type"", &PyClient::runtime_type)
       .def(""device_count"", &PyClient::device_count)
       .def(""local_device_count"", &PyClient::addressable_device_count)
       .def(""devices"", &PyClient::Devices)
",0,train
11fb9e3fcca1567d4e09d9fa043d97e31b66d8a7,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 377602003
Change-Id: Ied5dd5cd7f963a157719f20600f0326e14a5a964",fake_quant_ops_functor.h,"@@ -87,13 +87,13 @@ struct FakeQuantWithMinMaxArgsFunctor {
     float nudged_min, nudged_max, nudged_scale;
     Nudge(min, max, quant_min, quant_max, &nudged_min, &nudged_max,
           &nudged_scale);
-    const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f);
+    const float inv_nudged_scale = 1.0f / nudged_scale;
 
     auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
     auto clamped_shifted = clamped - nudged_min;
     outputs.device(d) =
-        (clamped_shifted / nudged_scale - quant_zero + 0.5f).floor() *
-        nudged_scale;
+        (clamped_shifted * inv_nudged_scale + 0.5f).floor() * nudged_scale +
+        nudged_min;
   }
 };
 
@@ -138,14 +138,13 @@ struct FakeQuantWithMinMaxVarsFunctor {
     float nudged_min, nudged_max, nudged_scale;
     Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
           &nudged_scale);
-    const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f);
     const auto nudged_scale_repl = inputs.constant(nudged_scale);
 
     const auto clamped = inputs.cwiseMin(nudged_max).cwiseMax(nudged_min);
     const auto clamped_shifted = clamped - nudged_min;
-    outputs.device(d) =
-        (clamped_shifted / nudged_scale_repl - quant_zero + 0.5f).floor() *
-        nudged_scale_repl;
+    outputs.device(d) = (clamped_shifted / nudged_scale_repl + 0.5f).floor() *
+                            nudged_scale_repl +
+                        nudged_min;
   }
 };
 
@@ -213,15 +212,13 @@ struct FakeQuantWithMinMaxVarsPerChannelFunctor {
       float nudged_min, nudged_max, nudged_scale;
       Nudge(min_val, max_val, quant_min, quant_max, &nudged_min, &nudged_max,
             &nudged_scale);
-      const float quant_zero = floor(-nudged_min / nudged_scale + 0.5f);
-
       const auto clamped =
           inputs.chip<1>(i).cwiseMin(nudged_max).cwiseMax(nudged_min);
       const auto clamped_shifted = clamped - nudged_min;
 
       outputs.chip<1>(i).device(d) =
-          (clamped_shifted / nudged_scale - quant_zero + 0.5f).floor() *
-          nudged_scale;
+          (clamped_shifted / nudged_scale + 0.5f).floor() * nudged_scale +
+          nudged_min;
     }
   }
 };
",0,train
11fb9e3fcca1567d4e09d9fa043d97e31b66d8a7,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 377602003
Change-Id: Ied5dd5cd7f963a157719f20600f0326e14a5a964",fake_quant_ops_test.cc,"@@ -54,9 +54,7 @@ class QuantOpsTest : public OpsTestBase {
                                       const bool narrow_range, const float min,
                                       const float max, const TensorShape& shape,
                                       const gtl::ArraySlice<float> data,
-                                      gtl::ArraySlice<float> expected_data,
-                                      const double atol = -1.0,
-                                      const double rtol = -1.0) {
+                                      gtl::ArraySlice<float> expected_data) {
     TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxArgs"")
                      .Input(FakeInput(DT_FLOAT))  // inputs
                      .Attr(""min"", min)
@@ -74,16 +72,14 @@ class QuantOpsTest : public OpsTestBase {
     Tensor* output = GetOutput(0);
     Tensor expected(allocator(), DT_FLOAT, shape);
     FillValues<float>(&expected, expected_data);
-    ExpectClose(expected, *output, atol, rtol);
+    ExpectClose(expected, *output);
   }
 
   void RunTestFakeQuantWithMinMaxVars(const int num_bits,
                                       const bool narrow_range, const float min,
                                       const float max, const TensorShape& shape,
                                       const gtl::ArraySlice<float> data,
-                                      gtl::ArraySlice<float> expected_data,
-                                      const double atol = -1.0,
-                                      const double rtol = -1.0) {
+                                      gtl::ArraySlice<float> expected_data) {
     TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxVars"")
                      .Input(FakeInput(DT_FLOAT))  // inputs
                      .Input(FakeInput(DT_FLOAT))  // min
@@ -105,15 +101,14 @@ class QuantOpsTest : public OpsTestBase {
     Tensor* output = GetOutput(0);
     Tensor expected(allocator(), DT_FLOAT, TensorShape({2, 3}));
     FillValues<float>(&expected, expected_data);
-    ExpectClose(expected, *output, atol, rtol);
+    ExpectClose(expected, *output);
   }
 
   void RunTestFakeQuantWithMinMaxVarsPerChannel(
       const int num_bits, const bool narrow_range,
       const TensorShape& minmax_shape, const gtl::ArraySlice<float> min,
       const gtl::ArraySlice<float> max, const TensorShape& shape,
-      const gtl::ArraySlice<float> data, gtl::ArraySlice<float> expected_data,
-      const double atol = -1.0, const double rtol = -1.0) {
+      const gtl::ArraySlice<float> data, gtl::ArraySlice<float> expected_data) {
     TF_EXPECT_OK(NodeDefBuilder(""op"", ""FakeQuantWithMinMaxVarsPerChannel"")
                      .Input(FakeInput(DT_FLOAT))  // inputs
                      .Input(FakeInput(DT_FLOAT))  // min
@@ -135,30 +130,10 @@ class QuantOpsTest : public OpsTestBase {
     Tensor* output = GetOutput(0);
     Tensor expected(allocator(), DT_FLOAT, shape);
     FillValues<float>(&expected, expected_data);
-    ExpectClose(expected, *output, atol, rtol);
+    ExpectClose(expected, *output);
   }
 };
 
-TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_RegularRange) {
-  // Original quantization range: [-10, 10], scale: 20/255.
-  // Original zero point: 127.5, nudged zero point 128.0.
-  // Expected quantized values: 0.0.
-  RunTestFakeQuantWithMinMaxArgs(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
-                                 0.0);
-}
-
-TEST_F(QuantOpsTest, WithArgsSymmetricRangeZeroInput_NarrowRange) {
-  // Original quantization range: [-10, 10], scale: 20/254.
-  // Original zero point: 128., no nudging necessary.
-  // Expected quantized values: 0.0.
-  RunTestFakeQuantWithMinMaxArgs(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
-                                 0.0);
-}
-
 TEST_F(QuantOpsTest, WithArgsNoNudging_RegularRange) {
   // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
   // Original zero point: 40, no nudging necessary.
@@ -506,26 +481,6 @@ TEST_F(QuantOpsTest, WithVars_ZeroMinAndMax) {
                                  {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f});
 }
 
-TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_RegularRange) {
-  // Original quantization range: [-10, 10], scale: 20/255.
-  // Original zero point: 127.5, nudged zero point 128.
-  // Expected quantized values: 0.
-  RunTestFakeQuantWithMinMaxVars(8, false, -10.0f, 10.0f, TensorShape({2, 3}),
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
-                                 0.0);
-}
-
-TEST_F(QuantOpsTest, WithVarsSymmetricRangeZeroInput_NarrowRange) {
-  // Original quantization range: [-10, 10], scale: 20/254.
-  // Original zero point: 128., no nudging necessary.
-  // Expected quantized values: 0.
-  RunTestFakeQuantWithMinMaxVars(8, true, -10.0f, 10.0f, TensorShape({2, 3}),
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f},
-                                 {0.0f, 0.0f, 0.0f, 0.0f, 0.0f, 0.0f}, 0.0,
-                                 0.0);
-}
-
 TEST_F(QuantOpsTest, WithVarsNoNudging_RegularRange) {
   // Original quantization range: [-10 + 0 / 4, -10 + 255 / 4], scale: 1/4.
   // Original zero point: 40, no nudging necessary.
@@ -913,26 +868,6 @@ TEST_F(QuantOpsTest, WithVarsPerChannel_ZeroMinAndMax) {
       {0.0f, 0.0f, 0.0f, 0.0f});
 }
 
-TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_RegularRange) {
-  // Original quantization range: [-10, 10], scale: 20/255.
-  // Original zero point: 127.5, nudged zero point 128.0.
-  // Expected quantized values: 0.
-  RunTestFakeQuantWithMinMaxVarsPerChannel(
-      8, false, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
-      {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
-      {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
-}
-
-TEST_F(QuantOpsTest, WithVarsPerChannelSymmetricRangeZeroInput_NarrowRange) {
-  // Original quantization range: [-10, 10], scale: 20/254.
-  // Original zero point: 128.0, no nudging necessary.
-  // Expected quantized values: 0.
-  RunTestFakeQuantWithMinMaxVarsPerChannel(
-      8, true, TensorShape({4}), {-10.0f, -10.0f, -10.0f, -10.0f},
-      {10.0f, 10.0f, 10.0f, 10.0f}, TensorShape({4}), {0.0f, 0.0f, 0.0f, 0.0f},
-      {0.0f, 0.0f, 0.0f, 0.0f}, 0.0, 0.0);
-}
-
 TEST_F(QuantOpsTest, WithVarsPerChannelDim1NudgedDown_RegularRange) {
   // Original quantization ranges: [-0.4 / 4 + 0 / 4, -0.4 / 4 + 255 / 4].
   // Scale: 1/4,  original zero point: 0.4, nudged to 0.
",0,train
ff9fa17710e8f9ee2b74f4022f0ce72a8ca23f6b,tensorflow/tensorflow,"[Trackable Method Migration] Change `checkpoint_dependencies` property to `_trackable_children` method.

PiperOrigin-RevId: 419693734
Change-Id: Iba7549714c3fcd984fb5e5bfe4517137678c8e4f",rnn_cell_test.py,"@@ -237,9 +237,8 @@ class RNNTest(test.TestCase):
     cell = Plus1RNNCell()
     full_dropout_cell = rnn_cell.DropoutWrapper(
         cell, input_keep_prob=1e-6, seed=0)
-    (name, dep), = full_dropout_cell._checkpoint_dependencies
-    self.assertIs(dep, cell)
-    self.assertEqual(""cell"", name)
+    self.assertIn(""cell"", full_dropout_cell._trackable_children())
+    self.assertIs(full_dropout_cell._trackable_children()[""cell""], cell)
     batch_size = 2
     input_size = 5
     max_length = 8
@@ -2584,8 +2583,8 @@ class RNNCellTest(test.TestCase, parameterized.TestCase):
               ],
               state_is_tuple=False)
           self.assertEqual(cell.dtype, None)
-          self.assertEqual(""cell-0"", cell._checkpoint_dependencies[0].name)
-          self.assertEqual(""cell-1"", cell._checkpoint_dependencies[1].name)
+          self.assertIn(""cell-0"", cell._trackable_children())
+          self.assertIn(""cell-1"", cell._trackable_children())
           cell.get_config()  # Should not throw an error
           g, out_m = cell(x, m)
           # Layer infers the input type.
@@ -2830,10 +2829,10 @@ class RNNCellTest(test.TestCase, parameterized.TestCase):
         bias_initializer=init_ops.constant_initializer(0.5))
     g, m_new = base_cell(x, m)
     wrapper_object = wrapper_type(base_cell)
-    (name, dep), = wrapper_object._checkpoint_dependencies
     wrapper_object.get_config()  # Should not throw an error
-    self.assertIs(dep, base_cell)
-    self.assertEqual(""cell"", name)
+
+    self.assertIn(""cell"", wrapper_object._trackable_children())
+    self.assertIs(wrapper_object._trackable_children()[""cell""], base_cell)
 
     g_res, m_new_res = wrapper_object(x, m)
     self.evaluate([variables_lib.global_variables_initializer()])
@@ -2873,10 +2872,8 @@ class RNNCellTest(test.TestCase, parameterized.TestCase):
     m = array_ops.zeros([1, 3])
     cell = rnn_cell_impl.GRUCell(3)
     wrapped_cell = wrapper_type(cell, ""/cpu:0"")
-    (name, dep), = wrapped_cell._checkpoint_dependencies
     wrapped_cell.get_config()  # Should not throw an error
-    self.assertIs(dep, cell)
-    self.assertEqual(""cell"", name)
+    self.assertEqual(wrapped_cell._trackable_children()[""cell""], cell)
 
     outputs, _ = wrapped_cell(x, m)
     self.assertIn(""cpu:0"", outputs.device.lower())
",0,train
e9a33ba0e504838ce9781b7cfa4401d67cecff80,tensorflow/tensorflow,"Fix kernel logging macros so that they work when TF_LITE_STRIP_ERROR_STRINGS is defined

PiperOrigin-RevId: 439244018",common.h,"@@ -173,8 +173,9 @@ void TfLiteFloatArrayFree(TfLiteFloatArray* a);
     }                                                 \
   } while (false)
 #else  // TF_LITE_STRIP_ERROR_STRINGS
-#define TF_LITE_KERNEL_LOG(context, ...)
-#define TF_LITE_MAYBE_KERNEL_LOG(context, ...)
+#define UNUSED(...) (void)sizeof(__VA_ARGS__)
+#define TF_LITE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__)
+#define TF_LITE_MAYBE_KERNEL_LOG(context, ...) UNUSED(__VA_ARGS__)
 #endif  // TF_LITE_STRIP_ERROR_STRINGS
 
 // Check whether value is true, and if not return kTfLiteError from
",0,test
28c9bc3ab1bbcba5056470519bd1118f2722e05b,tensorflow/tensorflow,"Slightly simplify base64 decoding function.
Change: 131142953",base64.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 #include ""tensorflow/core/lib/strings/base64.h""
 
+#include <cstring>
 #include <memory>
 #include ""tensorflow/core/lib/core/errors.h""
 
@@ -57,38 +58,14 @@ inline uint32 Convert(char x) {
   return static_cast<uint32>(z);
 }
 
-Status DecodeOneChar(const char* codes, char* result) {
-  const uint32 packed = (Convert(codes[0]) << 2) |
-                        (Convert(codes[1]) >> 4);
+Status DecodeThreeChars(const char* codes, char* result) {
+  const uint32 packed = (Convert(codes[0]) << 18) | (Convert(codes[1]) << 12) |
+                        (Convert(codes[2]) << 6) | (Convert(codes[3]));
   // Convert() return value has upper 25 bits set if input is invalid.
   // Therefore `packed` has high bits set iff at least one of code is invalid.
   if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) {
     return errors::InvalidArgument(""Invalid character found in base64."");
   }
-  *result = static_cast<char>(packed);
-  return Status::OK();
-}
-
-Status DecodeTwoChars(const char* codes, char* result) {
-  const uint32 packed = (Convert(codes[0]) << 10) |
-                        (Convert(codes[1]) << 4) |
-                        (Convert(codes[2]) >> 2);
-  if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) {
-    return errors::InvalidArgument(""Invalid character found in base64."");
-  }
-  result[0] = static_cast<char>(packed >> 8);
-  result[1] = static_cast<char>(packed);
-  return Status::OK();
-}
-
-Status DecodeThreeChars(const char* codes, char* result) {
-  const uint32 packed = (Convert(codes[0]) << 18) |
-                        (Convert(codes[1]) << 12) |
-                        (Convert(codes[2]) << 6) |
-                        (Convert(codes[3]));
-  if (TF_PREDICT_FALSE((packed & 0xFF000000) != 0)) {
-    return errors::InvalidArgument(""Invalid character found in base64."");
-  }
   result[0] = static_cast<char>(packed >> 16);
   result[1] = static_cast<char>(packed >> 8);
   result[2] = static_cast<char>(packed);
@@ -106,7 +83,10 @@ Status Base64Decode(StringPiece data, string* decoded) {
     return Status::OK();
   }
 
-  // max_decoded_size may overestimate by up to 3 bytes.
+  // This decoding procedure will write 3 * ceil(data.size() / 4) bytes to be
+  // output buffer, then truncate if necessary. Therefore we must overestimate
+  // and allocate sufficient amount. Currently max_decoded_size may overestimate
+  // by up to 3 bytes.
   const size_t max_decoded_size = 3 * (data.size() / 4) + 3;
   std::unique_ptr<char[]> buffer(new char[max_decoded_size]);
   char* current = buffer.get();
@@ -135,25 +115,22 @@ Status Base64Decode(StringPiece data, string* decoded) {
     }
   }
 
-  switch (end - b64) {
-    case 4:
-      TF_RETURN_IF_ERROR(DecodeThreeChars(b64, current));
-      current += 3;
-      break;
-    case 3:
-      TF_RETURN_IF_ERROR(DecodeTwoChars(b64, current));
-      current += 2;
-      break;
-    case 2:
-      TF_RETURN_IF_ERROR(DecodeOneChar(b64, current));
-      current += 1;
-      break;
-    default:  // case 1
-      // We may check this condition early by checking data.size() % 4 == 1.
-      return errors::InvalidArgument(
-          ""Base64 string length cannot be 1 modulo 4."");
+  const int remain = end - b64;
+  if (TF_PREDICT_FALSE(remain == 1)) {
+    // We may check this condition early by checking data.size() % 4 == 1.
+    return errors::InvalidArgument(
+        ""Base64 string length cannot be 1 modulo 4."");
   }
 
+  // A valid base64 character will replace paddings, if any.
+  char tail[4] = {kBase64UrlSafeChars[0], kBase64UrlSafeChars[0],
+                  kBase64UrlSafeChars[0], kBase64UrlSafeChars[0]};
+  // Copy tail of the input into the array, then decode.
+  std::memcpy(tail, b64, remain * sizeof(*b64));
+  TF_RETURN_IF_ERROR(DecodeThreeChars(tail, current));
+  // We know how many parsed characters are valid.
+  current += remain - 1;
+
   decoded->assign(buffer.get(), current - buffer.get());
   return Status::OK();
 }
",0,train
d8ee3cbf769323a185614d9c427f809d135c9830,tensorflow/tensorflow,"Make InTopK return False if any of the predictions are NaN or non-finite.
Change: 114218051",in_topk_op.cc,"@@ -56,11 +56,20 @@ class InTopK : public OpKernel {
     const auto num_classes = predictions.dimension(1);
     for (int b = 0; b < size; b++) {
       T target_prediction = predictions(b, targets(b));
+      bool cannot_say = !std::isfinite(target_prediction);
       int more_probable_classes = 0;
-      for (int i = 0; i < num_classes; ++i) {
-        if (predictions(b, i) > target_prediction) ++more_probable_classes;
+      if (!cannot_say) {
+        for (int i = 0; i < num_classes; ++i) {
+          T pred = predictions(b, i);
+          if (!std::isfinite(pred)) {
+            cannot_say = true;
+            break;
+          } else if (pred > target_prediction) {
+            ++more_probable_classes;
+          }
+        }
       }
-      out(b) = more_probable_classes < k_;
+      out(b) = cannot_say ? false : (more_probable_classes < k_);
     }
   }
 
@@ -68,13 +77,11 @@ class InTopK : public OpKernel {
   int k_;
 };
 
-REGISTER_KERNEL_BUILDER(Name(""InTopK"")
-                            .Device(DEVICE_CPU)
-                            .TypeConstraint<int32>(""T""),
-                        InTopK<float, int32>);
-REGISTER_KERNEL_BUILDER(Name(""InTopK"")
-                            .Device(DEVICE_CPU)
-                            .TypeConstraint<int64>(""T""),
-                        InTopK<float, int64>);
+REGISTER_KERNEL_BUILDER(
+    Name(""InTopK"").Device(DEVICE_CPU).TypeConstraint<int32>(""T""),
+    InTopK<float, int32>);
+REGISTER_KERNEL_BUILDER(
+    Name(""InTopK"").Device(DEVICE_CPU).TypeConstraint<int64>(""T""),
+    InTopK<float, int64>);
 
 }  // namespace tensorflow
",0,train
d8ee3cbf769323a185614d9c427f809d135c9830,tensorflow/tensorflow,"Make InTopK return False if any of the predictions are NaN or non-finite.
Change: 114218051",in_topk_op_test.py,"@@ -55,6 +55,11 @@ class InTopKTest(tf.test.TestCase):
     target = np.asarray([0, 2]).astype(np.int64)
     self._validateInTopK(predictions, target, 2, [False, True])
 
+  def testInTopNan(self):
+    predictions = [[0.1, float(""nan""), 0.2, 0.4], [0.1, 0.2, 0.3, float(""inf"")]]
+    target = [0, 2]
+    self._validateInTopK(predictions, target, 2, [False, False])
+
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
786ec0a88129c4dc729994ac1e6956bdb8ac5da1,tensorflow/tensorflow,"Update GraphDef version to 873.

PiperOrigin-RevId: 393607022
Change-Id: I01c66290c41847f9ee219dea196959841ed72cba",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 872  // Updated: 2021/8/28
+#define TF_GRAPH_DEF_VERSION 873  // Updated: 2021/8/29
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
e48dc7f745a932a6ae6c21bb7b24bee1e76f1e5f,tensorflow/tensorflow,Adding a trivial helper function.,kernels.cc,"@@ -439,6 +439,23 @@ void TF_OpKernelConstruction_GetAttrStringList(TF_OpKernelConstruction* ctx,
   }
 }
 
+void TF_OpKernelConstruction_GetAttrTensorShape(TF_OpKernelConstruction* ctx,
+                                                const char* attr_name, int64_t* values,
+                                                size_t max_vals,
+                                                TF_Status* status) {
+  ::tensorflow::TensorShape shape;
+  auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelConstruction*>(ctx);
+  ::tensorflow::Status s = cc_ctx->GetAttr(attr_name, &shape);
+  ::tensorflow::Set_TF_Status_from_Status(status, s);
+
+  if (!status->status.ok()) return;
+
+  const auto len = std::min(max_vals, (size_t)shape.dims());
+  for (int i = 0; i < len; ++i) {
+    values[i] = static_cast<int64_t>(shape.dim_size(i));
+  }
+}
+
 bool TF_OpKernelConstruction_HasAttr(TF_OpKernelConstruction* ctx,
                                      const char* attr_name, TF_Status* status) {
   auto* cc_ctx = reinterpret_cast<::tensorflow::OpKernelConstruction*>(ctx);
",0,train
e48dc7f745a932a6ae6c21bb7b24bee1e76f1e5f,tensorflow/tensorflow,Adding a trivial helper function.,kernels.h,"@@ -322,6 +322,18 @@ TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrStringList(
     size_t* lengths, int max_values, void* storage, size_t storage_size,
     TF_Status* status);
 
+// Interprets the named kernel construction attribute as a shape attribute and
+// fills in `values` with the size of each dimension.
+// `values` must point to an array of length at least `max_vals` (ideally set
+// to total_size from
+// TF_OpKernelConstruction_GetAttrSize(ctx, attr_name, &list_size,
+// &total_size)).
+TF_CAPI_EXPORT extern void TF_OpKernelConstruction_GetAttrTensorShape(
+                                           TF_OpKernelConstruction* ctx,
+                                           const char* attr_name, int64_t* values,
+                                           size_t max_vals,
+                                           TF_Status* status);
+
 // Return true if the kernel construction has the attr_name
 TF_CAPI_EXPORT extern bool TF_OpKernelConstruction_HasAttr(
     TF_OpKernelConstruction* ctx, const char* attr_name, TF_Status* status);
",0,train
b7bc8650cd673e2b12ed2a9b5a81d8074cee1e2a,tensorflow/tensorflow,fixed grammar in dataset_ops and readers,dataset_ops.py,"@@ -278,9 +278,9 @@ class DatasetV2(object):
     Note that if `tensors` contains a NumPy array, and eager execution is not
     enabled, the values will be embedded in the graph as one or more
     `tf.constant` operations. For large datasets (> 1 GB), this can waste
-    memory and run into byte limits of graph serialization.  If tensors contains
-    one or more large NumPy arrays, consider the alternative described in
-    [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays).
+    memory and run into byte limits of graph serialization. If `tensors`
+    contains one or more large NumPy arrays, consider the alternative described
+    in [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays).
 
     Args:
       tensors: A nested structure of tensors.
@@ -297,9 +297,9 @@ class DatasetV2(object):
     Note that if `tensors` contains a NumPy array, and eager execution is not
     enabled, the values will be embedded in the graph as one or more
     `tf.constant` operations. For large datasets (> 1 GB), this can waste
-    memory and run into byte limits of graph serialization.  If tensors contains
-    one or more large NumPy arrays, consider the alternative described in
-    [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays).
+    memory and run into byte limits of graph serialization. If `tensors`
+    contains one or more large NumPy arrays, consider the alternative described
+    in [this guide](https://tensorflow.org/guide/datasets#consuming_numpy_arrays).
 
     Args:
       tensors: A nested structure of tensors, each having the same size in the
@@ -566,7 +566,7 @@ class DatasetV2(object):
     ```
 
     Args:
-      *args: follow same semantics as python's xrange.
+      *args: follows the same semantics as python's xrange.
         len(args) == 1 -> start = 0, stop = args[0], step = 1
         len(args) == 2 -> start = args[0], stop = args[1], step = 1
         len(args) == 3 -> start = args[0], stop = args[1, stop = args[2]
@@ -852,10 +852,10 @@ class DatasetV2(object):
 
     Raises:
       ValueError: if `num_shards` or `index` are illegal values. Note: error
-        checking is done on a best-effort basis, and aren't guaranteed to be
-        caught upon dataset creation. (e.g. providing in a placeholder tensor
-        bypasses the early checking, and will instead result in an error during
-        a session.run call.)
+        checking is done on a best-effort basis, and errors aren't guaranteed
+        to be caught upon dataset creation. (e.g. providing in a placeholder
+        tensor bypasses the early checking, and will instead result in an error
+        during a session.run call.)
     """"""
     num_shards = ops.convert_to_tensor(
         num_shards, name=""num_shards"", dtype=dtypes.int64)
@@ -892,7 +892,7 @@ class DatasetV2(object):
       batch_size: A `tf.int64` scalar `tf.Tensor`, representing the number of
         consecutive elements of this dataset to combine in a single batch.
       drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
-        whether the last batch should be dropped in the case its has fewer than
+        whether the last batch should be dropped in the case it has fewer than
         `batch_size` elements; the default behavior is not to drop the smaller
         batch.
 
@@ -949,7 +949,7 @@ class DatasetV2(object):
         respective components.  Defaults are `0` for numeric types and
         the empty string for string types.
       drop_remainder: (Optional.) A `tf.bool` scalar `tf.Tensor`, representing
-        whether the last batch should be dropped in the case its has fewer than
+        whether the last batch should be dropped in the case it has fewer than
         `batch_size` elements; the default behavior is not to drop the smaller
         batch.
 
@@ -1573,7 +1573,7 @@ class DatasetV1Adapter(DatasetV1):
 class Options(object):
   """"""Represents options for tf.data.Dataset.
 
-  An `Options` object can be for instance used to control which static
+  An `Options` object can be, for instance, used to control which static
   optimizations to apply or whether to use performance modeling to dynamically
   tune the parallelism of operations such as `tf.data.Dataset.map` or
   `tf.data.Dataset.interleave`.
",0,train
b7bc8650cd673e2b12ed2a9b5a81d8074cee1e2a,tensorflow/tensorflow,fixed grammar in dataset_ops and readers,readers.py,"@@ -180,7 +180,7 @@ class TFRecordDatasetV2(dataset_ops.DatasetV2):
 
   def __init__(self, filenames, compression_type=None, buffer_size=None,
                num_parallel_reads=None):
-    """"""Creates a `TFRecordDataset` to read for one or more TFRecord files.
+    """"""Creates a `TFRecordDataset` to read one or more TFRecord files.
 
     NOTE: The `num_parallel_reads` argument can be used to improve performance
     when reading from a remote filesystem.
",0,train
2473778dbe885da80e49a78022cc7efb60c6789d,tensorflow/tensorflow,"Revert ""Internal change""

This reverts commit 44ee91f82d40c79c248defea21010e63f58e0857.",legalize_tf.cc,"@@ -50,6 +50,7 @@ limitations under the License.
 #include ""tensorflow/compiler/mlir/lite/utils/validators.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops.h""
 #include ""tensorflow/compiler/mlir/tensorflow/ir/tf_ops_a_m.h""
+#include ""tensorflow/compiler/mlir/tensorflow/transforms/lower_tf.h""
 #include ""tensorflow/compiler/mlir/tensorflow/utils/mangling_util.h""
 #include ""tensorflow/compiler/xla/status.h""
 #include ""tensorflow/compiler/xla/statusor.h""
@@ -661,6 +662,9 @@ void LegalizeTF::runOnFunction() {
   auto* context = &getContext();
   auto func = getFunction();
 
+  // Add TF->TF lowering patterns.
+  TF::PopulateLoweringTFPatterns(context, &patterns);
+
   // Add the generated patterns to the list.
   populateWithGenerated(context, patterns);
   patterns
",0,train
6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes:
- support configurable input_fn calling in Estimator subclasses.
- pass params and config to the input_fn.
- allow callables for model_fn and input_fn.

PiperOrigin-RevId: 159725554",tpu_estimator.py,"@@ -307,7 +307,7 @@ def _convert_model_fn_to_train_step(model_fn, dequeue_fn, mode, run_config):
 
   def _call_model_fn(features, labels):
     """"""Calls the model_fn with required parameters.""""""
-    model_fn_args = estimator_lib._model_fn_args(model_fn)  # pylint: disable=protected-access
+    model_fn_args = estimator_lib._fn_args(model_fn)  # pylint: disable=protected-access
     kwargs = {}
     if 'mode' in model_fn_args:
       kwargs['mode'] = mode
",0,test
6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes:
- support configurable input_fn calling in Estimator subclasses.
- pass params and config to the input_fn.
- allow callables for model_fn and input_fn.

PiperOrigin-RevId: 159725554",estimator.py,"@@ -52,7 +52,7 @@ from tensorflow.python.util import tf_inspect
 
 
 _VALID_MODEL_FN_ARGS = set(
-    ['features', 'labels', 'mode', 'params', 'config'])
+    ['features', 'labels', 'mode', 'params', 'self', 'config'])
 
 
 class Estimator(object):
@@ -357,7 +357,7 @@ class Estimator(object):
               }
 
   def _assert_members_are_not_overridden(self):
-    allowed_overrides = set(['_create_global_step'])
+    allowed_overrides = set(['_call_input_fn', '_create_global_step'])
     estimator_members = set([m for m in Estimator.__dict__.keys()
                              if not m.startswith('__')])
     subclass_members = set(self.__class__.__dict__.keys())
@@ -485,7 +485,7 @@ class Estimator(object):
       return export_dir
 
   def _get_features_from_input_fn(self, input_fn):
-    result = input_fn()
+    result = self._call_input_fn(input_fn)
     if not ops.get_default_graph().get_collection(ops.GraphKeys.QUEUE_RUNNERS):
       logging.warning('Input graph does not contain a QueueRunner. '
                       'That means predict yields forever. '
@@ -549,6 +549,29 @@ class Estimator(object):
     assert step.dtype.is_integer
     return step
 
+  def _call_input_fn(self, input_fn):
+    """"""Calls the input function.
+
+    Args:
+      input_fn: The input function.
+
+    Returns:
+      Either features or (features, labels) where features and labels are:
+        features - `Tensor` or dictionary of string feature name to `Tensor`.
+        labels - `Tensor` or dictionary of `Tensor` with labels.
+
+    Raises:
+      ValueError: if input_fn takes invalid arguments.
+    """"""
+    input_fn_args = _fn_args(input_fn)
+    kwargs = {}
+    if 'params' in input_fn_args:
+      kwargs['params'] = self.params
+    if 'config' in input_fn_args:
+      kwargs['config'] = self.config
+    with ops.device('/cpu:0'):
+      return input_fn(**kwargs)
+
   def _call_model_fn(self, features, labels, mode):
     """"""Calls model function.
 
@@ -563,7 +586,7 @@ class Estimator(object):
     Raises:
       ValueError: if model_fn returns invalid objects.
     """"""
-    model_fn_args = _model_fn_args(self._model_fn)
+    model_fn_args = _fn_args(self._model_fn)
     kwargs = {}
     if 'mode' in model_fn_args:
       kwargs['mode'] = mode
@@ -584,8 +607,7 @@ class Estimator(object):
     with ops.Graph().as_default() as g, g.device(self._device_fn):
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
-      with ops.device('/cpu:0'):
-        features, labels = input_fn()
+      features, labels = self._call_input_fn(input_fn)
       estimator_spec = self._call_model_fn(features, labels,
                                            model_fn_lib.ModeKeys.TRAIN)
       ops.add_to_collection(ops.GraphKeys.LOSSES, estimator_spec.loss)
@@ -666,7 +688,7 @@ class Estimator(object):
     with ops.Graph().as_default() as g:
       random_seed.set_random_seed(self._config.tf_random_seed)
       global_step_tensor = self._create_and_assert_global_step(g)
-      features, labels = input_fn()
+      features, labels = self._call_input_fn(input_fn)
       estimator_spec = self._call_model_fn(
           features, labels, model_fn_lib.ModeKeys.EVAL)
 
@@ -749,7 +771,7 @@ def _get_replica_device_setter(config):
     return None
 
 
-def _model_fn_args(fn):
+def _fn_args(fn):
   """"""Get argument names for function-like object.
 
   Args:
@@ -762,6 +784,9 @@ def _model_fn_args(fn):
     ValueError: if partial function has positionally bound arguments
   """"""
   _, fn = tf_decorator.unwrap(fn)
+  if hasattr(fn, '__call__') and tf_inspect.ismethod(fn.__call__):
+    # Handle callables.
+    return tuple(tf_inspect.getargspec(fn.__call__).args)
   if hasattr(fn, 'func') and hasattr(fn, 'keywords') and hasattr(fn, 'args'):
     # Handle functools.partial and similar objects.
     return tuple([
@@ -774,7 +799,7 @@ def _model_fn_args(fn):
 
 def _verify_model_fn_args(model_fn, params):
   """"""Verifies model fn arguments.""""""
-  args = set(_model_fn_args(model_fn))
+  args = set(_fn_args(model_fn))
   if 'features' not in args:
     raise ValueError('model_fn (%s) must include features argument.' % model_fn)
   if 'labels' not in args:
",0,test
6d2df88d657c87fdce6365bce9b19f1c39b0b0b2,tensorflow/tensorflow,"Several Estimator changes:
- support configurable input_fn calling in Estimator subclasses.
- pass params and config to the input_fn.
- allow callables for model_fn and input_fn.

PiperOrigin-RevId: 159725554",estimator_test.py,"@@ -120,6 +120,9 @@ class EstimatorInheritanceConstraintTest(test.TestCase):
       def __init__(self):
         super(_Estimator, self).__init__(model_fn=dummy_model_fn)
 
+      def _call_input_fn(self, input_fn):
+        return input_fn()
+
       def _create_global_step(self, graph):
         pass
 
@@ -325,6 +328,79 @@ def _make_input_fn(features, labels):
 
 class EstimatorTrainTest(test.TestCase):
 
+  def test_callable_model_fn(self):
+    expected_features = {'x': 42., 'y': 43.}
+    expected_labels = 44.
+
+    model_fn_call_count = [0]
+
+    test_self = self
+
+    class ModelFn(object):
+
+      def __call__(self, features, labels):
+        model_fn_call_count[0] += 1
+        test_self.assertItemsEqual(expected_features.keys(), features.keys())
+        return _estimator_spec(
+            expected_features, expected_labels, features, labels,
+            model_fn_lib.ModeKeys.TRAIN)
+
+    with self.assertRaisesRegexp(ValueError, 'does not include params'):
+      estimator.Estimator(model_fn=ModelFn(), params={'a': 'b'})
+    est = estimator.Estimator(model_fn=ModelFn(), config=run_config.RunConfig())
+    self.assertEqual(0, model_fn_call_count[0])
+    est.train(
+        input_fn=_make_input_fn(expected_features, expected_labels), steps=1)
+    self.assertEqual(1, model_fn_call_count[0])
+
+  def test_callable_input_fn(self):
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    test_self = self
+
+    class InputFn(object):
+
+      def __call__(self, params, config):
+        input_fn_call_count[0] += 1
+        test_self.assertEqual(expected_params, params)
+        test_self.assertEqual(4321, config.tf_random_seed)
+        return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.train(InputFn(), steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
+  def test_input_fn_args(self):
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    def _input_fn(params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.train(_input_fn, steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
   def test_minimal_model_fn_args(self):
     expected_features = {'x': 42., 'y': 43.}
     expected_labels = 44.
@@ -665,6 +741,29 @@ class _StepCounterHook(session_run_hook.SessionRunHook):
 
 class EstimatorEvaluateTest(test.TestCase):
 
+  def test_input_fn_args(self):
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del params, config
+      return model_fn_global_step_incrementer(features, labels, mode)
+
+    def _input_fn(params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    est.train(dummy_input_fn, steps=1)
+    self.assertEqual(0, input_fn_call_count[0])
+    est.evaluate(_input_fn, steps=1)
+    self.assertEqual(1, input_fn_call_count[0])
+
   def test_model_fn_must_return_estimator_spec(self):
     def _model_fn(features, labels, mode):
       _, _ = features, labels
@@ -866,6 +965,33 @@ class EstimatorEvaluateTest(test.TestCase):
 
 class EstimatorPredictTest(test.TestCase):
 
+  def test_input_fn_args(self):
+    expected_params = {'batch_size': 10}
+    expected_config = run_config.RunConfig().replace(tf_random_seed=4321)
+    input_fn_call_count = [0]
+
+    def _model_fn(features, labels, mode, params, config):
+      del features, labels, params, config
+      return model_fn_lib.EstimatorSpec(
+          mode,
+          loss=constant_op.constant(0.),
+          train_op=state_ops.assign_add(training.get_global_step(), 1),
+          predictions=constant_op.constant([[10.]]))
+
+    def _input_fn(params, config):
+      input_fn_call_count[0] += 1
+      self.assertEqual(expected_params, params)
+      self.assertEqual(4321, config.tf_random_seed)
+      return dummy_input_fn()
+
+    est = estimator.Estimator(model_fn=_model_fn,
+                              params=expected_params,
+                              config=expected_config)
+    est.train(dummy_input_fn, steps=1)
+    self.assertEqual(0, input_fn_call_count[0])
+    next(est.predict(_input_fn))
+    self.assertEqual(1, input_fn_call_count[0])
+
   def test_no_trained_model_in_model_dir(self):
     est = estimator.Estimator(model_fn=model_fn_global_step_incrementer)
     with self.assertRaisesRegexp(ValueError,
",0,test
92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,fft_util.cc,"@@ -20,6 +20,8 @@ limitations under the License.
 #include ""kiss_fft.h""
 #include ""tools/kiss_fftr.h""
 
+#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h""
+
 int FftPopulateState(struct FftState* state, size_t input_size) {
   state->input_size = input_size;
   state->fft_size = 1;
@@ -28,14 +30,14 @@ int FftPopulateState(struct FftState* state, size_t input_size) {
   }
 
   state->input = reinterpret_cast<int16_t*>(
-      malloc(state->fft_size * sizeof(*state->input)));
+      microfrontend_alloc(state->fft_size * sizeof(*state->input)));
   if (state->input == nullptr) {
     fprintf(stderr, ""Failed to alloc fft input buffer\n"");
     return 0;
   }
 
-  state->output = reinterpret_cast<complex_int16_t*>(
-      malloc((state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
+  state->output = reinterpret_cast<complex_int16_t*>(microfrontend_alloc(
+      (state->fft_size / 2 + 1) * sizeof(*state->output) * 2));
   if (state->output == nullptr) {
     fprintf(stderr, ""Failed to alloc fft output buffer\n"");
     return 0;
@@ -49,7 +51,7 @@ int FftPopulateState(struct FftState* state, size_t input_size) {
     fprintf(stderr, ""Kiss memory sizing failed.\n"");
     return 0;
   }
-  state->scratch = malloc(scratch_size);
+  state->scratch = microfrontend_alloc(scratch_size);
   if (state->scratch == nullptr) {
     fprintf(stderr, ""Failed to alloc fft scratch buffer\n"");
     return 0;
@@ -66,7 +68,7 @@ int FftPopulateState(struct FftState* state, size_t input_size) {
 }
 
 void FftFreeStateContents(struct FftState* state) {
-  free(state->input);
-  free(state->output);
-  free(state->scratch);
+  microfrontend_free(state->input);
+  microfrontend_free(state->output);
+  microfrontend_free(state->scratch);
 }
",0,train
92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,filterbank_util.c,"@@ -18,6 +18,8 @@ limitations under the License.
 #include <math.h>
 #include <stdio.h>
 
+#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h""
+
 #define kFilterbankIndexAlignment 4
 #define kFilterbankChannelBlockSize 4
 
@@ -65,32 +67,32 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
            ? 1
            : kFilterbankIndexAlignment / sizeof(int16_t));
 
-  state->channel_frequency_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
+state->channel_frequency_starts =
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_frequency_starts));
   state->channel_weight_starts =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_weight_starts));
   state->channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*state->channel_widths));
-  state->work = malloc(num_channels_plus_1 * sizeof(*state->work));
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*state->channel_widths));
+  state->work = microfrontend_alloc(num_channels_plus_1 * sizeof(*state->work));
 
   float* center_mel_freqs =
-      malloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*center_mel_freqs));
   int16_t* actual_channel_starts =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*actual_channel_starts));
   int16_t* actual_channel_widths =
-      malloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
+      microfrontend_alloc(num_channels_plus_1 * sizeof(*actual_channel_widths));
 
   if (state->channel_frequency_starts == NULL ||
       state->channel_weight_starts == NULL || state->channel_widths == NULL ||
       center_mel_freqs == NULL || actual_channel_starts == NULL ||
       actual_channel_widths == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
+    microfrontend_free(center_mel_freqs);
+    microfrontend_free(actual_channel_starts);
+    microfrontend_free(actual_channel_widths);
     fprintf(stderr, ""Failed to allocate channel buffers\n"");
     return 0;
   }
-
+  
   CalculateCenterFrequencies(num_channels_plus_1, config->lower_band_limit,
                              config->upper_band_limit, center_mel_freqs);
 
@@ -165,9 +167,9 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
 
   // If the alloc failed, we also need to nuke the arrays.
   if (state->weights == NULL || state->unweights == NULL) {
-    free(center_mel_freqs);
-    free(actual_channel_starts);
-    free(actual_channel_widths);
+    microfrontend_free(center_mel_freqs);
+    microfrontend_free(actual_channel_starts);
+    microfrontend_free(actual_channel_widths);
     fprintf(stderr, ""Failed to allocate weights or unweights\n"");
     return 0;
   }
@@ -200,9 +202,9 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
     }
   }
 
-  free(center_mel_freqs);
-  free(actual_channel_starts);
-  free(actual_channel_widths);
+  microfrontend_free(center_mel_freqs);
+  microfrontend_free(actual_channel_starts);
+  microfrontend_free(actual_channel_widths);
   if (state->end_index >= spectrum_size) {
     fprintf(stderr, ""Filterbank end_index is above spectrum size.\n"");
     return 0;
@@ -211,10 +213,10 @@ int FilterbankPopulateState(const struct FilterbankConfig* config,
 }
 
 void FilterbankFreeStateContents(struct FilterbankState* state) {
-  free(state->channel_frequency_starts);
-  free(state->channel_weight_starts);
-  free(state->channel_widths);
-  free(state->weights);
-  free(state->unweights);
-  free(state->work);
+  microfrontend_free(state->channel_frequency_starts);
+  microfrontend_free(state->channel_weight_starts);
+  microfrontend_free(state->channel_widths);
+  microfrontend_free(state->weights);
+  microfrontend_free(state->unweights);
+  microfrontend_free(state->work);
 }
",0,train
92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,pcan_gain_control_util.c,"@@ -17,6 +17,8 @@ limitations under the License.
 #include <math.h>
 #include <stdio.h>
 
+#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h""
+
 #define kint16max 0x00007FFF
 
 void PcanGainControlFillConfigWithDefaults(
@@ -52,7 +54,7 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
   }
   state->noise_estimate = noise_estimate;
   state->num_channels = num_channels;
-  state->gain_lut = malloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
+  state->gain_lut = microfrontend_alloc(kWideDynamicFunctionLUTSize * sizeof(int16_t));
   if (state->gain_lut == NULL) {
     fprintf(stderr, ""Failed to allocate gain LUT\n"");
     return 0;
@@ -88,5 +90,5 @@ int PcanGainControlPopulateState(const struct PcanGainControlConfig* config,
 }
 
 void PcanGainControlFreeStateContents(struct PcanGainControlState* state) {
-  free(state->gain_lut);
+  microfrontend_free(state->gain_lut);
 }
",0,train
92e8484f0abbc0f18f0e7d2680085fa3c285bf31,tensorflow/tensorflow,Stubbed out memory allocation calls in audio library,window_util.c,"@@ -19,6 +19,8 @@ limitations under the License.
 #include <stdlib.h>
 #include <string.h>
 
+#include ""tensorflow/lite/experimental/microfrontend/lib/memory_util.h""
+
 // Some platforms don't have M_PI
 #ifndef M_PI
 #define M_PI 3.14159265358979323846
@@ -34,7 +36,7 @@ int WindowPopulateState(const struct WindowConfig* config,
   state->size = config->size_ms * sample_rate / 1000;
   state->step = config->step_size_ms * sample_rate / 1000;
 
-  state->coefficients = malloc(state->size * sizeof(*state->coefficients));
+  state->coefficients = microfrontend_alloc(state->size * sizeof(*state->coefficients));
   if (state->coefficients == NULL) {
     fprintf(stderr, ""Failed to allocate window coefficients\n"");
     return 0;
@@ -51,13 +53,13 @@ int WindowPopulateState(const struct WindowConfig* config,
   }
 
   state->input_used = 0;
-  state->input = malloc(state->size * sizeof(*state->input));
+  state->input = microfrontend_alloc(state->size * sizeof(*state->input));
   if (state->input == NULL) {
     fprintf(stderr, ""Failed to allocate window input\n"");
     return 0;
   }
 
-  state->output = malloc(state->size * sizeof(*state->output));
+  state->output = microfrontend_alloc(state->size * sizeof(*state->output));
   if (state->output == NULL) {
     fprintf(stderr, ""Failed to allocate window output\n"");
     return 0;
@@ -67,7 +69,7 @@ int WindowPopulateState(const struct WindowConfig* config,
 }
 
 void WindowFreeStateContents(struct WindowState* state) {
-  free(state->coefficients);
-  free(state->input);
-  free(state->output);
+  microfrontend_free(state->coefficients);
+  microfrontend_free(state->input);
+  microfrontend_free(state->output);
 }
",0,train
87610fd68852983e97f5475a364b08272d080e48,tensorflow/tensorflow,"Add mhlo python binding generator target

This just invokes the generator backend & creates a filegroup.

PiperOrigin-RevId: 377318653
Change-Id: I0f750cb8890a5259f7e87805fde45bc4b8cb7d3b",mhlo.py,"@@ -0,0 +1,18 @@
+# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+
+# pylint: disable=wildcard-import,relative-beyond-top-level
+from ._mhlo_ops_gen import *
+# pylint: enable=wildcard-import,relative-beyond-top-level
",0,train
8a25f427db3d3dc5c9ddffc775b4c7dd4a96a6f9,tensorflow/tensorflow,"Enable BF16 SoftmaxGrad(Sum), and fix accuracy by accum type.",reduction_ops.h,"@@ -19,9 +19,9 @@ limitations under the License.
 // Functor definitions for Reduction ops, must be compilable by nvcc.
 
 #include <iostream>
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/tensor_types.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 namespace tensorflow {
 namespace functor {
@@ -58,6 +58,29 @@ struct ReduceEigenImpl {
   }
 };
 
+// Specialization for BF16 Reducer to fix accuracy.
+// TODO: all BF16 Reducers should have specializations to fix accuracy.
+#define CASTING_SPECIALIZATION(Reducer, ScalarType, IntermediateType)        \
+  template <typename Device, typename OUT_T, typename IN_T,                  \
+            typename ReductionAxes>                                          \
+  struct ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes,                 \
+                         Reducer<ScalarType>> {                              \
+    void operator()(const Device& d, OUT_T out, IN_T in,                     \
+                    const ReductionAxes& reduction_axes,                     \
+                    const Reducer<ScalarType>& reducer) {                    \
+      static_assert(std::is_same<ScalarType, typename OUT_T::Scalar>::value, \
+                    """");                                                     \
+      Reducer<IntermediateType> intermediate_reducer;                        \
+      auto in_as_intermediate = in.template cast<IntermediateType>();        \
+      out.device(d) =                                                        \
+          in_as_intermediate.reduce(reduction_axes, intermediate_reducer)    \
+              .template cast<ScalarType>();                                  \
+    }                                                                        \
+  };
+
+CASTING_SPECIALIZATION(Eigen::internal::SumReducer, bfloat16, float);
+#undef CASTING_SPECIALIZATION
+
 template <typename Device, typename OUT_T, typename IN_T,
           typename ReductionAxes, typename Scalar>
 struct ReduceEigenImpl<Device, OUT_T, IN_T, ReductionAxes,
",0,test
8a25f427db3d3dc5c9ddffc775b4c7dd4a96a6f9,tensorflow/tensorflow,"Enable BF16 SoftmaxGrad(Sum), and fix accuracy by accum type.",nn_grad.cc,"@@ -31,7 +31,11 @@ Status SoftmaxGrad(const AttrSlice& attrs, FunctionDef* g) {
       // Ret val defs
       {""grad_x: T""},
       // Attr defs
+#if defined(INTEL_MKL) && defined(ENABLE_INTEL_MKL_BFLOAT16)
+      {{""T: {float, double, bfloat16}""}},
+#else
       {{""T: {float, double}""}},
+#endif
       // Nodes
       // Based on _SoftmaxGrad in nn_grad.py.
       {
",0,test
8a25f427db3d3dc5c9ddffc775b4c7dd4a96a6f9,tensorflow/tensorflow,"Enable BF16 SoftmaxGrad(Sum), and fix accuracy by accum type.",math_ops_test.py,"@@ -44,6 +44,16 @@ class ReduceTest(test_util.TensorFlowTestCase):
       y_tf = self.evaluate(math_ops.reduce_sum(x))
       self.assertEqual(y_tf, 21)
 
+  def testReduceExtendType(self):
+    in_f32 = np.random.rand(1024, 1024).astype(np.float)
+    in_bf16 = math_ops.cast(in_f32, dtypes.bfloat16)
+
+    out_f32 = self.evaluate(math_ops.reduce_sum(in_f32))
+    out_bf16 = self.evaluate(math_ops.reduce_sum(in_bf16))
+    expected = math_ops.cast(out_f32, dtypes.bfloat16)
+
+    self.assertAllEqual(out_bf16, expected)
+
   def testReduceExplicitAxes(self):
     x = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int32)
     with test_util.device(use_gpu=True):
",0,test
2f2141f504f7584e2ac3b6115249e7b190bdeb02,tensorflow/tensorflow,"[XLA:GPU] Schedule a DCE pass after multi-output fusion

MOF leaves behind instructions that have been cloned into a fusion, blocking
merging more fusions into the input of the newly created multi-output fusion.

PiperOrigin-RevId: 210078458",nvptx_compiler.cc,"@@ -291,6 +291,7 @@ Status OptimizeHloModule(HloModule* hlo_module, se::StreamExecutor* stream_exec,
     fusion.AddPass<GpuMultiOutputFusion>();
     fusion.AddPass<HloCSE>(/*is_layout_sensitive=*/true,
                            /*only_fusion_computations=*/true);
+    fusion.AddPass<HloDCE>();
     TF_RETURN_IF_ERROR(fusion.Run(hlo_module).status());
 
     HloPassPipeline reduce_pipeline(""reduce-precision"");
",0,test
2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2.

TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs.

To handle positive and negative infinity, treat bf16 as integers in
sign-magnitude format. Convert to two's complement. Sort in two's complement and
convert back.

Add an exhaustive unit test for bfloat16 to float conversion.

PiperOrigin-RevId: 201421784",sort_ops_test.py,"@@ -81,7 +81,7 @@ class XlaSortOpTest(xla_test.XLATestCase):
 
   def testTopKZeros(self):
     """"""Tests that positive and negative zeros sort correctly.""""""
-    # Requires Sort HLO, which is not implemented on CPU or GPU.
+    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
     if self.device in [""XLA_CPU"", ""XLA_GPU""]:
       return
 
@@ -99,7 +99,32 @@ class XlaSortOpTest(xla_test.XLATestCase):
           {p: np.array([0., -0., 0., 3., -0., -4., 0., -0.], dtype=bfloat16)})
       self.assertAllEqual(
           np.array([3., 0., 0., 0.], dtype=bfloat16), results[0])
-      self.assertEqual(set([0, 2, 3, 6]), set(results[1]))
+      self.assertEqual(list([3, 0, 1, 2]), list(results[1]))
+
+  def testTopKInfinities(self):
+    """"""Tests that positive and negative infinity sort correctly.""""""
+    # TODO(b/26783907): The Sort HLO is not implemented on CPU or GPU.
+    if self.device in [""XLA_CPU"", ""XLA_GPU""]:
+      return
+
+    # Only bfloat16 is implemented.
+    bfloat16 = dtypes.bfloat16.as_numpy_dtype
+    if bfloat16 not in self.numeric_types:
+      return
+
+    with self.test_session() as sess:
+      p = array_ops.placeholder(dtypes.bfloat16)
+      with self.test_scope():
+        topk = nn_ops.top_k(p, k=6)
+      results = sess.run(topk, {
+          p: np.array(
+              [1, 2, float(""inf""), -float(""inf""), -1, -2], dtype=bfloat16)
+      })
+      self.assertAllEqual(
+          np.array(
+              [float(""inf""), 2.0, 1.0, -1.0, -2.0, -float(""inf"")],
+              dtype=bfloat16), results[0])
+      self.assertEqual(list([2, 1, 0, 4, 5, 3]), list(results[1]))
 
 
 if __name__ == ""__main__"":
",0,train
2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2.

TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs.

To handle positive and negative infinity, treat bf16 as integers in
sign-magnitude format. Convert to two's complement. Sort in two's complement and
convert back.

Add an exhaustive unit test for bfloat16 to float conversion.

PiperOrigin-RevId: 201421784",topk_op.cc,"@@ -61,42 +61,89 @@ class TopKOp : public XlaOpKernel {
     if (input_shape.dim_size(0) < k) {
       k = input_shape.dim_size(0);
     }
-    const xla::XlaOp input = context->Input(0);
-    xla::XlaOp iota;
-    OP_REQUIRES_OK(context, XlaHelpers::Iota(b, DT_INT32, n, &iota));
+    const xla::XlaOp input_bf16 = context->Input(0);
+    xla::XlaOp iota_s32;
+    OP_REQUIRES_OK(context, XlaHelpers::Iota(b, DT_INT32, n, &iota_s32));
 
     // TODO(b/73891930): add a key-value sort to HLO, rather than using
     // bit-packing tricks here.
-    // TODO(b/73891930): this implementation will convert Infs to NaNs. A
-    // key-value sort would avoid this; for now, it is no worse than, say, the
-    // CPU backend in fast-math mode.
+
+    xla::XlaOp zero = b->ConstantR0<int32>(0);
+
+    // max can either be 0x7FFFFFFF or 0x80000000. Neither choice is totally
+    // ideal. The implications of the choice are:
+    //
+    // 0x7FFFFFFF
+    // 1. +0.0 > -0.0
+    // 2. The elements of the inputs and outputs are bitwise identical.
+    // 3. The sort is unstable since a later +0.0 will appear before an earlier
+    // -0.0.
+    //
+    // 0x80000000
+    // 1. +0.0 == -0.0
+    // 2. All -0.0 in the input are replaced with +0.0 in the output.
+    // 3. The sort is stable.
+    xla::XlaOp max = b->ConstantR0<int32>(0x80000000);
+    xla::XlaOp index_mask = b->ConstantR0<int32>(0x0000FFFF);
+    xla::XlaOp value_mask = b->ConstantR0<int32>(0xFFFF0000);
+
+    // Convert to from bf16 to f32. The lower 16-bits are zero due to the
+    // definition of bf16.
+    xla::XlaOp input_f32 = b->ConvertElementType(input_bf16, xla::F32);
+
+    // Negate the input to reverse sort it. The lower 16-bits are zero, because
+    // negating a float is just inverting the high-bit.
+    xla::XlaOp negative_input_f32 = b->Neg(input_f32);
+
+    // Convert to a sign magnitude integer. The lower 16-bits are zero, since
+    // bitcast convert doesn't change any bits.
+    xla::XlaOp negative_input_sm32 =
+        b->BitcastConvertType(negative_input_f32, xla::S32);
+
+    // Convert from sign magnitude integer to two's complement integer. The
+    // lower 16-bits are zero on both sides of the select. On the false side,
+    // the value is unchanged, and on the true side, the lower 16-bits of max
+    // are all zero, so the lower 16-bits of the result of the subtraction will
+    // also be zero.
+    xla::XlaOp negative_input_s32 =
+        b->Select(b->Lt(negative_input_sm32, zero),
+                  b->Sub(max, negative_input_sm32), negative_input_sm32);
+
+    // In order for the Or with iota_s32 to work properly, the lower 16-bits
+    // of negative_input_s32 must be zero.
 
     // Pack elements as:
     // * upper 16 bits are the value
     // * lower 16 bits are the index.
-    xla::XlaOp packed = b->BitcastConvertType(
-        b->Or(b->BitcastConvertType(b->ConvertElementType(input, xla::F32),
-                                    xla::S32),
-              iota),
-        xla::F32);
+    xla::XlaOp packed_s32 = b->Or(negative_input_s32, iota_s32);
 
     // TODO(phawkins): use a more efficient algorithm that does not require a
     // full sort.
-    xla::XlaOp sorted = b->Slice(b->Rev(b->Sort(packed), {0}),
-                                 /*start_indices=*/{0},
-                                 /*limit_indices=*/{k},
-                                 /*strides=*/{1});
-
-    // Unpack the value/index
-    xla::XlaOp x = b->BitcastConvertType(sorted, xla::S32);
-    xla::XlaOp indices = b->And(x, b->ConstantR0<int32>(0x0000FFFF));
-    xla::XlaOp values = b->ConvertElementType(
-        b->BitcastConvertType(b->And(x, b->ConstantR0<int32>(0xFFFF0000)),
-                              xla::F32),
-        xla::BF16);
-
-    context->SetOutput(0, values);
-    context->SetOutput(1, indices);
+    xla::XlaOp sorted_s32 = b->Slice(b->Sort(packed_s32),
+                                     /*start_indices=*/{0},
+                                     /*limit_indices=*/{k},
+                                     /*strides=*/{1});
+
+    // Unpack the value/index.
+    xla::XlaOp indices_s32 = b->And(sorted_s32, index_mask);
+    xla::XlaOp negative_values_s32 = b->And(sorted_s32, value_mask);
+
+    // Convert from two's complement integer to sign magnitude integer.
+    xla::XlaOp negative_values_sm32 =
+        b->Select(b->Lt(negative_values_s32, zero),
+                  b->Sub(max, negative_values_s32), negative_values_s32);
+
+    xla::XlaOp negative_values_f32 =
+        b->BitcastConvertType(negative_values_sm32, xla::F32);
+
+    // Negate the values to get back the original inputs.
+    xla::XlaOp values_f32 = b->Neg(negative_values_f32);
+
+    // Convert from f32 to bf16.
+    xla::XlaOp values_bf16 = b->ConvertElementType(values_f32, xla::BF16);
+
+    context->SetOutput(0, values_bf16);
+    context->SetOutput(1, indices_s32);
   }
 
  private:
",0,train
2cd247d20422a41c33e0f4be265eba2df537ed3b,tensorflow/tensorflow,"Handle positive and negative infinity in TopKV2.

TopKV2 hides iota in the low bits of the input after converting from bf16 to f32. This usually works, but for positive and negative infinity or'ing in iota produces NANs.

To handle positive and negative infinity, treat bf16 as integers in
sign-magnitude format. Convert to two's complement. Sort in two's complement and
convert back.

Add an exhaustive unit test for bfloat16 to float conversion.

PiperOrigin-RevId: 201421784",convert_test.cc,"@@ -461,5 +461,26 @@ XLA_TEST_F(ConvertTest, ConvertS64U64) {
   ComputeAndCompareR1<uint64>(&builder, unsigned_x, {});
 }
 
+XLA_TEST_F(ConvertTest, ConvertBF16F32) {
+  XlaBuilder builder(TestName());
+
+  std::vector<bfloat16> all_bfloats(1 << 16);
+  for (int i = 0; i < all_bfloats.size(); ++i) {
+    all_bfloats[i].value = i;
+  }
+
+  std::vector<uint32> expected(all_bfloats.size());
+  for (int i = 0; i < expected.size(); ++i) {
+    expected[i] = (1U << 16) * i;
+  }
+
+  // Exhaustively test all bf16 to f32 conversions.
+  xla::XlaOp all_bfloats_bf16 = builder.ConstantR1<bfloat16>(all_bfloats);
+  xla::XlaOp all_bfloats_f32 =
+      builder.ConvertElementType(all_bfloats_bf16, F32);
+  xla::XlaOp all_bfloats_u32 = builder.BitcastConvertType(all_bfloats_f32, U32);
+  ComputeAndCompareR1<uint32>(&builder, expected, {});
+}
+
 }  // namespace
 }  // namespace xla
",0,train
7a08a4067cdbbcffbd158a4f018bb064b36fe909,tensorflow/tensorflow,"Add a number of missing headers being transitively pulled in.

This enables a few headers to be removed from implementations and, in turn,
simplifies the build graph somewhat.

PiperOrigin-RevId: 291452567
Change-Id: Ia29c0095f6444043f0f6fb7d91b3abd93dd983af",op_compatibility_test.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/kernels/ops_testutil.h""
+#include ""tensorflow/core/lib/core/status_test_util.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/platform/test.h""
 
",0,train
5f9852c3ea1046513369ef9c0b2a2c6c103b147d,tensorflow/tensorflow,"Fix how keras's model_to_estimator function is exported.

Fix issue if estimator is not present when API files are generated
but installed later.

PiperOrigin-RevId: 222107827",__init__.py,"@@ -24,23 +24,54 @@ from tensorflow.python.util.tf_export import tf_export
 # As long as you depend //third_party/py/tensorflow:tensorflow target
 # everything will work as normal.
 
-try:
-  from tensorflow.python.estimator import keras as keras_lib  # pylint: disable=g-import-not-at-top
-  model_to_estimator = tf_export('keras.estimator.model_to_estimator')(
-      keras_lib.model_to_estimator)
-except Exception:  # pylint: disable=broad-except
-
-  # pylint: disable=unused-argument
-  def stub_model_to_estimator(keras_model=None,
-                              keras_model_path=None,
-                              custom_objects=None,
-                              model_dir=None,
-                              config=None):
+
+# LINT.IfChange
+@tf_export('keras.estimator.model_to_estimator')
+def model_to_estimator(
+    keras_model=None,
+    keras_model_path=None,
+    custom_objects=None,
+    model_dir=None,
+    config=None):
+  """"""Constructs an `Estimator` instance from given keras model.
+
+  For usage example, please see:
+  [Creating estimators from Keras
+  Models](https://tensorflow.org/guide/estimators#model_to_estimator).
+
+  Args:
+    keras_model: A compiled Keras model object. This argument is mutually
+      exclusive with `keras_model_path`.
+    keras_model_path: Path to a compiled Keras model saved on disk, in HDF5
+      format, which can be generated with the `save()` method of a Keras model.
+      This argument is mutually exclusive with `keras_model`.
+    custom_objects: Dictionary for custom objects.
+    model_dir: Directory to save `Estimator` model parameters, graph, summary
+      files for TensorBoard, etc.
+    config: `RunConfig` to config `Estimator`.
+
+  Returns:
+    An Estimator from given keras model.
+
+  Raises:
+    ValueError: if neither keras_model nor keras_model_path was given.
+    ValueError: if both keras_model and keras_model_path was given.
+    ValueError: if the keras_model_path is a GCS URI.
+    ValueError: if keras_model has not been compiled.
+  """"""
+  try:
+    from tensorflow_estimator.python.estimator import keras as keras_lib  # pylint: disable=g-import-not-at-top
+  except ImportError:
     raise NotImplementedError(
         'tf.keras.estimator.model_to_estimator function not available in your '
         'installation.')
-  # pylint: enable=unused-argument
+  keras_lib.model_to_estimator(
+      keras_model=keras_model,
+      keras_model_path=keras_model_path,
+      custom_objects=custom_objects,
+      model_dir=model_dir,
+      config=config)
+
+# LINT.ThenChange(//third_party/tensorflow_estimator/python/estimator/keras.py)
 
-  model_to_estimator = tf_export('keras.estimator.model_to_estimator')(
-      stub_model_to_estimator)
 
",0,train
821d738255cc7baf9330bd7265e72dca972be465,tensorflow/tensorflow,"Fix Linalg lowering to loops

    This CL makes lowering to loops always be a:
    ```
    %D = linalg.dim %view, constant : !linalg.view<...>
    affine.for %ix = %c0 to %D {
      ...
    }
    ```

    This form composes correctly with tiling and is also the proper way to emit loops from views that cross function boundaries.
    The previous version that would extract the range_min/max/step was composing incorrectly with tiling (i.e. would shift by range_min both in the loop bounds and in the slice) and would not work across function boundaries.

    The relevant tests are updated and a new test `dot_view`---which lowers to loops from views passed as function parameters---is added.

    When additional context is available, the linalg.dim operations should be folded away but this is left for a future CL.

--

PiperOrigin-RevId: 249634712",LinalgOps.h,"@@ -336,6 +336,10 @@ public:
                     ArrayRef<Value *> operands) {
     return impl->create(builder, loc, operands);
   }
+  Operation::operand_range getInputsAndOutputs() {
+    auto range = this->getOperation()->getOperands();
+    return {range.begin(), range.begin() + getNumInputsAndOutputs()};
+  }
 
 private:
   struct Concept {
",0,train
821d738255cc7baf9330bd7265e72dca972be465,tensorflow/tensorflow,"Fix Linalg lowering to loops

    This CL makes lowering to loops always be a:
    ```
    %D = linalg.dim %view, constant : !linalg.view<...>
    affine.for %ix = %c0 to %D {
      ...
    }
    ```

    This form composes correctly with tiling and is also the proper way to emit loops from views that cross function boundaries.
    The previous version that would extract the range_min/max/step was composing incorrectly with tiling (i.e. would shift by range_min both in the loop bounds and in the slice) and would not work across function boundaries.

    The relevant tests are updated and a new test `dot_view`---which lowers to loops from views passed as function parameters---is added.

    When additional context is available, the linalg.dim operations should be folded away but this is left for a future CL.

--

PiperOrigin-RevId: 249634712",Utils.h,"@@ -89,8 +89,16 @@ Value *createOrReturnView(FuncBuilder *b, Location loc,
 enum class RangePart { Min = 0, Max, Step };
 Value *extractRangePart(Value *range, RangePart part);
 
+/// Returns the values obtained by applying `map` to the list of values.
+/// Performs simplifications and foldings where possible.
+SmallVector<Value *, 4> applyMapToValues(FuncBuilder *b, Location loc,
+                                         AffineMap map,
+                                         ArrayRef<Value *> values,
+                                         FunctionConstants &state);
+
 /// Returns the values obtained by applying `map` to the list of range parts
-/// extracted from `ranges`.
+/// extracted from `ranges`. Performs simplifications and foldings where
+/// possible.
 SmallVector<Value *, 4> applyMapToRangePart(FuncBuilder *b, Location loc,
                                             AffineMap map,
                                             ArrayRef<Value *> ranges,
",0,train
bdc6a138403e8257841e8dff6d6b9322bb65053a,tensorflow/tensorflow,"Peel once on all tiled_loop dimensions

This CL assumes that the best single rule for loop peeling is to ensure that there is a single loop without any padding needed. All dimensions are peeled once to ensure that this loop exists. Other peeled loops could be peeled again to remove more padding, but the assumption is that in the common case, this will not be worth the IR size/compile time increase.

This is a temporary rule of thumb until more advanced heuristics can be created with multiple different code generation strategies depending on the input.

PiperOrigin-RevId: 399143335
Change-Id: Ie20653fc4d900c43b03107211c9cac4a3a89781c",tf_cpurt_peel_tiled_loops.cc,"@@ -40,17 +40,17 @@ struct PeelTiledLoop
       mlir::linalg::TiledLoopOp loop,
       mlir::PatternRewriter &rewriter) const override {
     if (loop->hasAttr(kWasPeeledAttr)) return mlir::failure();
-    auto peeled_idx = loop.getNumLoops() - 1;
-    mlir::linalg::TiledLoopOp peel;
-    if (mlir::linalg::peelAndCanonicalizeTiledLoop(rewriter, loop, peeled_idx,
-                                                   peel)
-            .failed())
-      return mlir::failure();
-
-    // Ensure that the peeling doesn't keep occurring forever.
     auto true_attr = mlir::BoolAttr::get(rewriter.getContext(), true);
     loop->setAttr(kWasPeeledAttr, true_attr);
-    peel->setAttr(kWasPeeledAttr, true_attr);
+    for (int peeled_idx = loop.getNumLoops() - 1; peeled_idx >= 0;
+         peeled_idx--) {
+      mlir::linalg::TiledLoopOp peel;
+      // Mark the new loop if one was created
+      if (mlir::linalg::peelAndCanonicalizeTiledLoop(rewriter, loop, peeled_idx,
+                                                     peel)
+              .succeeded())
+        peel->setAttr(kWasPeeledAttr, true_attr);
+    }
     return mlir::success();
   }
 };
",0,train
e0266dbf39deac09315b764524835299b513926c,tensorflow/tensorflow,"Use `static_cast` instead of C-style casts.

PiperOrigin-RevId: 316738458
Change-Id: I54f2f2f43d31606246475df0eae8d20e673aee6b",types.h,"@@ -37,18 +37,18 @@ namespace tensorflow {
 // Alias tensorflow::string to std::string.
 using std::string;
 
-static const uint8 kuint8max = ((uint8)0xFF);
-static const uint16 kuint16max = ((uint16)0xFFFF);
-static const uint32 kuint32max = ((uint32)0xFFFFFFFF);
-static const uint64 kuint64max = ((uint64)0xFFFFFFFFFFFFFFFFull);
-static const int8 kint8min = ((int8)~0x7F);
-static const int8 kint8max = ((int8)0x7F);
-static const int16 kint16min = ((int16)~0x7FFF);
-static const int16 kint16max = ((int16)0x7FFF);
-static const int32 kint32min = ((int32)~0x7FFFFFFF);
-static const int32 kint32max = ((int32)0x7FFFFFFF);
-static const int64 kint64min = ((int64)~0x7FFFFFFFFFFFFFFFll);
-static const int64 kint64max = ((int64)0x7FFFFFFFFFFFFFFFll);
+static const uint8 kuint8max = static_cast<uint8>(0xFF);
+static const uint16 kuint16max = static_cast<uint16>(0xFFFF);
+static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
+static const uint64 kuint64max = static_cast<uint64>(0xFFFFFFFFFFFFFFFFull);
+static const int8 kint8min = static_cast<int8>(~0x7F);
+static const int8 kint8max = static_cast<int8>(0x7F);
+static const int16 kint16min = static_cast<int16>(~0x7FFF);
+static const int16 kint16max = static_cast<int16>(0x7FFF);
+static const int32 kint32min = static_cast<int32>(~0x7FFFFFFF);
+static const int32 kint32max = static_cast<int32>(0x7FFFFFFF);
+static const int64 kint64min = static_cast<int64>(~0x7FFFFFFFFFFFFFFFll);
+static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFll);
 
 // A typedef for a uint64 used as a short fingerprint.
 typedef uint64 Fprint;
",0,test
debc40442f13d96047bb0f64e5f8f6921b0baf2b,tensorflow/tensorflow,"Remove explicit static linking from tests that load a shared library.

This was causing the .so files to have undefined symbols from core/framework.
Change: 145479847",gru_ops_test.cc,"@@ -27,8 +27,10 @@ class GruOpsTest : public ::testing::Test {
     TF_Status* status = TF_NewStatus();
     auto* lib = TF_LoadLibrary(
         ""tensorflow/contrib/rnn/python/ops/_gru_ops.so"", status);
-    CHECK_EQ(TF_OK, TF_GetCode(status));
+    TF_Code code = TF_GetCode(status);
+    string status_msg(TF_Message(status));
     TF_DeleteStatus(status);
+    ASSERT_EQ(TF_OK, code) << status_msg;
     TF_DeleteLibraryHandle(lib);
   }
 };
",0,train
debc40442f13d96047bb0f64e5f8f6921b0baf2b,tensorflow/tensorflow,"Remove explicit static linking from tests that load a shared library.

This was causing the .so files to have undefined symbols from core/framework.
Change: 145479847",lstm_ops_test.cc,"@@ -29,9 +29,11 @@ class LSTMOpsTest : public ::testing::Test {
     TF_Status* status = TF_NewStatus();
     auto* lib = TF_LoadLibrary(
         ""tensorflow/contrib/rnn/python/ops/_lstm_ops.so"", status);
-    CHECK_EQ(TF_OK, TF_GetCode(status));
-    TF_DeleteLibraryHandle(lib);
+    TF_Code code = TF_GetCode(status);
+    string status_msg(TF_Message(status));
     TF_DeleteStatus(status);
+    ASSERT_EQ(TF_OK, code) << status_msg;
+    TF_DeleteLibraryHandle(lib);
   }
 };
 
",0,train
daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter.

PiperOrigin-RevId: 227874653",list_kernels.h,"@@ -521,14 +521,31 @@ class TensorListScatter : public OpKernel {
                     ""Specified a list with shape "", element_shape.DebugString(),
                     "" from a tensor with shape "", output_shape.DebugString()));
     output_list.element_shape = element_shape;
-    output_list.tensors.reserve(indices.NumElements());
+
+    OP_REQUIRES(c, indices.NumElements() == input_tensor.shape().dim_size(0),
+                errors::InvalidArgument(
+                    ""Invalid number of rows in input tensor. Expected: "",
+                    indices.NumElements(),
+                    "" Actual: "", input_tensor.shape().dim_size(0)));
+
+    // Validate indices and resize output_list.tensors to fit the highest index.
+    {
+      size_t list_size = 0;
+      for (int index = 0; index < indices.NumElements(); ++index) {
+        const int i = indices.flat<int32>()(index);
+        OP_REQUIRES(c, i >= 0,
+                    errors::InvalidArgument(
+                        ""Indices in TensorListScatter must all be positive.""));
+        if (i >= list_size) {
+          list_size = i + 1;
+        }
+      }
+      output_list.tensors.resize(list_size, Tensor(DT_INVALID));
+    }
+
     for (int index = 0; index < indices.NumElements(); ++index) {
       const int i = indices.flat<int32>()(index);
-      OP_REQUIRES(c, i < input_tensor.shape().dim_size(0),
-                  errors::InvalidArgument(
-                      ""Trying to scatter index "", i, "" from tensor with "",
-                      input_tensor.shape().dim_size(0), "" rows.""));
-      Tensor tmp = input_tensor.Slice(i, i + 1);
+      Tensor tmp = input_tensor.Slice(index, index + 1);
       TensorShape tmp_shape = tmp.shape();
       tmp_shape.RemoveDim(0);
       OP_REQUIRES(c, tmp.CopyFrom(tmp, tmp_shape),
@@ -541,7 +558,7 @@ class TensorListScatter : public OpKernel {
       // many small ones.
       aligned.flat<T>().device(c->eigen_device<Device>()) =
           tmp.unaligned_flat<T>();
-      output_list.tensors.push_back(aligned);
+      std::swap(output_list.tensors[i], aligned);
     }
     output_tensor->scalar<Variant>()() = std::move(output_list);
   }
",0,test
daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter.

PiperOrigin-RevId: 227874653",list_ops_test.py,"@@ -290,6 +290,47 @@ class ListOpsTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       t = list_ops.tensor_list_gather(l, [], element_dtype=dtypes.float32)
       self.evaluate(t)
 
+  def testGatherGradWithNonContiguousIndices(self):
+    with backprop.GradientTape(persistent=True) as tape:
+      t = constant_op.constant([1.0, 2.0, 3.0])
+      l = list_ops.tensor_list_from_tensor(t, element_shape=[])
+      c = constant_op.constant(5.0)
+      tape.watch(c)
+      l = list_ops.tensor_list_set_item(l, 1, c)
+      t = list_ops.tensor_list_gather(l, [1], element_dtype=dtypes.float32)
+      self.assertAllEqual(self.evaluate(t), [5.0])
+      s = t[0] * t[0]
+    dt = tape.gradient(s, c)
+    self.assertAllEqual(self.evaluate(dt), 10.0)
+    dl = tape.gradient(t, l)
+    dl_length = list_ops.tensor_list_length(dl)
+    self.assertAllEqual(self.evaluate(dl_length), 3)
+
+  def testScatterOutputListSize(self):
+    c0 = constant_op.constant([1.0, 2.0])
+    l = list_ops.tensor_list_scatter(
+        c0, [1, 3], ops.convert_to_tensor([], dtype=dtypes.int32))
+    # TensorListScatter should return a list with size largest index + 1.
+    self.assertEqual(self.evaluate(list_ops.tensor_list_length(l)), 4)
+
+  def testScatterWithInvalidRowsInInputTensorFails(self):
+    c0 = constant_op.constant([1.0, 2.0])
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        ""Invalid number of rows in input tensor. Expected: 3 Actual: 2""):
+      l = list_ops.tensor_list_scatter(
+          c0, [1, 0, 2], ops.convert_to_tensor([], dtype=dtypes.int32))
+      self.evaluate(l)
+
+  def testScatterWithNegativeIndicesFails(self):
+    c0 = constant_op.constant([1.0, 2.0])
+    with self.assertRaisesRegexp(
+        errors.InvalidArgumentError,
+        ""Indices in TensorListScatter must all be positive.""):
+      l = list_ops.tensor_list_scatter(
+          c0, [-1, -2], ops.convert_to_tensor([], dtype=dtypes.int32))
+      self.evaluate(l)
+
   def testScatterGrad(self):
     with backprop.GradientTape() as tape:
       c0 = constant_op.constant([1.0, 2.0])
",0,test
daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter.

PiperOrigin-RevId: 227874653",tensor_array_ops_test.py,"@@ -1359,7 +1359,6 @@ class TensorArrayTest(test.TestCase):
   def testSkipEagerTensorArrayEvalEmptyWithDefault(self):
     self._testTensorArrayEvalEmptyWithDefault()
 
-  @test_util.disable_control_flow_v2(""b/117943286"")
   @test_util.run_v1_only(""b/117943489"")
   def testSkipEagerTensorArrayScatterReadAndGradients(self):
     with self.session(use_gpu=True) as session:
@@ -1387,8 +1386,8 @@ class TensorArrayTest(test.TestCase):
       self.assertAllEqual([10.0, -10.0], read_vals[1])
       self.assertAllEqual([[2.0, 3.0], [4.0, 5.0]], grad_vals[0])
 
-  @test_util.disable_control_flow_v2(""b/117943286"")
-  @test_util.run_v1_only(""b/117943286"")
+  @test_util.disable_control_flow_v2(""b/118890905"")
+  @test_util.run_v1_only(""b/118890905"")
   def testTensorArrayWriteGatherAndGradients(self):
     with self.session(use_gpu=True) as session:
       ta = tensor_array_ops.TensorArray(
",0,test
daa75aff18fd42598d1fb68f13da13042e886c07,tensorflow/tensorflow,"Allows TensorListScatter to scatter at non-contiguous indices to make it consistent with TensorArray.scatter.

PiperOrigin-RevId: 227874653",list_ops.py,"@@ -200,10 +200,16 @@ def _TensorListResizeGrad(op, dlist):
 
 @ops.RegisterGradient(""TensorListGather"")
 def _TensorListGatherGrad(op, dtensor):
-  _, indices = op.inputs
-  return gen_list_ops.tensor_list_scatter(
-      tensor=dtensor, indices=indices,
-      element_shape=ops.convert_to_tensor(-1, dtype=dtypes.int32)), None
+  input_list, indices = op.inputs
+  dlist = gen_list_ops.tensor_list_scatter(
+      tensor=dtensor,
+      indices=indices,
+      element_shape=ops.convert_to_tensor(-1, dtype=dtypes.int32))
+  # TensorListScatter returns a list with size `max(indices) + 1`
+  # so we manually resize it to match the size of the input list.
+  input_list_size = gen_list_ops.tensor_list_length(input_list)
+  dlist = gen_list_ops.tensor_list_resize(dlist, input_list_size)
+  return dlist, None
 
 
 @ops.RegisterGradient(""TensorListScatter"")
",0,test
dbdaee1bf6f0840a86ac1002248a1600850ba549,tensorflow/tensorflow,"Replace `DCHECK_LE(f_dim, feature_dims)` with corresponding `OP_REQUIRES`

PiperOrigin-RevId: 411069705
Change-Id: I4866a80873d0be4ce43157c713ed476cb1445741",stats_ops.cc,"@@ -1692,6 +1692,15 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
     const int64_t stats_dims = logits_dims + hessians_dims;
     const int64_t num_sparse_entries = feature_indices_t->dim_size(0);
     const int32_t feature_dims = feature_shape(1);
+    for (int i = 0; i < num_sparse_entries; ++i) {
+      const int32_t f_dim = feature_indices(i, 1);
+      OP_REQUIRES(
+          context, f_dim <= feature_dims,
+          errors::InvalidArgument(
+              ""Got invalid feature index feature_indices("", i, ""1) = "", f_dim,
+              "" which is above "", feature_dims,
+              "" (from feature_shape: "", feature_shape_t->DebugString(), "")""));
+    }
     OP_REQUIRES(context, num_sparse_entries <= batch_size * feature_dims,
                 errors::InvalidArgument(
                     ""feature_indices dim0 should be <= gradients dim0 * ""
@@ -1735,7 +1744,6 @@ class BoostedTreesSparseAggregateStatsOp : public OpKernel {
                                           num_nodes, "", got "", instance, "")""));
       // the feature dimension.
       const int32_t f_dim = feature_indices(i, 1);
-      DCHECK_LE(f_dim, feature_dims);
       // the bucket id of the value.
       const int32_t bucket_id = feature_values(i);
 
",0,train
4fa4001d457b1b7e3a38533defbebbed143c7a33,tensorflow/tensorflow,"Expose _log_and_record method to allow easier subclassing of StepCounter

PiperOrigin-RevId: 182112167",tpu_estimator.py,"@@ -36,6 +36,7 @@ from tensorflow.contrib.tpu.python.tpu import tpu_feed
 from tensorflow.contrib.tpu.python.tpu import training_loop
 from tensorflow.contrib.tpu.python.tpu import util as util_lib
 
+from tensorflow.core.framework.summary_pb2 import Summary
 from tensorflow.core.protobuf import config_pb2
 
 from tensorflow.python.estimator import estimator as estimator_lib
@@ -53,6 +54,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary
+from tensorflow.python.training import basic_session_run_hooks
 from tensorflow.python.training import evaluation
 from tensorflow.python.training import session_run_hook
 from tensorflow.python.training import training
@@ -216,6 +218,10 @@ class _TPUContext(object):
             (mode == model_fn_lib.ModeKeys.EVAL and
              self._eval_batch_size is None))
 
+  @property
+  def global_batch_size(self):
+    return self._train_batch_size
+
   @property
   def batch_size_for_input_fn(self):
     """"""Returns the shard batch size for `input_fn`.""""""
@@ -1317,6 +1323,31 @@ class _EvalMetrics(object):
     return eval_metric_ops, eval_update_ops
 
 
+class ExamplesPerSecondHook(basic_session_run_hooks.StepCounterHook):
+  """"""Count examples during runtime.""""""
+
+  def __init__(self,
+               batch_size,
+               every_n_steps=100,
+               every_n_secs=None,
+               output_dir=None,
+               summary_writer=None):
+    self._batch_size = batch_size
+    super(ExamplesPerSecondHook, self).__init__(
+        every_n_steps=every_n_steps,
+        every_n_secs=every_n_secs,
+        output_dir=output_dir,
+        summary_writer=summary_writer)
+
+  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
+    examples_per_sec = self._batch_size * elapsed_steps / elapsed_time
+    if self._summary_writer is not None:
+      example_summary = Summary(value=[Summary.Value(
+          tag='examples_sec', simple_value=examples_per_sec)])
+      self._summary_writer.add_summary(example_summary, global_step)
+    logging.info('examples/sec: %g', examples_per_sec)
+
+
 class TPUEstimator(estimator_lib.Estimator):
   """"""Estimator with TPU support.
 
@@ -1534,8 +1565,8 @@ class TPUEstimator(estimator_lib.Estimator):
     if max_steps is not None:
       util_lib.check_positive_integer(max_steps, 'Train max_steps')
 
-    return [_TPUStopAtStepHook(self._iterations_per_training_loop,
-                               steps, max_steps)]
+    return [_TPUStopAtStepHook(self._iterations_per_training_loop, steps,
+                               max_steps)]
 
   def _convert_eval_steps_to_hooks(self, steps):
     with self._ctx.with_mode(model_fn_lib.ModeKeys.EVAL) as ctx:
@@ -1547,11 +1578,11 @@ class TPUEstimator(estimator_lib.Estimator):
 
     util_lib.check_positive_integer(steps, 'Eval steps')
 
-    hooks = []
-    hooks.append(evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
-        num_evals=steps))
-    hooks.append(_SetEvalIterationsHook(steps))
-    return hooks
+    return [
+        evaluation._StopAfterNEvalsHook(  # pylint: disable=protected-access
+            num_evals=steps),
+        _SetEvalIterationsHook(steps)
+    ]
 
   def _call_input_fn(self, input_fn, mode):
     """"""Calls the input function.
@@ -1632,6 +1663,7 @@ class TPUEstimator(estimator_lib.Estimator):
               _train_on_tpu_system(ctx, model_fn_wrapper, dequeue_fn))
           hooks = [
               TPUInfeedOutfeedSessionHook(ctx, enqueue_ops),
+              ExamplesPerSecondHook(self._ctx.global_batch_size),
               training.LoggingTensorHook(
                   {'loss': array_ops.identity(loss),
                    'step': training.get_global_step()},
",0,train
4fa4001d457b1b7e3a38533defbebbed143c7a33,tensorflow/tensorflow,"Expose _log_and_record method to allow easier subclassing of StepCounter

PiperOrigin-RevId: 182112167",basic_session_run_hooks.py,"@@ -529,6 +529,14 @@ class StepCounterHook(session_run_hook.SessionRunHook):
   def before_run(self, run_context):  # pylint: disable=unused-argument
     return SessionRunArgs(self._global_step_tensor)
 
+  def _log_and_record(self, elapsed_steps, elapsed_time, global_step):
+    steps_per_sec = elapsed_steps / elapsed_time
+    if self._summary_writer is not None:
+      summary = Summary(value=[Summary.Value(
+          tag=self._summary_tag, simple_value=steps_per_sec)])
+      self._summary_writer.add_summary(summary, global_step)
+    logging.info(""%s: %g"", self._summary_tag, steps_per_sec)
+
   def after_run(self, run_context, run_values):
     _ = run_context
 
@@ -540,12 +548,7 @@ class StepCounterHook(session_run_hook.SessionRunHook):
         elapsed_time, elapsed_steps = self._timer.update_last_triggered_step(
             global_step)
         if elapsed_time is not None:
-          steps_per_sec = elapsed_steps / elapsed_time
-          if self._summary_writer is not None:
-            summary = Summary(value=[Summary.Value(
-                tag=self._summary_tag, simple_value=steps_per_sec)])
-            self._summary_writer.add_summary(summary, global_step)
-          logging.info(""%s: %g"", self._summary_tag, steps_per_sec)
+          self._log_and_record(elapsed_steps, elapsed_time, global_step)
 
     # Check whether the global step has been increased. Here, we do not use the
     # timer.last_triggered_step as the timer might record a different global
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",kernel_and_device.cc,"@@ -395,6 +395,7 @@ KernelAndDeviceFunc::PrepareForRun(
   opts->step_container = step_container;
   opts->collective_executor =
       collective_executor_ ? collective_executor_->get() : nullptr;
+  opts->stack_trace = stack_trace;
 
   opts->stats_collector = nullptr;
   opts->runner = get_runner();
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",executor.cc,"@@ -74,6 +74,7 @@ limitations under the License.
 #include ""tensorflow/core/profiler/lib/traceme_encode.h""
 #include ""tensorflow/core/protobuf/error_codes.pb.h""
 #include ""tensorflow/core/util/determinism.h""
+#include ""tensorflow/core/util/managed_stack_trace.h""
 #include ""tensorflow/core/util/tensor_slice_reader_cache.h""
 
 namespace tensorflow {
@@ -373,6 +374,7 @@ class ExecutorState {
   ExecutorImpl::KernelStats* const kernel_stats_;
   CancellationManager* cancellation_manager_;
   CoordinationServiceAgent* coordination_service_agent_;
+  absl::optional<ManagedStackTrace> stack_trace_ = absl::nullopt;
   // If not null, use this device to schedule intra-op operation
   std::unique_ptr<DeviceBase> user_device_;
   Executor::Args::Runner runner_;
@@ -422,6 +424,7 @@ ExecutorState<PropagatorStateType>::ExecutorState(
       kernel_stats_(kernel_stats),
       cancellation_manager_(args.cancellation_manager),
       coordination_service_agent_(args.coordination_service_agent),
+      stack_trace_(args.stack_trace),
       runner_(args.runner),
       sync_on_finish_(args.sync_on_finish),
       run_all_kernels_inline_(args.run_all_kernels_inline),
@@ -717,6 +720,7 @@ void ExecutorState<PropagatorStateType>::Process(TaggedNode tagged_node,
   params.tensor_store = tensor_store_;
   params.cancellation_manager = cancellation_manager_;
   params.coordination_service_agent = coordination_service_agent_;
+  params.stack_trace = stack_trace_;
   params.call_frame = call_frame_;
   params.function_library = immutable_state_.params().function_library;
   params.resource_manager = device->resource_manager();
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",executor.h,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/threadpool_interface.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
+#include ""tensorflow/core/util/managed_stack_trace.h""
 
 namespace tensorflow {
 
@@ -105,6 +106,7 @@ class Executor {
     int64_t start_time_usecs = 0;
     // The deadline for the kernel to complete by. Empty if unspecified.
     absl::optional<absl::Time> deadline;
+    absl::optional<ManagedStackTrace> stack_trace = absl::nullopt;
 
     // If true, calls Sync() on the device.
     bool sync_on_finish = false;
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",function.cc,"@@ -543,6 +543,7 @@ class CallOp : public AsyncOpKernel {
     opts.runner = ctx->runner();
     opts.run_all_kernels_inline = ctx->run_all_kernels_inline();
     opts.collective_executor = ctx->collective_executor();
+    opts.stack_trace = ctx->stack_trace();
     std::vector<Tensor> args;
     args.reserve(ctx->num_inputs());
     for (int i = 0; i < ctx->num_inputs(); ++i) {
@@ -1031,6 +1032,7 @@ void FunctionLibraryRuntimeImpl::ExecutorArgsFromOptions(
   exec_args->run_all_kernels_inline = run_opts.run_all_kernels_inline;
   exec_args->user_intra_op_threadpool = run_opts.user_intra_op_threadpool;
   exec_args->coordination_service_agent = run_opts.coordination_service_agent;
+  exec_args->stack_trace = run_opts.stack_trace;
 }
 
 void FunctionLibraryRuntimeImpl::RunRemote(const Options& opts, Handle handle,
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",single_threaded_executor.cc,"@@ -315,6 +315,7 @@ class SingleThreadedExecutorImpl : public Executor {
     params.resource_manager = device->resource_manager();
     params.step_container = args.step_container;
     params.collective_executor = args.collective_executor;
+    params.stack_trace = args.stack_trace;
     params.slice_reader_cache = nullptr;  // TODO(mrry): Too severe?
     params.inputs = &node_inputs;
     params.input_alloc_attrs = &input_alloc_attrs;
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",function.h,"@@ -843,6 +843,8 @@ class FunctionLibraryRuntime {
     StepStatsCollectorInterface* stats_collector = nullptr;
     CoordinationServiceAgent* coordination_service_agent = nullptr;
 
+    absl::optional<ManagedStackTrace> stack_trace = absl::nullopt;
+
     std::function<void(std::function<void()>)>* runner = nullptr;
 
     // Parameters for remote function execution.
",0,train
6a57baa5153f5f348c0cb577da8854d46282118d,tensorflow/tensorflow,"Plumb through stack_trace() in func execution.
Also enable stack_trace propagation in ResourceHandle.

PiperOrigin-RevId: 434850304",resource_variable_ops.cc,"@@ -253,7 +253,8 @@ void VarHandleOp::Compute(OpKernelContext* ctx) {
     ResourceMgr* mgr = ctx->resource_manager();
     ResourceHandle handle = ResourceHandle::MakeRefCountingHandle<Var>(
         resource, ctx->device()->name(),
-        std::vector<DtypeAndPartialTensorShape>{dtype_and_shape_});
+        std::vector<DtypeAndPartialTensorShape>{dtype_and_shape_},
+        ctx->stack_trace());
     // TODO(b/203901837): See if we can abolish all code paths that lookup
     // anonymous variables and then stop publishing them to the manager.
     OP_REQUIRES_OK(ctx, mgr->CreateUnowned<Var>(handle.container(),
",0,train
cc10ac9b7d593375a7cee0c167c20989dc29e8cf,tensorflow/tensorflow,remove unnecessary lambda,linalg_ops.py,"@@ -545,7 +545,7 @@ def norm(tensor,
       if is_matrix_norm and ord in [2, 2.0]:
         axes = list(range(rank))
         perm_before = list(filter(lambda i: i not in axis, axes)) + list(axis)
-        perm_after = list(map(lambda i: perm_before.index(i), axes))
+        perm_after = list(map(perm_before.index, axes))
         result = array_ops.transpose(array_ops.expand_dims(
             math_ops.reduce_max(gen_linalg_ops.svd(
                 array_ops.transpose(tensor, perm=perm_before),
",0,train
f17620153c47370f30a84b99eaba82bef8cd7d8e,tensorflow/tensorflow,"Handle delayed variable initialization in MirroredStrategy. Test with RNN layer.
Bug reported and solution suggested in #19069

PiperOrigin-RevId: 196718454",mirrored_strategy.py,"@@ -111,10 +111,13 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
             kwargs[""name""] = ""%s/replica_%d"" % (var0name, i)
             # Initialize replicas with the same value:
             if context.executing_eagerly():
-              initial_value = index[devices[0]].value()
+              kwargs[""initial_value""] = array_ops.identity(
+                  index[devices[0]].value())
             else:
-              initial_value = index[devices[0]].initial_value
-            kwargs[""initial_value""] = array_ops.identity(initial_value)
+              def initial_value_fn(device=d):
+                with ops.device(device):
+                  return array_ops.identity(index[devices[0]].initial_value)
+              kwargs[""initial_value""] = initial_value_fn
           with context.context().device_policy(context.DEVICE_PLACEMENT_SILENT):
             v = next_creator(*args, **kwargs)
           assert not isinstance(v, values.DistributedVariable)
",0,train
f17620153c47370f30a84b99eaba82bef8cd7d8e,tensorflow/tensorflow,"Handle delayed variable initialization in MirroredStrategy. Test with RNN layer.
Bug reported and solution suggested in #19069

PiperOrigin-RevId: 196718454",mirrored_strategy_multigpu_test.py,"@@ -28,9 +28,12 @@ from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.eager import context
 from tensorflow.python.eager import test
 from tensorflow.python.framework import constant_op
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import test_util
 from tensorflow.python.layers import core
+from tensorflow.python.ops import rnn
+from tensorflow.python.ops import rnn_cell_impl
 from tensorflow.python.ops import variable_scope
 from tensorflow.python.ops import variables
 from tensorflow.python.training import distribute as distribute_lib
@@ -436,6 +439,30 @@ class MirroredStrategyVariableCreationTest(test.TestCase):
         self.assertEquals(""foo/"" + name + "":0"", v0.name)
         self.assertEquals(""tower_1/foo/"" + name + "":0"", v1.name)
 
+  def testDynamicRnnVariables(self):
+    def model_fn():
+      inputs = constant_op.constant(2 * [2 * [[0.0, 1.0, 2.0, 3.0, 4.0]]])
+      cell_fw = rnn_cell_impl.LSTMCell(300)
+      cell_bw = rnn_cell_impl.LSTMCell(300)
+      (outputs, _) = rnn.bidirectional_dynamic_rnn(
+          cell_fw,
+          cell_bw,
+          inputs,
+          dtype=dtypes.float32)
+      return outputs
+
+    dist = mirrored_strategy.MirroredStrategy(
+        [""/device:GPU:0"", ""/device:CPU:0""])
+
+    with context.graph_mode(), dist.scope():
+      result = dist.call_for_each_tower(model_fn, run_concurrently=False)
+      # Two variables are created by the RNN layer.
+      self.assertEquals(2, len(result))
+      for v in result:
+        self.assertIsInstance(v, values.DistributedValues)
+        _, v1 = dist.unwrap(v)
+        self.assertStartsWith(v1.name, ""tower_1/"")
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
2efd47de550fa1eceb12d36a87449c4cbdf2f861,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2022-03-08

PiperOrigin-RevId: 433145847",compat.py,"@@ -29,7 +29,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 3, 7)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2022, 3, 8)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library.

PiperOrigin-RevId: 331633117
Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",util.py,"@@ -27,7 +27,6 @@ from absl import logging
 import six
 from six.moves import range
 
-import flatbuffers
 from tensorflow.core.protobuf import config_pb2 as _config_pb2
 from tensorflow.core.protobuf import graph_debug_info_pb2
 from tensorflow.core.protobuf import meta_graph_pb2 as _meta_graph_pb2
@@ -578,7 +577,7 @@ def _convert_model_from_bytearray_to_object(model_bytearray):
 def _convert_model_from_object_to_bytearray(model_object):
   """"""Converts a tflite model from a parsable object into a bytearray.""""""
   # Initial size of the buffer, which will grow automatically if needed
-  builder = flatbuffers.Builder(1024)
+  builder = schema_fb.flatbuffers.Builder(1024)
   model_offset = model_object.Pack(builder)
   builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER)
   return bytes(builder.Output())
",0,train
e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library.

PiperOrigin-RevId: 331633117
Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",flatbuffer_utils.py,"@@ -30,7 +30,6 @@ import os
 import random
 import re
 
-import flatbuffers
 from tensorflow.lite.python import schema_py_generated as schema_fb
 
 _TFLITE_FILE_IDENTIFIER = b'TFL3'
@@ -84,7 +83,7 @@ def read_model_with_mutable_tensors(input_tflite_file):
 def convert_object_to_bytearray(model_object):
   """"""Converts a tflite model from an object to a immutable bytearray.""""""
   # Initial size of the buffer, which will grow automatically if needed
-  builder = flatbuffers.Builder(1024)
+  builder = schema_fb.flatbuffers.Builder(1024)
   model_offset = model_object.Pack(builder)
   builder.Finish(model_offset, file_identifier=_TFLITE_FILE_IDENTIFIER)
   model_bytearray = bytes(builder.Output())
@@ -157,7 +156,7 @@ def randomize_weights(model, random_seed=0):
 
 
 def xxd_output_to_bytes(input_cc_file):
-  """"""Converts xxd output C++ source file to bytes (immutable)
+  """"""Converts xxd output C++ source file to bytes (immutable).
 
   Args:
     input_cc_file: Full path name to the C++ source file dumped by xxd
@@ -196,7 +195,7 @@ def xxd_output_to_bytes(input_cc_file):
 
 
 def xxd_output_to_object(input_cc_file):
-  """"""Converts xxd output C++ source file to object
+  """"""Converts xxd output C++ source file to object.
 
   Args:
     input_cc_file: Full path name to the C++ source file dumped by xxd
",0,train
e7e5bc9440792a187dd7847a72088ae50b9ad2be,tensorflow/tensorflow,"Fix issue using python flatbuffers library.

PiperOrigin-RevId: 331633117
Change-Id: I92b1d4af9e046a6f0e610365ce95e90fa7e05921",test_utils.py,"@@ -21,7 +21,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import flatbuffers
 from tensorflow.lite.python import schema_py_generated as schema_fb
 
 TFLITE_SCHEMA_VERSION = 3
@@ -29,7 +28,7 @@ TFLITE_SCHEMA_VERSION = 3
 
 def build_mock_flatbuffer_model():
   """"""Creates a flatbuffer containing an example model.""""""
-  builder = flatbuffers.Builder(1024)
+  builder = schema_fb.flatbuffers.Builder(1024)
 
   schema_fb.BufferStart(builder)
   buffer0_offset = schema_fb.BufferEnd(builder)
",0,train
a847d09b50066f03a9b45156044b1517367eeea4,tensorflow/tensorflow,"Preserve buffer allocation source locations in LHLO.

PiperOrigin-RevId: 396558903
Change-Id: Ia1375ef8da22746cf07b7ef17dac2046e6dde496",mhlo_to_lhlo_with_xla.cc,"@@ -1577,6 +1577,16 @@ Status LhloDialectEmitter::Initialize() {
     NamedAttrList arg_attr_list;
     mlir::Type arg_type = MemRefType::get({alloc->size()}, i8_type_);
 
+    // Propagate source location information for every HLOInstruction that
+    // uses this allocation.
+    std::vector<mlir::Location> buf_locs;
+    buf_locs.reserve(alloc->assigned_buffers().size());
+    for (const auto& entry : alloc->assigned_buffers()) {
+      const xla::HloValue* hlo_value = entry.first;
+      buf_locs.push_back(getLocation(hlo_value->instruction()));
+    }
+    mlir::Location loc = builder_.getFusedLoc(buf_locs);
+
     if (alloc->is_entry_computation_parameter()) {
       arg_attr_list.set(""lmhlo.params"",
                         builder_.getIndexAttr(alloc->parameter_number()));
@@ -1615,7 +1625,7 @@ Status LhloDialectEmitter::Initialize() {
         }
       }
     }
-    block->addArgument(arg_type);
+    block->addArgument(arg_type, loc);
     allocations_[alloc] = block->getArguments().back();
     args_attrs.push_back(arg_attr_list.getDictionary(builder_.getContext()));
   }
",0,test
da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty)

PiperOrigin-RevId: 369912337
Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",tf_ops_a_m.cc,"@@ -1618,7 +1618,7 @@ static LogicalResult inferConvReturnTypes(
                              ""D tensor"");
 
   if (padding == tensorflow::Padding::EXPLICIT) {
-    if (explicit_padding.size() == 0) {
+    if (explicit_padding.empty()) {
       return emitOptionalError(location,
                                ""requires attribute 'explicit_paddings' with ""
                                ""'EXPLICIT' padding mode"");
",0,train
da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty)

PiperOrigin-RevId: 369912337
Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",cluster_formation.cc,"@@ -188,7 +188,7 @@ void BuildClusters(Block* block, OpBuilder builder) {
   llvm::MapVector<StringRef, Cluster> nearest_clusters;
   for (Operation& op : llvm::make_early_inc_range(*block)) {
     auto device = GetDevice(&op);
-    if (device == """") continue;
+    if (device.empty()) continue;
 
     // If no cluster of same device has been formed yet, create a new cluster
     // with op alone.
",0,train
da5b8ea5bd378cad5e313a540ba40a102dcddf6d,tensorflow/tensorflow,"Use empty rather than alternative size checks (readability-container-size-empty)

PiperOrigin-RevId: 369912337
Change-Id: I74688afc85c9999c9a31d14f02abcf909d915686",tf_device_assignment.cc,"@@ -38,7 +38,7 @@ class SimpleTFDeviceAssignmentPass
     getFunction().walk([&](Operation* op) {
       if (auto device_attr = op->getAttrOfType<StringAttr>(""device"")) {
         // We assign default device to ops with device attribute that is empty.
-        if (device_attr.getValue() == """") {
+        if (device_attr.getValue().empty()) {
           op->setAttr(""device"", builder.getStringAttr(default_device_));
         }
       } else if (op->getDialect() == tf) {
",0,train
d2c578c71901275323ba3c00c57ec2e91531a698,tensorflow/tensorflow,"[XLA:SPMD] Avoid designated initializer.

It broke external build.

PiperOrigin-RevId: 311447720
Change-Id: I460624dc2242deead277eb70fbd1c6a0701250f6",spmd_partitioner.h,"@@ -370,14 +370,15 @@ class SpmdPartitioningVisitor : public DfsHloVisitorWithDefault {
   int64 NewChannel() { return (*next_channel_id_)++; }
 
   PartitionedHlo::PartitioningState MakePartitioningState() {
-    return PartitionedHlo::PartitioningState{
-        .b = &b_,
-        .module = module_,
-        .num_replicas = num_replicas_,
-        .partition_id = partition_id_,
-        .collective_ops_creator = collective_ops_creator_,
-        .next_channel_id = next_channel_id_,
-        .reshard_cache = &reshard_cache_};
+    PartitionedHlo::PartitioningState state;
+    state.b = &b_;
+    state.module = module_;
+    state.num_replicas = num_replicas_;
+    state.partition_id = partition_id_;
+    state.collective_ops_creator = collective_ops_creator_;
+    state.next_channel_id = next_channel_id_;
+    state.reshard_cache = &reshard_cache_;
+    return state;
   }
 
   SpmdBuilder* builder() { return &b_; }
",0,train
f24f5cd47493b3db9a8b053bd4723b18ce57ae0f,tensorflow/tensorflow,"Simplifies `testBatch` to eliminate testing timeouts.
Change: 134301154",tensorflow_dataframe_test.py,"@@ -153,8 +153,8 @@ class TensorFlowDataFrameTestCase(tf.test.TestCase):
     tensorflow_df = df.TensorFlowDataFrame.from_pandas(pandas_df, shuffle=False)
 
     # Rebatch `df` into the following sizes successively.
-    batch_sizes = [8, 4, 7]
-    num_batches = 10
+    batch_sizes = [4, 7]
+    num_batches = 3
 
     final_batch_size = batch_sizes[-1]
 
",0,test
eca0365de37ebed58d98e22b0b6542512b7f90c8,tensorflow/tensorflow,"Add examples for `tf.unstack`.

PiperOrigin-RevId: 342952616
Change-Id: I5367754d272ea5b6e367becc19d6eebb3b9a9de9",array_ops.py,"@@ -1549,22 +1549,101 @@ ops.register_tensor_conversion_function((list, tuple),
 def unstack(value, num=None, axis=0, name=""unstack""):
   """"""Unpacks the given dimension of a rank-`R` tensor into rank-`(R-1)` tensors.
 
-  Unpacks `num` tensors from `value` by chipping it along the `axis` dimension.
-  If `num` is not specified (the default), it is inferred from `value`'s shape.
-  If `value.shape[axis]` is not known, `ValueError` is raised.
+  Unpacks tensors from `value` by chipping it along the `axis` dimension.
 
-  For example, given a tensor of shape `(A, B, C, D)`;
-
-  If `axis == 0` then the i'th tensor in `output` is the slice
-    `value[i, :, :, :]` and each tensor in `output` will have shape `(B, C, D)`.
-    (Note that the dimension unpacked along is gone, unlike `split`).
+  >>> x = tf.reshape(tf.range(12), (3,4))
+  >>>
+  >>> p, q, r = tf.unstack(x)
+  >>> p.shape.as_list()
+  [4]
 
-  If `axis == 1` then the i'th tensor in `output` is the slice
-    `value[:, i, :, :]` and each tensor in `output` will have shape `(A, C, D)`.
-  Etc.
+  >>> i, j, k, l = tf.unstack(x, axis=1)
+  >>> i.shape.as_list()
+  [3]
 
   This is the opposite of stack.
 
+  >>> x = tf.stack([i, j, k, l], axis=1)
+
+  More generally if you have a tensor of shape `(A, B, C, D)`:
+
+  >>> A, B, C, D = [2, 3, 4, 5]
+  >>> t = tf.random.normal(shape=[A, B, C, D])
+
+  The number of tensors returned is equal to the length of the target `axis`:
+
+  >>> axis = 2
+  >>> items = tf.unstack(t, axis=axis)
+  >>> len(items) == t.shape[axis]
+  True
+
+  The shape of each result tensor is equal to the shape of the input tensor,
+  with the target `axis` removed.
+
+  >>> items[0].shape.as_list()  # [A, B, D]
+  [2, 3, 5]
+
+  The value of each tensor `items[i]` is equal to the slice of `input` across
+  `axis` at index `i`:
+
+  >>> for i in range(len(items)):
+  ...   slice = t[:,:,i,:]
+  ...   assert tf.reduce_all(slice == items[i])
+
+  #### Python iterable unpacking
+
+  With eager execution you _can_ unstack the 0th axis of a tensor using python's
+  iterable unpacking:
+
+  >>> t = tf.constant([1,2,3])
+  >>> a,b,c = t
+
+  `unstack` is still necessary because Iterable unpacking doesn't work in
+  a `@tf.function`: Symbolic tensors are not iterable.
+
+  You need to use `tf.unstack` here:
+
+  >>> @tf.function
+  ... def bad(t):
+  ...   a,b,c = t
+  ...   return a
+  >>>
+  >>> bad(t)
+  Traceback (most recent call last):
+  ...
+  OperatorNotAllowedInGraphError: ...
+
+  >>> @tf.function
+  ... def good(t):
+  ...   a,b,c = tf.unstack(t)
+  ...   return a
+  >>>
+  >>> good(t).numpy()
+  1
+
+  #### Unknown shapes
+
+  Eager tensors have concrete values, so their shape is always known.
+  Inside a `tf.function` the symbolic tensors may have unknown shapes.
+  If the length of `axis` is unknown `tf.unstack` will fail because it cannot
+  handle an unknown number of tensors:
+
+  >>> @tf.function(input_signature=[tf.TensorSpec([None], tf.float32)])
+  ... def bad(t):
+  ...   tensors = tf.unstack(t)
+  ...   return tensors[0]
+  >>>
+  >>> bad(tf.constant([1,2,3]))
+  Traceback (most recent call last):
+  ...
+  ValueError: Cannot infer num from shape (None,)
+
+  If you know the `axis` length you can pass it as the `num` argument. But this
+  must be a constant value.
+
+  If you actually need a variable number of tensors in a single `tf.function`
+  trace, you will need to use explicit loops and a `tf.TensorArray` instead.
+
   Args:
     value: A rank `R > 0` `Tensor` to be unstacked.
     num: An `int`. The length of the dimension `axis`. Automatically inferred if
@@ -1577,8 +1656,9 @@ def unstack(value, num=None, axis=0, name=""unstack""):
     The list of `Tensor` objects unstacked from `value`.
 
   Raises:
+    ValueError: If `axis` is out of the range `[-R, R)`.
     ValueError: If `num` is unspecified and cannot be inferred.
-    ValueError: If `axis` is out of the range [-R, R).
+    InvalidArgumentError: If `num` does not match the shape of `value`.
   """"""
   if num is None:
     value = ops.convert_to_tensor(value)
",0,test
7e8073610db8019414bdfee2d9043e65bc698484,tensorflow/tensorflow,"[NFC] Expose GetNcclCollectivePermuteConfig() as a static method of NcclCollectivePermuteThunk.

PiperOrigin-RevId: 393457374
Change-Id: I91fd782edb99d33be0c3f6d3fedaf6b9d660dce0",nccl_collective_permute_thunk.h,"@@ -56,6 +56,10 @@ struct NcclCollectivePermuteConfig : public NcclCollectiveConfig {
 // Thunk that performs a NCCL-based collective permute.
 class NcclCollectivePermuteThunk : public NcclCollectiveThunk {
  public:
+  static NcclCollectivePermuteConfig GetNcclCollectivePermuteConfig(
+      mlir::lmhlo::CollectivePermuteOp op, int64_t replica_count,
+      int64_t partition_count);
+
   NcclCollectivePermuteThunk(ThunkInfo thunk_info,
                              mlir::lmhlo::CollectivePermuteOp op,
                              int64_t replica_count, int64_t partition_count,
@@ -81,10 +85,6 @@ class NcclCollectivePermuteThunk : public NcclCollectiveThunk {
   const NcclCollectiveConfig& config() const override { return config_; }
 
  private:
-  static NcclCollectivePermuteConfig GetNcclCollectivePermuteConfig(
-      mlir::lmhlo::CollectivePermuteOp op, int64_t replica_count,
-      int64_t partition_count);
-
   const NcclCollectivePermuteConfig config_;
   const Buffer buffer_;
 };
",0,test
2f7455d56c8328fd1b232e5bca68b636e0a34822,tensorflow/tensorflow,"Update minimum op version for TF 2.2.0 branch cut.

PiperOrigin-RevId: 296328883
Change-Id: I3deda696e7ad2c35cbd580decd72ca79e91963e4",op_version.cc,"@@ -89,7 +89,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kGatherNd, 1}, ""1.14.0""},
           {{OperatorType::kSvdf, 1}, ""1.5.0""},
           {{OperatorType::kSvdf, 2}, ""1.14.0""},
-          {{OperatorType::kSvdf, 3}, kPendingReleaseOpVersion},
+          {{OperatorType::kSvdf, 3}, ""2.2.0""},
           {{OperatorType::kL2Normalization, 1}, ""1.5.0""},
           {{OperatorType::kL2Normalization, 2}, ""1.14.0""},
           {{OperatorType::kL2Pool, 1}, ""1.5.0""},
@@ -137,7 +137,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kRelu6, 2}, ""1.14.0""},
           {{OperatorType::kResizeBilinear, 1}, ""1.7.0""},
           {{OperatorType::kResizeBilinear, 2}, ""1.14.0""},
-          {{OperatorType::kResizeBilinear, 3}, kPendingReleaseOpVersion},
+          {{OperatorType::kResizeBilinear, 3}, ""2.2.0""},
           {{OperatorType::kResizeNearestNeighbor, 1}, ""1.13.1""},
           {{OperatorType::kResizeNearestNeighbor, 2}, ""1.14.0""},
           {{OperatorType::kSqueeze, 1}, ""1.6.0""},
@@ -171,7 +171,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kCTCBeamSearchDecoder, 1}, ""1.11.0""},
           {{OperatorType::kUnpack, 1}, ""1.11.0""},
           {{OperatorType::kUnpack, 2}, ""1.14.0""},
-          {{OperatorType::kUnpack, 3}, kPendingReleaseOpVersion},
+          {{OperatorType::kUnpack, 3}, ""2.2.0""},
           {{OperatorType::kLeakyRelu, 1}, ""1.13.1""},
           {{OperatorType::kLogistic, 1}, ""1.14.0""},
           {{OperatorType::kLogistic, 2}, ""1.14.0""},
@@ -198,10 +198,10 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kLess, 2}, ""1.14.0""},
           {{OperatorType::kLessEqual, 1}, ""1.14.0""},
           {{OperatorType::kLessEqual, 2}, ""1.14.0""},
-          {{OperatorType::kSegmentSum, 1}, kPendingReleaseOpVersion},
+          {{OperatorType::kSegmentSum, 1}, ""2.2.0""},
           {{OperatorType::kSelect, 1}, ""1.14.0""},
           {{OperatorType::kSelect, 2}, ""1.14.0""},
-          {{OperatorType::kSelectV2, 1}, kPendingReleaseOpVersion},
+          {{OperatorType::kSelectV2, 1}, ""2.2.0""},
           {{OperatorType::kFloorDiv, 1}, ""1.14.0""},
           {{OperatorType::kFloorDiv, 2}, ""1.14.0""},
           {{OperatorType::kFloor, 1}, ""1.9.0""},
@@ -232,7 +232,7 @@ string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kHardSwish, 1}, ""1.15.0""},
           {{OperatorType::kFill, 1}, ""1.13.0""},
           {{OperatorType::kReverseV2, 1}, ""1.14.0""},
-          {{OperatorType::kReverseV2, 2}, kPendingReleaseOpVersion},
+          {{OperatorType::kReverseV2, 2}, ""2.2.0""},
           {{OperatorType::kRank, 1}, ""1.14.0""},
       });
 
",0,train
8933b8a21280696ab119b63263babdb54c298538,tensorflow/tensorflow,"Fix a null pointer exception caused by branching on uninitialized data.

This is due to not checking that the params for the quantization exist. If there is no quantization, we should not access the `.params` field.

PiperOrigin-RevId: 385173491
Change-Id: I8fc476c4b274fdb21ba741caa0fbc6d1b8840663",depthwise_conv.cc,"@@ -176,6 +176,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   if (data_type != kTfLiteFloat32) {
     TF_LITE_ENSURE_EQ(context, filter->quantization.type,
                       kTfLiteAffineQuantization);
+    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
     const auto* affine_quantization =
         reinterpret_cast<TfLiteAffineQuantization*>(
             filter->quantization.params);
@@ -195,6 +196,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   }
 
   if (is_hybrid) {
+    TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
     const auto* affine_quantization =
         reinterpret_cast<TfLiteAffineQuantization*>(
             filter->quantization.params);
@@ -495,6 +497,7 @@ TfLiteStatus EvalHybridPerChannel(TfLiteContext* context, TfLiteNode* node,
   op_params.weights_offset = 0;
   op_params.float_activation_min = output_activation_min;
   op_params.float_activation_max = output_activation_max;
+  TF_LITE_ENSURE(context, filter->quantization.type != kTfLiteNoQuantization);
   const auto* affine_quantization =
       reinterpret_cast<TfLiteAffineQuantization*>(filter->quantization.params);
   if (kernel_type == kReference) {
",0,train
0131d1a7d052ff5104c8c4ab22944b95ece130ed,tensorflow/tensorflow,"Add absl::Cord support to open source TensorFlow

PiperOrigin-RevId: 341926653
Change-Id: Id6174cf149526cd07670bebb2be6c91dbbf11a50",cord.h,"@@ -16,6 +16,6 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
 #define TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
 
-// TODO(ebrevdo): Fill this in.
+#include ""absl/strings/cord.h""
 
 #endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_CORD_H_
",0,train
4aaab50552a3cdb4b785653f071ae6c7193992ca,tensorflow/tensorflow,CLN: fix coding style,array_grad.py,"@@ -763,9 +763,10 @@ def _ExtractImagePatchesGrad(op, grad):
                                  (1, rows_out, cols_out, ksize_r * ksize_c))
 
   # Construct mapping table for indices: (input -> output).
-  idx_matrix = array_ops.concat([array_ops.expand_dims(input_idx_patched, axis=-1),
-                                 array_ops.expand_dims(output_idx, axis=-1)],
-                                axis=-1)
+  idx_matrix = array_ops.concat(
+      [array_ops.expand_dims(input_idx_patched, axis=-1),
+       array_ops.expand_dims(output_idx, axis=-1)],
+      axis=-1)
   idx_map = array_ops.reshape(idx_matrix, (-1, 2))
 
   sp_shape = (input_indices_num, output_indices_num)
",0,train
dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels.

Not enabled by default yet.

PiperOrigin-RevId: 383389507
Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",gpu_op_softplus.cc,"@@ -0,0 +1,25 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
+#include ""tensorflow/core/kernels/mlir_generated/base_gpu_op.h""
+
+namespace tensorflow {
+
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_HALF);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_FLOAT);
+GENERATE_AND_REGISTER_UNARY_GPU_KERNEL(Softplus, DT_DOUBLE);
+
+}  // namespace tensorflow
",0,train
dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels.

Not enabled by default yet.

PiperOrigin-RevId: 383389507
Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",gpu_unary_ops_test.cc,"@@ -899,6 +899,28 @@ GENERATE_DEFAULT_TEST(Sinh, DT_FLOAT, DT_FLOAT, std::sinh,
 GENERATE_DEFAULT_TEST(Sinh, DT_DOUBLE, DT_DOUBLE, std::sinh,
                       test::OpsTestConfig())
 
+/// Test `tf.Softplus`.
+
+// Reference implementation
+template <typename T>
+T baseline_softplus(T x) {
+  T epsilon = std::numeric_limits<T>::epsilon();
+  T threshold = 2 + std::log(epsilon);
+  if (x > -threshold && x < threshold) {
+    return std::exp(x);
+  }
+  return std::log1p(std::exp(x));
+}
+
+GENERATE_DEFAULT_TEST_2(Softplus, DT_HALF, DT_FLOAT, DT_HALF, DT_FLOAT,
+                        baseline_softplus, test::OpsTestConfig())
+
+GENERATE_DEFAULT_TEST(Softplus, DT_FLOAT, DT_FLOAT, baseline_softplus,
+                      test::OpsTestConfig())
+
+GENERATE_DEFAULT_TEST(Softplus, DT_DOUBLE, DT_DOUBLE, baseline_softplus,
+                      test::OpsTestConfig())
+
 /// Test `tf.Sqrt`.
 
 GENERATE_DEFAULT_TEST(Sqrt, DT_FLOAT, DT_FLOAT, std::sqrt,
",0,train
dbc129a925f936e3179c93a7908bc01132c9a61e,tensorflow/tensorflow,"Add MLIR generated Softplus GPU kernels.

Not enabled by default yet.

PiperOrigin-RevId: 383389507
Change-Id: Ie0c81f5c95ce7a3a2514d6e00c524c92830cc15d",softplus_op.cc,"@@ -91,13 +91,14 @@ TF_CALL_FLOAT_TYPES(REGISTER_KERNELS);
     (defined(TENSORFLOW_USE_ROCM) && TENSORFLOW_USE_ROCM)
 // Forward declarations of the functor specializations for GPU.
 namespace functor {
-#define DECLARE_GPU_SPEC(T)                                          \
-  template <>                                                        \
-  void Softplus<GPUDevice, T>::operator()(                           \
-      const GPUDevice& d, typename TTypes<T>::ConstTensor features,  \
-      typename TTypes<T>::Tensor activations);                       \
-  extern template struct Softplus<GPUDevice, T>;                     \
-                                                                     \
+#define DECLARE_SOFTPLUS_GPU_SPEC(T)                                \
+  template <>                                                       \
+  void Softplus<GPUDevice, T>::operator()(                          \
+      const GPUDevice& d, typename TTypes<T>::ConstTensor features, \
+      typename TTypes<T>::Tensor activations);                      \
+  extern template struct Softplus<GPUDevice, T>;
+
+#define DECLARE_SOFTPLUS_GRAD_GPU_SPEC(T)                            \
   template <>                                                        \
   void SoftplusGrad<GPUDevice, T>::operator()(                       \
       const GPUDevice& d, typename TTypes<T>::ConstTensor gradients, \
@@ -105,20 +106,34 @@ namespace functor {
       typename TTypes<T>::Tensor backprops);                         \
   extern template struct SoftplusGrad<GPUDevice, T>;
 
-TF_CALL_GPU_NUMBER_TYPES(DECLARE_GPU_SPEC);
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_SOFTPLUS_GPU_SPEC);
+#endif
+
+TF_CALL_GPU_NUMBER_TYPES(DECLARE_SOFTPLUS_GRAD_GPU_SPEC);
 }  // namespace functor
 
 // Registration of the GPU implementations.
-#define REGISTER_GPU_KERNELS(type)                                       \
-  REGISTER_KERNEL_BUILDER(                                               \
-      Name(""Softplus"").Device(DEVICE_GPU).TypeConstraint<type>(""T""),     \
-      SoftplusOp<GPUDevice, type>);                                      \
+#define REGISTER_SOFTPLUS_GPU_KERNELS(type)                          \
+  REGISTER_KERNEL_BUILDER(                                           \
+      Name(""Softplus"").Device(DEVICE_GPU).TypeConstraint<type>(""T""), \
+      SoftplusOp<GPUDevice, type>);
+
+#define REGISTER_SOFTPLUS_GRAD_GPU_KERNELS(type)                         \
   REGISTER_KERNEL_BUILDER(                                               \
       Name(""SoftplusGrad"").Device(DEVICE_GPU).TypeConstraint<type>(""T""), \
       SoftplusGradOp<GPUDevice, type>);
 
-TF_CALL_GPU_NUMBER_TYPES(REGISTER_GPU_KERNELS);
-#undef REGISTER_GPU_KERNELS
+#if !defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) || \
+    !defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SOFTPLUS_GPU_KERNELS);
+#endif
+
+TF_CALL_GPU_NUMBER_TYPES(REGISTER_SOFTPLUS_GRAD_GPU_KERNELS);
+
+#undef REGISTER_SOFTPLUS_GPU_KERNELS
+#undef REGISTER_SOFTPLUS_GRAD_GPU_KERNELS
 
 #endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
",0,train
453c9d6bc11fd09523ea0c2ebee8e82b94f76654,tensorflow/tensorflow,"[tf:tfrt] Do not outline too large clusters

PiperOrigin-RevId: 416263997
Change-Id: I516c2f960450aa7bcf5f5e3459bb22953aea03c0",clustering.cc,"@@ -826,6 +826,9 @@ mlir::LogicalResult VerifyCluster(const Cluster& cluster) {
     (void)inserted;
   }
 
+  // TODO(b/202247905): Large clusters can lead to a very long compilation time.
+  if (ops.size() > 10) return failure();
+
   // TODO(ezhulenev): This is a temporary workaround to disable forming clusters
   // with known compilation problems.
   for (Operation* op : ops) {
",0,train
27de8e717c1bec91398f5a6be6c7287b657fc960,tensorflow/tensorflow,"Improve shape function for CudnnRNNParamsSize

In cudnn_rnn_ops.cc, the CudnnRNNParamsSize does not
have restrictions on num_layers, num_units, and input_size,
though they all should be scalars.

This fix adds the shape check of num_layers, num_units, and input_size
for CudnnRNNParamsSize.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",cudnn_rnn_ops.cc,"@@ -52,6 +52,12 @@ REGISTER_OP(""CudnnRNNParamsSize"")
     .Attr(""seed2: int = 0"")
     .Output(""params_size: S"")
     .SetShapeFn([](InferenceContext* c) {
+      ShapeHandle unused;
+      // num_layers, num_units, and input_size should be scalars.
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(0), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(1), 0, &unused));
+      TF_RETURN_IF_ERROR(c->WithRank(c->input(2), 0, &unused));
+
       c->set_output(0, c->Vector(1));
       return Status::OK();
     });
",0,test
20bab61688b60300eafb2c7cc48b9ad542bcb1a4,tensorflow/tensorflow,"Update tensorflow/core/kernels/mkl_relu_op.cc

Co-Authored-By: guizili0 <guizi.li@intel.com>",mkl_relu_op.cc,"@@ -1368,7 +1368,7 @@ class MklLeakyReluGradOp : public MklReluGradOpBase<Device, T, eltwise_relu> {
     OP_REQUIRES_OK(context, context->GetAttr(""alpha"", &alpha));
     OP_REQUIRES(
         context, alpha <= 1,
-        errors::InvalidArgument(""MKL LeakyRelu only support alpha <= 1. ""
+        errors::InvalidArgument(""MKL LeakyRelu only supports alpha <= 1. ""
                                 ""alpha is: "",
                                 alpha));
 
",0,train
0822126d7e0b9cd612dffaf5a89eb930e15e37f9,tensorflow/tensorflow,"Add FunctionSpec to def_function.PolymorphicFunction. In the future, this should be consolidated with function.PolymorphicFunction's FunctionSpec.

PiperOrigin-RevId: 226170883",def_function.py,"@@ -236,6 +236,10 @@ class PolymorphicFunction(object):
     """"""
     self._python_function = python_function
     self._input_signature = input_signature
+    # TODO(vbardiovsky): Both _stateful_fn and _stateless_fn are populating the
+    # same FunctionSpec. Consider removing it from both and passing in instead.
+    self._function_spec = function_lib.FunctionSpec.from_function_and_signature(
+        python_function, input_signature)
     self._autograph = autograph
     self._experimental_autograph_options = experimental_autograph_options
     if self._experimental_autograph_options is not None:
@@ -265,15 +269,8 @@ class PolymorphicFunction(object):
 
   def _canonicalize_function_inputs(self, args, kwds):
     """"""Canonicalize the inputs to the Python function.""""""
-    if not self._stateful_fn:
-      raise ValueError(
-          ""_canonicalize_function_inputs must be called only after _initialize ""
-          ""has run."")
-    # pylint: disable=protected-access
     if self._input_signature is None or args or kwds:
-      return self._stateful_fn._function_spec.canonicalize_function_inputs(
-          *args, **kwds)
-    # pylint: enable=protected-access
+      return self._function_spec.canonicalize_function_inputs(*args, **kwds)  # pylint: disable=protected-access
     # If an input signature is defined, we may need to fetch a concrete function
     # without any inputs specified. In this case args and kwds should be ignored
     # but running _canonicalize_function_inputs would raise an exception.
@@ -405,6 +402,10 @@ class PolymorphicFunction(object):
   def input_signature(self):
     return self._input_signature
 
+  @property
+  def function_spec(self):
+    return self._function_spec
+
   def get_initialization_function(self, *args, **kwargs):
     """"""Returns a `Function` object which initializes this function's variables.
 
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,base_test.py,"@@ -126,16 +126,15 @@ class SimpleMultiEnginesTest(trt_test.TfTrtIntegrationTestBase):
 class SimpleMultiEnginesTest2(trt_test.TfTrtIntegrationTestBase):
 
   def GraphFn(self, inp):
-    """"""Create a graph containing two segment.""""""
+    """"""Create a graph containing two segments.""""""
     n = inp
     for i in range(2):
       c = constant_op.constant(1.0, name=""c%d"" % i)
       n = math_ops.add(n, c, name=""add%d"" % i)
       n = math_ops.mul(n, n, name=""mul%d"" % i)
-    edge = self.trt_incompatible_op(n, name=""incompatible"")
-    with ops.control_dependencies([edge]):
-      c = constant_op.constant(1.0, name=""c2"")
-      n = math_ops.add(n, c, name=""add2"")
+    n = self.trt_incompatible_op(n, name=""incompatible"")
+    c = constant_op.constant(1.0, name=""c2"")
+    n = math_ops.add(n, c, name=""add2"")
     n = math_ops.mul(n, n, name=""mul2"")
     c = constant_op.constant(1.0, name=""c3"")
     n = math_ops.add(n, c, name=""add3"")
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,combined_nms_test.py,"@@ -33,15 +33,10 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase):
     self.num_boxes = 200
 
   def GraphFn(self, boxes, scores):
-    max_output_size_per_class = 3
     max_total_size = 3
     score_threshold = 0.1
     iou_threshold = 0.5
     # Shapes
-    max_output_size_per_class_tensor = constant_op.constant(
-        max_output_size_per_class,
-        dtype=dtypes.int32,
-        name='max_output_size_per_class')
     max_total_size_tensor = constant_op.constant(
         max_total_size, dtype=dtypes.int32, name='max_total_size')
     iou_threshold_tensor = constant_op.constant(
@@ -51,7 +46,7 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase):
     nms_output = image_ops_impl.combined_non_max_suppression(
         boxes,
         scores,
-        max_output_size_per_class_tensor,
+        max_total_size_tensor,
         max_total_size_tensor,
         iou_threshold_tensor,
         score_threshold_tensor,
@@ -86,8 +81,7 @@ class CombinedNmsTest(trt_test.TfTrtIntegrationTestBase):
       return {
           'TRTEngineOp_0': [
               'combined_nms/CombinedNonMaxSuppression',
-              'max_output_size_per_class', 'max_total_size', 'iou_threshold',
-              'score_threshold'
+              'max_total_size', 'iou_threshold', 'score_threshold'
           ]
       }
     else:
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,quantization_test.py,"@@ -117,8 +117,8 @@ class NonQuantizedPrecisionsWithRangesTest(trt_test.TfTrtIntegrationTestBase):
   def ExpectedEnginesToBuild(self, run_params):
     """"""Return the expected engines to build.""""""
     # The fake quant ops are not supported in FP32/FP16 mode, and will split the
-    # graph into three TRT segments.
-    return [""TRTEngineOp_0"", ""TRTEngineOp_1"", ""TRTEngineOp_2"", ""TRTEngineOp_3""]
+    # graph into two TRT segments.
+    return [""TRTEngineOp_0"", ""TRTEngineOp_1""]
 
   def ExpectedAbsoluteTolerance(self, run_params):
     """"""The absolute tolerance to compare floating point results.""""""
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,reshape_transpose_test.py,"@@ -15,6 +15,7 @@
 """"""Basic tests for TF-TensorRT integration.""""""
 
 from tensorflow.python.compiler.tensorrt.test import tf_trt_integration_test_base as trt_test
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
@@ -32,9 +33,10 @@ class ReshapeTest(trt_test.TfTrtIntegrationTestBase):
     # conversion.
     #
     # These reshapes happen at batch dimension, thus conversion should fail.
+    orig_shape = constant_op.constant([-1, 24, 24, 2], name=""original_shape"")
     for shape in [[2, 50, 24, 24, 2], [-1, 50, 24, 24, 2], [2, 50, -1, 24, 2]]:
       incompatible_reshape = array_ops.reshape(inp, shape)
-      reshape_back = array_ops.reshape(incompatible_reshape, [-1, 24, 24, 2])
+      reshape_back = array_ops.reshape(incompatible_reshape, orig_shape)
       outputs.append(self.trt_incompatible_op(reshape_back))
     # Add another block with many reshapes that don't change the batch
     # dimension.
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,trt_convert.py,"@@ -306,10 +306,9 @@ def _get_tensorrt_rewriter_config(conversion_params,
   rewriter_config_with_trt.remapping = False
 
   if not disable_non_trt_optimizers:
-    # Layout optimizer may add Const nodes followed by Reshape nodes, thus we
-    # need to run constant folding again.
     rewriter_config_with_trt.optimizers.extend(
-        [""constfold"", ""layout"", ""constfold""])
+        [""pruning"", ""debug_stripper"", ""layout"", ""dependency"", ""constfold"",
+         ""common_subgraph_elimination""])
 
   rewriter_config_with_trt.meta_optimizer_iterations = (
       rewriter_config_pb2.RewriterConfig.ONE)
",0,train
54ac76671b51b3aae688c4906101c1334cd95c4f,tensorflow/tensorflow,Change Grappler optimizers run by the TRT converter to avoid constant duplication,trt_convert_test.py,"@@ -274,7 +274,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         self.assertEqual(
             {
                 ""add"": ""AddV2"",
-                ""add/ReadVariableOp"": ""Const"",
+                ""v1"": ""Const"",
                 ""add_1"": ""AddV2"",
                 ""add_2"": ""AddV2"",
                 ""input1"": ""Placeholder"",
@@ -806,7 +806,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     node_name_to_op = {node.name: node.op for node in output_graph_def.node}
     self.assertEqual(
         {
-            ""add/ReadVariableOp"": ""Const"",
+            ""v1"": ""Const"",
             ""input1"": ""Placeholder"",
             ""input2"": ""Placeholder"",
             ""add"": ""AddV2"",
",0,train
7a26883cb88478e17d0e23ff9e4058aa853426de,tensorflow/tensorflow,"Update GraphDef version to 724.

PiperOrigin-RevId: 366415693
Change-Id: Ib42da615ac33d8551cfcf8dfd685b6e7d1bf8eb1",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 723  // Updated: 2021/4/1
+#define TF_GRAPH_DEF_VERSION 724  // Updated: 2021/4/2
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
ec3f158515788d9a86dc1bad479f6dfc3be2879a,tensorflow/tensorflow,"Catch non-worker-preemption errors in the preemption handler.

PiperOrigin-RevId: 358928837
Change-Id: I8c6a248a89579ca4418ba73fae17fcb9f7acd5ea",cluster_coordinator.py,"@@ -622,6 +622,7 @@ class WorkerPreemptionHandler(object):
     self._cluster_update_lock = threading.Lock()
     self._cluster_due_for_update_or_finish = threading.Event()
     self._worker_up_cond = threading.Condition(self._cluster_update_lock)
+    self._error_from_recovery = None
     self._should_preemption_thread_run = True
     self._preemption_handler_thread = threading.Thread(
         target=self._preemption_handler,
@@ -680,6 +681,14 @@ class WorkerPreemptionHandler(object):
       with self._cluster_update_lock:
         self._cluster_due_for_update_or_finish.set()
         self._worker_up_cond.wait(_WORKER_MAXIMUM_RECOVERY_SEC)
+        if self._error_from_recovery:
+          # TODO(yuefengz): there is only one worker that will get this error.
+          # Ideally we should let all workers notified by `_worker_up_cond` get
+          # this error.
+          try:
+            raise self._error_from_recovery
+          finally:
+            self._error_from_recovery = None
         logging.info(""Worker %s has been recovered."", worker_device_name)
 
       if on_recovery_fn:
@@ -717,7 +726,15 @@ class WorkerPreemptionHandler(object):
           if self._should_preemption_thread_run:
             self._cluster_due_for_update_or_finish.clear()
         except Exception as e:  # pylint: disable=broad-except
-          self._validate_preemption_failure(e)
+          try:
+            self._validate_preemption_failure(e)
+          except Exception as e:  # pylint: disable=broad-except
+            # In this case, a parameter server fails. So we raise this error to
+            # the caller of `wait_on_failure`.
+            self._error_from_recovery = e
+            self._worker_up_cond.notify_all()
+            if self._should_preemption_thread_run:
+              self._cluster_due_for_update_or_finish.clear()
           # NOTE: Since the first RPC (GetStatus) of update_server_def is
           # currently blocking by default, error should only happen if:
           # (1) More workers failed while waiting for the previous workers to
",0,train
ec3f158515788d9a86dc1bad479f6dfc3be2879a,tensorflow/tensorflow,"Catch non-worker-preemption errors in the preemption handler.

PiperOrigin-RevId: 358928837
Change-Id: I8c6a248a89579ca4418ba73fae17fcb9f7acd5ea",fault_tolerance_test.py,"@@ -446,10 +446,26 @@ class BaseFaultToleranceTest(object):  # pylint: disable=missing-docstring
     self.assertGreaterEqual(model.iterations.numpy(), 10)
 
   def testPSFailureWhileRecoveryFromWokerFailure(self):
-    # Only by adding this empty test, can the problem of b/180348454 be
-    # reproduced.
-    # TODO(yuefengz): fill in this test.
-    pass
+    model = self._create_model_and_run_indefinitely()
+
+    time.sleep(1)
+    self.assertFalse(self.cluster_coord.done())
+
+    def kill(task):
+      self._cluster.kill_task(task, 0)
+      self.sleep(1)
+      self._cluster.start_task(task, 0)
+
+    kill_thread_1 = threading.Thread(target=kill, args=(""worker"",))
+    kill_thread_2 = threading.Thread(target=kill, args=(""ps"",))
+    kill_thread_1.start()
+    kill_thread_2.start()
+    kill_thread_1.join()
+    kill_thread_2.join()
+
+    with self.assertRaises(
+        (errors.UnavailableError, errors.InvalidArgumentError)):
+      model.join_training_functions()
 
   def testNumpyFetchedAfterWorkerFailure(self):
 
",0,train
05471ab95fc86834d171a3df23bd4397266a985e,tensorflow/tensorflow,Update losses_test.py,losses_test.py,"@@ -1806,7 +1806,7 @@ class HuberLossTest(test.TestCase):
 class BinaryTruePositivesViaControlFlow(losses.Loss):
 
   def __init__(self, reduction=losses_utils.ReductionV2.AUTO):
-    super().__init__(reduction=reduction)
+    super(BinaryTruePositivesViaControlFlow, self).__init__(reduction=reduction)
 
   def call(self, y_true, y_pred):
     y_true = math_ops.cast(y_true, dtypes.bool)
",0,train
f6bf10607fc0bd00e94704e1ae20f06f34b81df3,tensorflow/tensorflow,"[tf.data] Fix a bug in prefetch dataset serialization logic.

PiperOrigin-RevId: 313453820
Change-Id: I573d4288fbb10b7491778ce4edf24241f5e35fa1",prefetch_dataset_op.cc,"@@ -100,9 +100,13 @@ class PrefetchDatasetOp::Dataset : public DatasetBase {
     TF_RETURN_IF_ERROR(b->AddScalar(buffer_size_, &buffer_size));
     AttrValue slack_period_attr;
     b->BuildAttrValue(slack_period_, &slack_period_attr);
-    TF_RETURN_IF_ERROR(b->AddDataset(
-        this, {input_graph_node, buffer_size},
-        {std::make_pair(kSlackPeriod, slack_period_attr)}, output));
+    AttrValue legacy_autotune_attr;
+    b->BuildAttrValue(legacy_autotune_, &legacy_autotune_attr);
+    TF_RETURN_IF_ERROR(
+        b->AddDataset(this, {input_graph_node, buffer_size},
+                      {std::make_pair(kSlackPeriod, slack_period_attr),
+                       std::make_pair(kLegacyAutotune, legacy_autotune_attr)},
+                      output));
     return Status::OK();
   }
 
",0,train
d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem.cc,"@@ -151,7 +151,7 @@ static void SyncImpl(const std::string& bucket, const std::string& object,
       *offset = static_cast<int64_t>(metadata->size());
     }
     outfile->clear();
-    outfile->seekp(std::ios::end);
+    outfile->seekp(0, std::ios::end);
     TF_SetStatus(status, TF_OK, """");
   } else {
     std::string temporary_object =
@@ -275,11 +275,6 @@ uint64_t Length(const TF_ReadOnlyMemoryRegion* region) {
 // SECTION 4. Implementation for `TF_Filesystem`, the actual filesystem
 // ----------------------------------------------------------------------------
 namespace tf_gcs_filesystem {
-typedef struct GCSFile {
-  gcs::Client gcs_client;  // owned
-  bool compose;
-} GCSFile;
-
 // TODO(vnvo2409): Add lazy-loading and customizing parameters.
 void Init(TF_Filesystem* filesystem, TF_Status* status) {
   google::cloud::StatusOr<gcs::Client> client =
",0,train
d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem.h,"@@ -28,7 +28,27 @@ int64_t Read(const TF_RandomAccessFile* file, uint64_t offset, size_t n,
              char* buffer, TF_Status* status);
 }  // namespace tf_random_access_file
 
+namespace tf_writable_file {
+void Cleanup(TF_WritableFile* file);
+void Append(const TF_WritableFile* file, const char* buffer, size_t n,
+            TF_Status* status);
+int64_t Tell(const TF_WritableFile* file, TF_Status* status);
+void Flush(const TF_WritableFile* file, TF_Status* status);
+void Sync(const TF_WritableFile* file, TF_Status* status);
+void Close(const TF_WritableFile* file, TF_Status* status);
+}  // namespace tf_writable_file
+
+namespace tf_read_only_memory_region {
+void Cleanup(TF_ReadOnlyMemoryRegion* region);
+const void* Data(const TF_ReadOnlyMemoryRegion* region);
+uint64_t Length(const TF_ReadOnlyMemoryRegion* region);
+}  // namespace tf_read_only_memory_region
+
 namespace tf_gcs_filesystem {
+typedef struct GCSFile {
+  google::cloud::storage::Client gcs_client;  // owned
+  bool compose;
+} GCSFile;
 void Init(TF_Filesystem* filesystem, TF_Status* status);
 void Cleanup(TF_Filesystem* filesystem);
 void NewRandomAccessFile(const TF_Filesystem* filesystem, const char* path,
@@ -37,6 +57,10 @@ void NewWritableFile(const TF_Filesystem* filesystem, const char* path,
                      TF_WritableFile* file, TF_Status* status);
 void NewAppendableFile(const TF_Filesystem* filesystem, const char* path,
                        TF_WritableFile* file, TF_Status* status);
+void NewReadOnlyMemoryRegionFromFile(const TF_Filesystem* filesystem,
+                                     const char* path,
+                                     TF_ReadOnlyMemoryRegion* region,
+                                     TF_Status* status);
 }  // namespace tf_gcs_filesystem
 
 #endif  // TENSORFLOW_C_EXPERIMENTAL_FILESYSTEM_PLUGINS_GCS_GCS_FILESYSTEM_H_
",0,train
d93a6f392597928113895ace200ee1e0b6a13b5f,tensorflow/tensorflow,Add test and fix one error for writable file,gcs_filesystem_test.cc,"@@ -93,38 +93,53 @@ class GCSFilesystemTest : public ::testing::Test {
 };
 std::string GCSFilesystemTest::tmp_dir_;
 
-::testing::AssertionResult WriteToServer(const std::string& path, size_t length,
-                                         gcs::Client* gcs_client,
+::testing::AssertionResult WriteToServer(const std::string& path, size_t offset,
+                                         size_t length, gcs::Client* gcs_client,
                                          TF_Status* status) {
   std::string bucket, object;
   ParseGCSPath(path, false, &bucket, &object, status);
-  if (TF_GetCode(status) != TF_OK) {
+  if (TF_GetCode(status) != TF_OK)
     return ::testing::AssertionFailure() << TF_Message(status);
-  }
 
   auto writer = gcs_client->WriteObject(bucket, object);
-  writer.write(content, length);
+  writer.write(content + offset, length);
   writer.Close();
-  if (writer.metadata()) {
+  if (writer.metadata())
     return ::testing::AssertionSuccess();
-  } else {
+  else
     return ::testing::AssertionFailure()
            << writer.metadata().status().message();
-  }
 }
 
-::testing::AssertionResult CompareSubString(int64_t offset, size_t n,
+::testing::AssertionResult CompareSubString(int64_t offset, size_t length,
                                             absl::string_view result,
                                             size_t read) {
   // Result isn't a null-terminated string so we have to wrap it inside a
   // `string_view`
-  if (n == read && content_view.substr(offset, n) ==
-                       absl::string_view(result).substr(0, read)) {
+  if (length == read && content_view.substr(offset, length) ==
+                            absl::string_view(result).substr(0, read))
     return ::testing::AssertionSuccess();
-  } else {
+  else
     return ::testing::AssertionFailure()
            << ""Result: "" << absl::string_view(result).substr(0, read)
-           << "" Read:"" << read;
+           << "" Read: "" << read;
+}
+
+::testing::AssertionResult CompareWithServer(const std::string& path,
+                                             size_t offset, size_t length,
+                                             gcs::Client* gcs_client,
+                                             TF_Status* status) {
+  std::string bucket, object;
+  ParseGCSPath(path, false, &bucket, &object, status);
+  if (TF_GetCode(status) != TF_OK)
+    return ::testing::AssertionFailure() << TF_Message(status);
+
+  auto reader = gcs_client->ReadObject(bucket, object);
+  if (!reader)
+    return ::testing::AssertionFailure() << reader.status().message();
+  else {
+    std::string content{std::istreambuf_iterator<char>{reader}, {}};
+    return CompareSubString(offset, length, content, content.length());
   }
 }
 
@@ -162,9 +177,10 @@ TEST_F(GCSFilesystemTest, RandomAccessFile) {
   ASSERT_EQ(TF_GetCode(status_), TF_NOT_FOUND) << TF_Message(status_);
   TF_SetStatus(status_, TF_OK, """");
 
-  auto gcs_client = static_cast<gcs::Client*>(filesystem_->plugin_filesystem);
-  ASSERT_TRUE(
-      WriteToServer(filepath, content_view.length(), gcs_client, status_));
+  auto gcs_file =
+      static_cast<tf_gcs_filesystem::GCSFile*>(filesystem_->plugin_filesystem);
+  ASSERT_TRUE(WriteToServer(filepath, 0, content_view.length(),
+                            &gcs_file->gcs_client, status_));
 
   read = tf_random_access_file::Read(file, 0, content_view.length(), result,
                                      status_);
@@ -185,6 +201,97 @@ TEST_F(GCSFilesystemTest, RandomAccessFile) {
   delete file;
 }
 
+TEST_F(GCSFilesystemTest, WritableFile) {
+  std::string filepath = GetURIForPath(""a_file"");
+  TF_WritableFile* file = new TF_WritableFile;
+  tf_gcs_filesystem::NewWritableFile(filesystem_, filepath.c_str(), file,
+                                     status_);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Append(file, content, 4, status_);
+  ASSERT_TF_OK(status_);
+  auto length = tf_writable_file::Tell(file, status_);
+  ASSERT_EQ(length, 4);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Flush(file, status_);
+  ASSERT_TF_OK(status_);
+
+  auto gcs_file =
+      static_cast<tf_gcs_filesystem::GCSFile*>(filesystem_->plugin_filesystem);
+  ASSERT_TRUE(
+      CompareWithServer(filepath, 0, 4, &gcs_file->gcs_client, status_));
+
+  tf_writable_file::Append(file, content + 4, 4, status_);
+  ASSERT_TF_OK(status_);
+  length = tf_writable_file::Tell(file, status_);
+  ASSERT_EQ(length, 8);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Flush(file, status_);
+  ASSERT_TF_OK(status_);
+  ASSERT_TRUE(
+      CompareWithServer(filepath, 0, 8, &gcs_file->gcs_client, status_));
+
+  tf_writable_file::Close(file, status_);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Cleanup(file);
+
+  // Testing for compose objects
+  gcs_file->compose = true;
+  filepath = GetURIForPath(""b_file"");
+  tf_gcs_filesystem::NewWritableFile(filesystem_, filepath.c_str(), file,
+                                     status_);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Append(file, content, 4, status_);
+  ASSERT_TF_OK(status_);
+  length = tf_writable_file::Tell(file, status_);
+  ASSERT_EQ(length, 4);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Flush(file, status_);
+  ASSERT_TF_OK(status_);
+  ASSERT_TRUE(
+      CompareWithServer(filepath, 0, 4, &gcs_file->gcs_client, status_));
+
+  tf_writable_file::Append(file, content + 4, 4, status_);
+  ASSERT_TF_OK(status_);
+  length = tf_writable_file::Tell(file, status_);
+  ASSERT_EQ(length, 8);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Flush(file, status_);
+  ASSERT_TF_OK(status_);
+  ASSERT_TRUE(
+      CompareWithServer(filepath, 0, 8, &gcs_file->gcs_client, status_));
+
+  tf_writable_file::Close(file, status_);
+  ASSERT_TF_OK(status_);
+  tf_writable_file::Cleanup(file);
+  delete file;
+}
+
+TEST_F(GCSFilesystemTest, ReadOnlyMemoryRegion) {
+  std::string path = GetURIForPath(""a_file"");
+  auto gcs_file =
+      static_cast<tf_gcs_filesystem::GCSFile*>(filesystem_->plugin_filesystem);
+  ASSERT_TRUE(WriteToServer(path, 0, 0, &gcs_file->gcs_client, status_));
+  TF_ReadOnlyMemoryRegion* region = new TF_ReadOnlyMemoryRegion;
+  tf_gcs_filesystem::NewReadOnlyMemoryRegionFromFile(filesystem_, path.c_str(),
+                                                     region, status_);
+  ASSERT_EQ(TF_GetCode(status_), TF_INVALID_ARGUMENT) << TF_Message(status_);
+
+  TF_SetStatus(status_, TF_OK, """");
+  ASSERT_TRUE(WriteToServer(path, 0, content_view.length(),
+                            &gcs_file->gcs_client, status_));
+  tf_gcs_filesystem::NewReadOnlyMemoryRegionFromFile(filesystem_, path.c_str(),
+                                                     region, status_);
+  ASSERT_TF_OK(status_);
+  auto length = tf_read_only_memory_region::Length(region);
+  ASSERT_EQ(length, content_view.length());
+  auto data =
+      static_cast<const char*>(tf_read_only_memory_region::Data(region));
+  ASSERT_TRUE(CompareSubString(0, content_view.length(), data, length));
+
+  tf_read_only_memory_region::Cleanup(region);
+  delete region;
+}
+
 }  // namespace
 }  // namespace tensorflow
 
",0,train
8649852b75ed43fd62e1429086e4a8f5dd6d38ee,tensorflow/tensorflow,"Fix CPU NCHW BiasAddGrad op when height and width are 1.

Before it outputted uninitialized memory.

There was a special case in ReduceMiddleDimensions which incorrectly used ReduceOuterDimensions. If a ReduceInnerDimensions struct existed, that could have been used instead to make the special case work, but it doesn't exist, so this change removes the special case.

PiperOrigin-RevId: 396563734
Change-Id: I8ce437e8d26d0ecc9f44390de17c71b49ee74ee8",redux_functor.h,"@@ -230,11 +230,6 @@ struct ReduceMiddleDimensions {
           input.template flat<InputT>().template cast<OutputT>().reshape(
               output_dims);
       return;
-    } else if (1 == inner_dim) {
-      // Equivalent to ReduceOuterDimensions.
-      const ReduceOuterDimensions<InputT, AccumT, OutputT, BinaryFunctor> redux;
-      redux(device, input_dims, input, output);
-      return;
     }
 
     // Compute block size along the outer dimension for efficiency.
",0,test
8649852b75ed43fd62e1429086e4a8f5dd6d38ee,tensorflow/tensorflow,"Fix CPU NCHW BiasAddGrad op when height and width are 1.

Before it outputted uninitialized memory.

There was a special case in ReduceMiddleDimensions which incorrectly used ReduceOuterDimensions. If a ReduceInnerDimensions struct existed, that could have been used instead to make the special case work, but it doesn't exist, so this change removes the special case.

PiperOrigin-RevId: 396563734
Change-Id: I8ce437e8d26d0ecc9f44390de17c71b49ee74ee8",bias_op_base.py,"@@ -254,7 +254,7 @@ class BiasAddTestBase(test.TestCase):
         self._testGradient(np_input, bias, dtype, data_format, use_gpu)
 
   def testGradientTensor4D(self):
-    for (data_format, use_gpu) in [(""NHWC"", False)]:
+    for (data_format, use_gpu) in [(""NHWC"", False), (""NCHW"", False)]:
       for dtype in (dtypes.float16, dtypes.float32, dtypes.float64):
         np_input = np.arange(
             1.0, 49.0,
@@ -273,6 +273,13 @@ class BiasAddTestBase(test.TestCase):
         self._testGradient(np_input,
                            np.random.rand(64).astype(dtype.as_numpy_dtype),
                            dtype, data_format, use_gpu)
+        np_input = np.arange(
+            1.0, 129.0,
+            dtype=dtype.as_numpy_dtype).reshape([4, 1, 1,
+                                                 32]).astype(np.float32)
+        self._testGradient(np_input,
+                           np.random.rand(32).astype(dtype.as_numpy_dtype),
+                           dtype, data_format, use_gpu)
 
   def testGradientTensor5D(self):
     for (data_format, use_gpu) in [(""NHWC"", False), (""NHWC"", True),
",0,test
d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,data_service_ops.py,"@@ -318,12 +318,26 @@ def distribute(processing_mode,
   a ""one_epoch"" mode which partitions the dataset across the tf.data
   workers, so that the consumers see each element of the dataset only once.
 
+  To see the distributed operations in action, the `DispatchServer` should be
+  started first so that tf.data workers can register to it.
+
+  ```
+  dispatcher = tf.data.experimental.service.DispatchServer(port=5000)
+  print(dispatcher.target) # prints grpc://localhost:5000
+
+  dispatcher_address = dispatcher.target.split(""://"")[1]
+  worker = tf.data.experimental.service.WorkerServer(
+           port=0, dispatcher_address=dispatcher_address)
+  ```
+
+  Now the operations on a `tf.data.Dataset` can be distributed to the worker.
+
   ```
   dataset = tf.data.Dataset.range(5)
   dataset = dataset.map(lambda x: x*x)
   dataset = dataset.apply(
       tf.data.experimental.service.distribute(""parallel_epochs"",
-                                              ""grpc://dataservice:5000""))
+                                              dispatcher.target))
   dataset = dataset.map(lambda x: x+1)
 
   for element in dataset:
@@ -331,7 +345,7 @@ def distribute(processing_mode,
   ```
 
   In the above example, the first two lines (before the call to `distribute`)
-  will be executed on tf.data workers, and the elements provided over
+  will be executed on the tf.data worker, and the elements are provided over
   RPC. The remaining transformations (after the call to `distribute`) will be
   executed locally.
 
@@ -339,9 +353,10 @@ def distribute(processing_mode,
   datasets. Instead of each dataset creating its own job, all
   datasets with the same `job_name` will consume from the same job. A new job
   will be created for each iteration of the dataset (with each repetition of
-  `Dataset.repeat` counting as a new iteration). Suppose two training workers
-  (in either a single client or multi-client setup) iterate over the below
-  dataset, and there is a single tf.data worker:
+  `Dataset.repeat` counting as a new iteration). Suppose the `DispatchServer`
+  is serving on `dataservice:5000` and two training workers (in either a single
+  client or multi-client setup) iterate over the below dataset, and there is a
+  single tf.data worker:
 
   ```
   range5_dataset = tf.data.Dataset.range(5)
",0,train
d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,distribute.py,"@@ -460,8 +460,7 @@ def batch_sizes_for_worker(global_batch_size, num_workers,
   worker_0 = floor * worker_0 + array_ops.concat([
       array_ops.ones(num_ceil, dtype=dtypes.int64),
       array_ops.zeros(num_subbatches - num_ceil, dtype=dtypes.int64)
-  ],
-                                                 axis=0)
+  ], axis=0)
 
   return array_ops.concat([worker_0[offset:], worker_0[:offset]], axis=0)
 
",0,train
d96e762f330b3646150ee811058be39345d1124e,tensorflow/tensorflow,clarified the DispatchServer creation process,distribute_options.py,"@@ -80,6 +80,5 @@ class DistributeOptions(options.OptionsBase):
   num_devices = options.create_option(
       name=""num_devices"",
       ty=int,
-      docstring=
-      ""The number of devices attached to this input pipeline. This will be ""
-      ""automatically set by MultiDeviceIterator."")
+      docstring=""The number of devices attached to this input pipeline. ""
+      ""This will be automatically set by MultiDeviceIterator."")
",0,train
13e153172c0afc1e24a98db98df07ea0cb680d8d,tensorflow/tensorflow,format code,non_max_suppression_op.cc,"@@ -197,10 +197,12 @@ void DoNonMaxSuppressionOp(OpKernelContext* context, const Tensor& scores,
     scale = static_cast<T>(-1.0) / soft_nms_sigma;
   }
 
-  auto suppress_weight = [similarity_threshold, scale, is_soft_nms](const T sim) {
+  auto suppress_weight = [similarity_threshold, scale,
+                          is_soft_nms](const T sim) {
     const T weight =
         static_cast<T>(std::exp(static_cast<float>(scale * sim * sim)));
-    return is_soft_nms || sim <= similarity_threshold ? weight : static_cast<T>(0.0);
+    return is_soft_nms || sim <= similarity_threshold ? weight
+                                                      : static_cast<T>(0.0);
   };
 
   std::vector<int> selected;
",0,train
8f0570b9627f12fc95b02eca70e6267735f9c717,tensorflow/tensorflow,"Remove unneeded quote for forward slash based on review comment.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",tensor_shape_div_test.py,"@@ -40,8 +40,8 @@ class DimensionDivTest(test_util.TensorFlowTestCase):
     """"""Without from __future__ import division, __rdiv__ is used.""""""
     if six.PY2:  # Old division exists only in Python 2
       two = tensor_shape.Dimension(2)
-      message = (r""unsupported operand type\(s\) for \/: ""
-                 r""'int' and 'Dimension', please use \/\/ instead"")
+      message = (r""unsupported operand type\(s\) for /: ""
+                 r""'int' and 'Dimension', please use // instead"")
       with self.assertRaisesRegexp(TypeError, message):
         _ = 6 / two
 
",0,train
8f0570b9627f12fc95b02eca70e6267735f9c717,tensorflow/tensorflow,"Remove unneeded quote for forward slash based on review comment.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",tensor_shape_test.py,"@@ -209,16 +209,16 @@ class DimensionTest(test_util.TensorFlowTestCase):
     # Note: This test is related to GitHub issue 25790.
     six = tensor_shape.Dimension(6)
     two = tensor_shape.Dimension(2)
-    message = (r""unsupported operand type\(s\) for \/: ""
-               r""'Dimension' and 'Dimension', please use \/\/ instead"")
+    message = (r""unsupported operand type\(s\) for /: ""
+               r""'Dimension' and 'Dimension', please use // instead"")
     with self.assertRaisesRegexp(TypeError, message):
       _ = six / two
-    message = (r""unsupported operand type\(s\) for \/: ""
-               r""'Dimension' and 'int', please use \/\/ instead"")
+    message = (r""unsupported operand type\(s\) for /: ""
+               r""'Dimension' and 'int', please use // instead"")
     with self.assertRaisesRegexp(TypeError, message):
       _ = six / 2
-    message = (r""unsupported operand type\(s\) for \/: ""
-               r""'int' and 'Dimension', please use \/\/ instead"")
+    message = (r""unsupported operand type\(s\) for /: ""
+               r""'int' and 'Dimension', please use // instead"")
     with self.assertRaisesRegexp(TypeError, message):
       _ = 6 / two
 
",0,train
2cbbe2ae0d4ab61d8f08f1eb31417e4a163395c7,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-01-23

PiperOrigin-RevId: 291116711
Change-Id: Iaccd5467e1581192360210fdcc01ca6b40bb713a",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 1, 22)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 1, 23)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
34b08363810df7dbb678902a5158358cc006e514,tensorflow/tensorflow,"Fix gru kernel test for msan.

PiperOrigin-RevId: 247150783",unidirectional_sequence_gru_test.cc,"@@ -31,11 +31,13 @@ using ::testing::ElementsAreArray;
 
 class GRUOpModel : public SingleOpModel {
  public:
-  explicit GRUOpModel(const std::vector<std::vector<int>>& input_shapes,
-                      const TensorType& weight_type = TensorType_FLOAT32) {
+  explicit GRUOpModel(int n_batch, int n_input, int n_output,
+                      const std::vector<std::vector<int>>& input_shapes,
+                      const TensorType& weight_type = TensorType_FLOAT32)
+      : n_batch_(n_batch), n_input_(n_input), n_output_(n_output) {
     input_ = AddInput(TensorType_FLOAT32);
     input_state_ =
-        AddInput(TensorData{TensorType_FLOAT32, {n_batch_, n_output_}}, true);
+        AddInput(TensorData{TensorType_FLOAT32, {n_batch, n_output}}, true);
     gate_weight_ = AddInput(TensorType_FLOAT32);
     gate_bias_ = AddInput(TensorType_FLOAT32);
     candidate_weight_ = AddInput(TensorType_FLOAT32);
@@ -100,7 +102,8 @@ TEST(GRUTest, SimpleTest) {
   const int n_input = 2;
   const int n_output = 3;
 
-  GRUOpModel m({{n_time, n_batch, n_input},
+  GRUOpModel m(n_batch, n_input, n_output,
+               {{n_time, n_batch, n_input},
                 {n_batch, n_output},
                 {2 * n_output, n_input + n_output},
                 {2 * n_output},
",0,train
1700ac827237992143144a5763a72d56b2da7127,tensorflow/tensorflow,"Use correct module when calling reduce_prod.

PiperOrigin-RevId: 158544698",util.py,"@@ -544,7 +544,7 @@ def fill_lower_triangular(x, validate_args=False, name=""fill_lower_triangular""):
       m = np.prod(batch_shape).astype(np.int32)
     else:
       batch_shape = array_ops.shape(x)[:-1]
-      m = array_ops.reduce_prod(array_ops.shape(x)[:-1])
+      m = math_ops.reduce_prod(array_ops.shape(x)[:-1])
     batch_ids = math_ops.range(m)
 
     # Assemble the tril_ids into batch,tril_id pairs.
",0,train
ec26ef5fbc463ffee3321b34c68eac08e4b4c64e,tensorflow/tensorflow,"Add shape assertion to categorical crossentropy loss.

PiperOrigin-RevId: 292243729
Change-Id: I88da74f303e46075b3934749664129b03b8774b2",backend.py,"@@ -4571,6 +4571,7 @@ def categorical_crossentropy(target, output, from_logits=False, axis=-1):
   dtype=float32)
 
   """"""
+  target.shape.assert_is_compatible_with(output.shape)
   if from_logits:
     return nn.softmax_cross_entropy_with_logits_v2(
         labels=target, logits=output, axis=axis)
",0,train
ec26ef5fbc463ffee3321b34c68eac08e4b4c64e,tensorflow/tensorflow,"Add shape assertion to categorical crossentropy loss.

PiperOrigin-RevId: 292243729
Change-Id: I88da74f303e46075b3934749664129b03b8774b2",losses_test.py,"@@ -875,6 +875,15 @@ class CategoricalCrossentropyTest(test.TestCase):
     expected_value = 400.0 * label_smoothing / 3.0
     self.assertAlmostEqual(self.evaluate(loss), expected_value, 3)
 
+  def test_shape_mismatch(self):
+    y_true = constant_op.constant([[0], [1], [2]])
+    y_pred = constant_op.constant([[.9, .05, .05], [.5, .89, .6],
+                                   [.05, .01, .94]])
+
+    cce_obj = keras.losses.CategoricalCrossentropy()
+    with self.assertRaisesRegexp(ValueError, 'Shapes .+ are incompatible'):
+      cce_obj(y_true, y_pred)
+
 
 @test_util.run_all_in_graph_and_eager_modes
 class SparseCategoricalCrossentropyTest(test.TestCase):
",0,train
121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode.

PiperOrigin-RevId: 232817358",control_flow.py,"@@ -329,7 +329,6 @@ class ControlFlowTransformer(converter.Base):
     cond_closure = set()
     for s in cond_scope.read:
       cond_closure |= s.support_set
-    cond_closure -= loop_state
 
     loop_state, state_ssf, state_ast_tuple, ssf_map = self._state_constructs(
         loop_state, reserved_symbols)
",0,test
121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode.

PiperOrigin-RevId: 232817358",control_flow_test.py,"@@ -33,8 +33,7 @@ class ControlFlowTest(converter_testing.TestCase):
       inputs = (inputs,)
     with self.converted(test_fn, control_flow, {},
                         constant_op.constant) as result:
-      with self.cached_session() as sess:
-        self.assertEqual(sess.run(result.test_fn(*inputs)), expected)
+      self.assertEqual(self.evaluate(result.test_fn(*inputs)), expected)
 
   @test_util.run_deprecated_v1
   def test_while_basic(self):
@@ -78,6 +77,33 @@ class ControlFlowTest(converter_testing.TestCase):
 
     self.assertTransformedResult(test_fn, constant_op.constant(5), 0)
 
+  @test_util.run_deprecated_v1
+  def test_while_dispatches_by_cond_only(self):
+
+    class TensorIncompatibleNumeric(object):
+      """"""Works in arithmetic expression, but errors out with TF ops.""""""
+
+      def __init__(self, val):
+        self.val = val
+
+      def __add__(self, other):
+        return TensorIncompatibleNumeric(self.val + other)
+
+    def test_fn(n, s):
+      while n > 0:
+        n -= 1
+        s += n
+      return s
+
+    self.assertTransformedResult(test_fn, (constant_op.constant(5), 0), 10)
+    with self.converted(test_fn, control_flow, {}) as result:
+      # n alone controls the staging. When the loop is not staged, Python
+      # knows how to add the two objects. But when staged, tf.while_loop will
+      # not know how to deal with the TensorIncompatibleNumeric object.
+      self.assertEqual(result.test_fn(5, TensorIncompatibleNumeric(0)).val, 10)
+      with self.assertRaises(TypeError):
+        result.test_fn(constant_op.constant(5), TensorIncompatibleNumeric(0))
+
   @test_util.run_deprecated_v1
   def test_if_basic(self):
 
@@ -112,11 +138,10 @@ class ControlFlowTest(converter_testing.TestCase):
       return obj
 
     with self.converted(test_fn, control_flow, {}) as result:
-      with self.cached_session() as sess:
-        res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0))
-        self.assertEqual(sess.run((res_obj.a, res_obj.b)), (-1, 0))
-        res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0))
-        self.assertEqual(sess.run((res_obj.a, res_obj.b)), (0, -2))
+      res_obj = result.test_fn(constant_op.constant(1), TestClass(0, 0))
+      self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (-1, 0))
+      res_obj = result.test_fn(constant_op.constant(-1), TestClass(0, 0))
+      self.assertEqual(self.evaluate((res_obj.a, res_obj.b)), (0, -2))
 
   @test_util.run_deprecated_v1
   def test_if_single_output(self):
@@ -223,5 +248,7 @@ class ControlFlowTest(converter_testing.TestCase):
       return z
 
     self.assertTransformedResult(test_fn, [3, 3], 7)
+
+
 if __name__ == '__main__':
   test.main()
",0,test
121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode.

PiperOrigin-RevId: 232817358",control_flow.py,"@@ -153,8 +153,7 @@ def while_stmt(test, body, init_state, extra_deps, opts=None):
   # TODO(mdan): Consider adding a generic mechanism for dynamic dispatch.
   # That could be something as simple as a collection of dispatch rules, with
   # some prioritization.
-  if any(tensor_util.is_tensor(v)
-         for v in nest.flatten(init_state + extra_deps)):
+  if any(tensor_util.is_tensor(v) for v in nest.flatten(extra_deps)):
     return _tf_while_stmt(test, body, init_state, opts)
   else:
     return _py_while_stmt(test, body, init_state, opts)
",0,test
121dd0762284314d739296de70b1ea1979cd5949,tensorflow/tensorflow,"Dispatch while loops based exclusively on the test closure, not the modified state. Upgrade the tests from v1-only mode.

PiperOrigin-RevId: 232817358",control_flow_test.py,"@@ -65,30 +65,41 @@ class WhileLoopTest(test.TestCase):
   def test_tensor(self):
     n = constant_op.constant(5)
     results = control_flow.while_stmt(
-        test=lambda i, sum: i < n,
-        body=lambda i, sum: (i + 1, sum + i,),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i,),
         init_state=(0, 0),
         extra_deps=(n,))
-    with self.cached_session():
-      self.assertEqual((5, 10), self.evaluate(results))
+    self.assertEqual((5, 10), self.evaluate(results))
 
   @test_util.run_deprecated_v1
-  def test_tensor_dict_state(self):
+  def test_python_with_tensor_state(self):
     n = 5
-    init_state = {'i': constant_op.constant(0), 'sum': constant_op.constant(0)}
     results = control_flow.while_stmt(
-        test=lambda s: s['i'] < n,
-        body=lambda s: ({'i': s['i'] + 1, 'sum': s['sum'] + s['i']},),
-        init_state=(init_state,),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i),
+        init_state=(0, constant_op.constant(0)),
         extra_deps=())
-    with self.cached_session():
-      self.assertEqual(({'i': 5, 'sum': 10},), self.evaluate(results))
+    result_i, result_s = results
+    self.assertEqual(5, result_i)
+    self.assertEqual(10, self.evaluate(result_s))
+
+  @test_util.run_deprecated_v1
+  def test_python_due_to_hidden_cond_type(self):
+    n = 5
+
+    # TODO(b/124002646): Improve the error message.
+    with self.assertRaises(Exception):
+      control_flow.while_stmt(
+          test=lambda i, s: i < n,
+          body=lambda i, s: (i + 1, s + i),
+          init_state=(constant_op.constant(0), constant_op.constant(0)),
+          extra_deps=())
 
   def test_python(self):
     n = 5
     results = control_flow.while_stmt(
-        test=lambda i, sum: i < n,
-        body=lambda i, sum: (i + 1, sum + i),
+        test=lambda i, s: i < n,
+        body=lambda i, s: (i + 1, s + i),
         init_state=(0, 0),
         extra_deps=(n,))
     self.assertEqual((5, 10), results)
",0,test
39c5b0470e7e6b6f79c8f55d89ea46585168f2a8,tensorflow/tensorflow,"Really delete old checkpoints this time.

Follows up on cl/188187349, which fixed checkpoint management for tf.train.Saver
when executing eagerly. Except I was recreating the tf.train.Saver objects each
save, so tfe.Checkpoint and friends did not benefit from that change.

Keeps the same tf.train.Saver around when executing eagerly. This limits object
graph mutations just like when graph building; if there are complaints I can
assign to Saver._var_list instead, since eager tf.train.Saver is not specialized
to its var_list argument.

PiperOrigin-RevId: 189211552",checkpointable_utils.py,"@@ -602,8 +602,7 @@ class CheckpointableSaver(object):
     """"""
     named_variables, graph_proto = _serialize_object_graph(
         self._root_checkpointable)
-    in_graph_mode = not context.executing_eagerly()
-    if in_graph_mode:
+    if not context.executing_eagerly():
       if session is None:
         session = ops.get_default_session()
       if self._object_graph_feed_tensor is None:
@@ -622,17 +621,17 @@ class CheckpointableSaver(object):
     named_variables[_OBJECT_GRAPH_PROTO_KEY] = _NoRestoreSaveable(
         tensor=object_graph_tensor,
         name=_OBJECT_GRAPH_PROTO_KEY)
-    if not in_graph_mode or self._last_save_object_graph != graph_proto:
-      if self._last_save_object_graph is not None and in_graph_mode:
+    if self._last_save_object_graph != graph_proto:
+      if self._last_save_object_graph is not None:
         raise NotImplementedError(
             ""Using a single Saver to save a mutated object graph is not ""
             ""currently supported when graph building. Use a different Saver ""
-            ""when the object graph changes (save ops will be duplicated), or ""
-            ""file a feature request if this limitation bothers you."")
+            ""when the object graph changes (save ops will be duplicated when ""
+            ""graph building), or file a feature request if this limitation ""
+            ""bothers you."")
       saver = saver_lib.Saver(var_list=named_variables)
-      if in_graph_mode:
-        self._last_save_saver = saver
-        self._last_save_object_graph = graph_proto
+      self._last_save_saver = saver
+      self._last_save_object_graph = graph_proto
     else:
       saver = self._last_save_saver
     with ops.device(""/cpu:0""):
",0,train
39c5b0470e7e6b6f79c8f55d89ea46585168f2a8,tensorflow/tensorflow,"Really delete old checkpoints this time.

Follows up on cl/188187349, which fixed checkpoint management for tf.train.Saver
when executing eagerly. Except I was recreating the tf.train.Saver objects each
save, so tfe.Checkpoint and friends did not benefit from that change.

Keeps the same tf.train.Saver around when executing eagerly. This limits object
graph mutations just like when graph building; if there are complaints I can
assign to Saver._var_list instead, since eager tf.train.Saver is not specialized
to its var_list argument.

PiperOrigin-RevId: 189211552",checkpointable_utils_test.py,"@@ -849,6 +849,26 @@ class CheckpointingTests(test.TestCase):
         saver.save(checkpoint_prefix)
         self.assertEqual(before_ops, graph.get_operations())
 
+  @test_util.run_in_graph_and_eager_modes(assert_no_eager_garbage=True)
+  def testCheckpointCleanup(self):
+    checkpoint_directory = self.get_temp_dir()
+    checkpoint_prefix = os.path.join(checkpoint_directory, ""ckpt"")
+    obj = checkpointable.Checkpointable()
+    obj.var = variable_scope.get_variable(name=""v"", initializer=0.)
+    self.evaluate(checkpointable_utils.gather_initializers(obj))
+    saver = checkpointable_utils.Checkpoint(obj=obj)
+    for _ in range(10):
+      saver.save(checkpoint_prefix)
+    expected_filenames = [""checkpoint""]
+    for checkpoint_number in range(6, 11):
+      expected_filenames.append(""ckpt-%d.index"" % (checkpoint_number,))
+      expected_filenames.append(
+          ""ckpt-%d.data-00000-of-00001"" % (checkpoint_number,))
+    six.assertCountEqual(
+        self,
+        expected_filenames,
+        os.listdir(checkpoint_directory))
+
   def testManyRestoresGraph(self):
     """"""Restores after the first should not modify the graph.""""""
     with context.graph_mode():
",0,train
a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library.

Changes:
 1. Updated handling of ReshapeParams.
 2. Added write_lib tests to check different scenarios.
PiperOrigin-RevId: 323950640
Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",option_writer_generator.cc,"@@ -265,6 +265,29 @@ void GenerateImportForResizeBilinearOp(FILE* fp) {
           ""  }\n  break;\n"");
 }
 
+// Reshape Op infers output shape either from Parameter or from shape tensor
+// that is an additional input. When we have this additional shape tensor as
+// input we don't have the parameter present in this layer. In case of more than
+// one input we import an empty vector for the parameters.
+void GenerateImportForReshapeOp(FILE* fp) {
+  fprintf(fp,
+          ""  case BuiltinOperator_RESHAPE:  {\n""
+          ""    const auto* params = reinterpret_cast<const ""
+          ""TfLiteReshapeParams*>(builtin_op_data);\n""
+          ""    flatbuffers::Offset<void> union_type;\n""
+          ""    if (node.inputs->size > 1) {\n""
+          ""      union_type = CreateReshapeOptions(*fbb).Union();\n""
+          ""    } else {\n""
+          ""      auto val0 = fbb->CreateVector(std::vector<int>(params->shape, ""
+          ""params->shape + params->num_dimensions));\n""
+          ""      union_type = CreateReshapeOptions(*fbb, ""
+          ""val0).Union();\n""
+          ""    }\n""
+          ""    return std::make_pair(BuiltinOptions_ReshapeOptions, ""
+          ""union_type);\n""
+          ""  }\n  break;\n"");
+}
+
 void GenerateImportForOp(FILE* fp, const std::string& op_name,
                          const std::string& option_name,
                          const std::string& option_type,
@@ -276,6 +299,13 @@ void GenerateImportForOp(FILE* fp, const std::string& op_name,
     return;
   }
 
+  // Special case Reshape that may have 'new_shape' field missing from the
+  // parameters.
+  if (struct_name == ""TfLiteReshapeParams"") {
+    GenerateImportForReshapeOp(fp);
+    return;
+  }
+
   fprintf(fp, ""  case BuiltinOperator_%s:  {\n"", op_name.c_str());
   if (options->num_elems != 0) {
     fprintf(fp,
",0,train
a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library.

Changes:
 1. Updated handling of ReshapeParams.
 2. Added write_lib tests to check different scenarios.
PiperOrigin-RevId: 323950640
Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",writer_lib.cc,"@@ -31,7 +31,7 @@ namespace tflite {
 
 std::pair<BuiltinOptions, flatbuffers::Offset<void>> CreateBuiltinUnion(
     flatbuffers::FlatBufferBuilder* fbb, enum BuiltinOperator op,
-    void* builtin_op_data) {
+    void* builtin_op_data, const TfLiteNode& node) {
   switch (op) {
 #include ""tensorflow/lite/experimental/writer/option_writer_generated.h""
   }
@@ -82,7 +82,7 @@ SubgraphWriter::ExportOperators(flatbuffers::FlatBufferBuilder* fbb) {
       // builtin
       auto builtin_options_and_type = CreateBuiltinUnion(
           fbb, static_cast<enum BuiltinOperator>(registration.builtin_code),
-          node.builtin_data);
+          node.builtin_data, node);
       builtin_options = builtin_options_and_type.second;
       builtin_options_type = builtin_options_and_type.first;
     } else {
",0,train
a1e78629fa0b461273d0ff4c5b45e01ee4b8836d,tensorflow/tensorflow,"Fix to handle Reshape Layer in experimental TFLite writer library.

Changes:
 1. Updated handling of ReshapeParams.
 2. Added write_lib tests to check different scenarios.
PiperOrigin-RevId: 323950640
Change-Id: I20c4a5dcd3d80c591366edb7341634c0b13ffd45",writer_lib_test.cc,"@@ -15,6 +15,9 @@ limitations under the License.
 
 #include ""tensorflow/lite/experimental/writer/writer_lib.h""
 
+#include <numeric>
+#include <sstream>
+
 #include <gtest/gtest.h>
 #include ""tensorflow/lite/c/common.h""
 #include ""tensorflow/lite/interpreter.h""
@@ -184,6 +187,83 @@ TEST(Writer, PerTensorQuantizedModelTest) {
   CHECK_EQ(new_interpreter->AllocateTensors(), kTfLiteOk);
 }
 
+struct ReshapeTestPattern {
+  int num_inputs;
+  bool is_param_valid;
+};
+
+class ReshapeLayerTest : public ::testing::TestWithParam<ReshapeTestPattern> {};
+
+TEST_P(ReshapeLayerTest, ReshapeLayerTest) {
+  const auto param = GetParam();
+  Interpreter interpreter;
+  const int total_tensors = param.num_inputs + 1;
+  interpreter.AddTensors(total_tensors);
+  int output_shape[] = {1, 2, 3};
+  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/0, kTfLiteFloat32,
+                                           /*name=*/""a"", /*dims=*/{6},
+                                           TfLiteQuantization());
+  ASSERT_LE(param.num_inputs, 2);
+  if (param.num_inputs == 2) {
+    interpreter.SetTensorParametersReadOnly(
+        /*tensor_index=*/1, kTfLiteInt32, /*name=*/""b"", /*dims=*/{3},
+        TfLiteQuantization(), reinterpret_cast<char*>(output_shape),
+        sizeof(output_shape));
+  }
+  interpreter.SetTensorParametersReadWrite(/*tensor_index=*/total_tensors - 1,
+                                           kTfLiteFloat32, /*name=*/""c"",
+                                           /*dims=*/{3}, TfLiteQuantization());
+
+  std::vector<int> input_tensors(param.num_inputs);
+  std::iota(input_tensors.begin(), input_tensors.end(), 0);
+
+  interpreter.SetInputs(input_tensors);
+  interpreter.SetOutputs({total_tensors - 1});
+  const char* initial_data = """";
+  tflite::ops::builtin::BuiltinOpResolver resolver;
+  TfLiteReshapeParams* builtin_data = reinterpret_cast<TfLiteReshapeParams*>(
+      malloc(sizeof(TfLiteReshapeParams)));
+  if (param.is_param_valid) {
+    builtin_data->num_dimensions = 3;
+    for (int dim = 0; dim < builtin_data->num_dimensions; ++dim) {
+      builtin_data->shape[dim] = output_shape[dim];
+    }
+  }
+  const TfLiteRegistration* reg = resolver.FindOp(BuiltinOperator_RESHAPE, 1);
+  interpreter.AddNodeWithParameters(input_tensors,
+                                    /*outputs=*/{total_tensors - 1},
+                                    initial_data, /*init_data_size=*/0,
+                                    reinterpret_cast<void*>(builtin_data), reg);
+
+  SubgraphWriter writer(&interpreter.primary_subgraph());
+  std::stringstream ss;
+  ss << ""/tmp/test_reshape_"" << param.num_inputs << param.is_param_valid
+     << "".tflite"";
+  std::string filename = ss.str();
+  writer.Write(filename);
+  std::unique_ptr<FlatBufferModel> model =
+      FlatBufferModel::BuildFromFile(filename.c_str());
+  InterpreterBuilder builder(*model, resolver);
+  std::unique_ptr<Interpreter> new_interpreter;
+  builder(&new_interpreter);
+  ASSERT_EQ(new_interpreter->AllocateTensors(), kTfLiteOk);
+}
+
+INSTANTIATE_TEST_SUITE_P(
+    Writer, ReshapeLayerTest,
+    ::testing::Values(ReshapeTestPattern{/*num_inputs=*/2,
+                                         /*is_param_valid=*/true},
+                      ReshapeTestPattern{/*num_inputs=*/2,
+                                         /*is_param_valid=*/false},
+                      ReshapeTestPattern{/*num_inputs=*/1,
+                                         /*is_param_valid=*/true}),
+    [](const ::testing::TestParamInfo<ReshapeLayerTest::ParamType>& info) {
+      std::stringstream ss;
+      ss << ""num_inputs_"" << info.param.num_inputs << ""_valid_param_""
+         << info.param.is_param_valid;
+      std::string name = ss.str();
+      return name;
+    });
 }  // namespace tflite
 
 int main(int argc, char** argv) {
",0,train
25337d2065bd3ef79b9018714c0cb5af46ca06dc,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2018-12-12

PiperOrigin-RevId: 225140840",compat.py,"@@ -32,7 +32,7 @@ from tensorflow.python.ops import variable_scope
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 11)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 12, 12)
 
 
 @tf_export(""compat.forward_compatible"")
",0,test
0fc6825fbe34165d5792e938b5f724a58d638ab2,tensorflow/tensorflow,"Support NNAPI Burst mode in the delegate execution provider.

PiperOrigin-RevId: 369781496
Change-Id: I53f3d0c587e22bd47a498e7e0fc9c4f16c689011",nnapi_delegate_provider.cc,"@@ -36,6 +36,8 @@ class NnapiDelegateProvider : public DelegateProvider {
                              ToolParam::Create<bool>(true));
     default_params_.AddParam(""nnapi_allow_fp16"",
                              ToolParam::Create<bool>(false));
+    default_params_.AddParam(""nnapi_use_burst_mode"",
+                             ToolParam::Create<bool>(false));
   }
 
   std::vector<Flag> CreateFlags(ToolParams* params) const final;
@@ -65,7 +67,13 @@ std::vector<Flag> NnapiDelegateProvider::CreateFlags(ToolParams* params) const {
       CreateFlag<bool>(""disable_nnapi_cpu"", params,
                        ""Disable the NNAPI CPU device""),
       CreateFlag<bool>(""nnapi_allow_fp16"", params,
-                       ""Allow fp32 computation to be run in fp16"")};
+                       ""Allow fp32 computation to be run in fp16""),
+      CreateFlag<bool>(
+          ""nnapi_use_burst_mode"", params,
+          ""use NNAPI Burst mode if supported. Burst mode allows accelerators ""
+          ""to efficiently manage resources, which would significantly reduce ""
+          ""overhead especially if the same delegate instance is to be used for ""
+          ""multiple inferences."")};
 
   return flags;
 }
@@ -93,6 +101,8 @@ void NnapiDelegateProvider::LogParams(const ToolParams& params,
                  verbose);
   LOG_TOOL_PARAM(params, bool, ""nnapi_allow_fp16"", ""Allow fp16 in NNAPI"",
                  verbose);
+  LOG_TOOL_PARAM(params, bool, ""nnapi_use_burst_mode"",
+                 ""Use burst mode in NNAPI"", verbose);
 }
 
 TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate(
@@ -112,6 +122,10 @@ TfLiteDelegatePtr NnapiDelegateProvider::CreateTfLiteDelegate(
       options.allow_fp16 = true;
     }
 
+    if (params.Get<bool>(""nnapi_use_burst_mode"")) {
+      options.use_burst_computation = true;
+    }
+
     std::string string_execution_preference =
         params.Get<std::string>(""nnapi_execution_preference"");
     // Only set execution preference if user explicitly passes one. Otherwise,
",0,train
a96417300ab274f90dd223c0507a162bb5e7e521,tensorflow/tensorflow,"Replacing the deprecated notifyAll() with notify_all().

This is preparing for the upcoming tf release with python 3.10 support.

PiperOrigin-RevId: 413513658
Change-Id: I8bfb4e7f2ab52e80d4dbcfb8fb24cd268cd6ca5e",cluster_coordinator.py,"@@ -329,7 +329,7 @@ class _CoordinatedClosureQueue(object):
   def stop(self):
     with self._queue_lock:
       self._should_process_closures = False
-      self._closures_queued_condition.notifyAll()
+      self._closures_queued_condition.notify_all()
     self._watchdog.stop()
 
   def _cancel_all_closures(self):
@@ -408,9 +408,9 @@ class _CoordinatedClosureQueue(object):
         raise AssertionError(""There is no inflight closures to mark_finished."")
       self._inflight_closure_count -= 1
       if self._inflight_closure_count == 0:
-        self._no_inflight_closure_condition.notifyAll()
+        self._no_inflight_closure_condition.notify_all()
       if self._queue.empty() and self._inflight_closure_count == 0:
-        self._stop_waiting_condition.notifyAll()
+        self._stop_waiting_condition.notify_all()
       self._watchdog.report_closure_done()
 
   def put_back(self, closure):
@@ -426,7 +426,7 @@ class _CoordinatedClosureQueue(object):
         self._closures_queued_condition.notify()
       self._inflight_closure_count -= 1
       if self._inflight_closure_count == 0:
-        self._no_inflight_closure_condition.notifyAll()
+        self._no_inflight_closure_condition.notify_all()
 
   def wait(self, timeout=None):
     """"""Wait for all closures to be finished before returning.
@@ -459,8 +459,8 @@ class _CoordinatedClosureQueue(object):
         self._error = e
       self._inflight_closure_count -= 1
       if self._inflight_closure_count == 0:
-        self._no_inflight_closure_condition.notifyAll()
-      self._stop_waiting_condition.notifyAll()
+        self._no_inflight_closure_condition.notify_all()
+      self._stop_waiting_condition.notify_all()
 
   def done(self):
     """"""Returns true if the queue is empty and there is no inflight closure.
",0,train
a96417300ab274f90dd223c0507a162bb5e7e521,tensorflow/tensorflow,"Replacing the deprecated notifyAll() with notify_all().

This is preparing for the upcoming tf release with python 3.10 support.

PiperOrigin-RevId: 413513658
Change-Id: I8bfb4e7f2ab52e80d4dbcfb8fb24cd268cd6ca5e",lock_util.py,"@@ -99,7 +99,7 @@ class GroupLock(object):
     self._ready.acquire()
     self._group_member_counts[group_id] -= 1
     if self._group_member_counts[group_id] == 0:
-      self._ready.notifyAll()
+      self._ready.notify_all()
     self._ready.release()
 
   def _another_group_active(self, group_id):
",0,train
74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique.

Add a unique graph sequence number to TRTEngineOp node names to avoid name
collision. Since the TRTEngineOp node names are used as the cache keys for the
resource cache objects for the operation, this can avoid mapping two different
TRTEngineOp nodes to the same cache objects.

Fix affected tests.

PiperOrigin-RevId: 304524561
Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",convert_graph.cc,"@@ -617,11 +617,6 @@ std::pair<int, Allocator*> GetDeviceAndAllocator(const ConversionParams& params,
   return std::make_pair(cuda_device_id, dev_allocator);
 }
 
-int64 GetNextGraphSequenceNumber() {
-  static std::atomic<int64> graph_sequence_num;
-  return graph_sequence_num++;
-}
-
 // Entry function from optimization pass.
 Status ConvertAfterShapes(const ConversionParams& params) {
   // Sanity checks.
@@ -671,12 +666,10 @@ Status ConvertAfterShapes(const ConversionParams& params) {
   std::vector<size_t> engine_bytes_size;
   segment::SegmentNodesVector converted_segments;
   converted_segments.reserve(initial_segments.size());
-  string engine_name_prefix =
-      StrCat(""TRTEngineOp_"", GetNextGraphSequenceNumber(), ""_"");
   for (size_t t = 0; t < initial_segments.size(); t++) {
     auto& curr_segment = initial_segments.at(t);
     EngineInfo curr_engine;
-    curr_engine.engine_name = StrCat(engine_name_prefix, t);
+    curr_engine.engine_name = StrCat(""TRTEngineOp_"", t);
     Status status =
         GetEngineInfo(&graph, *params.graph_properties, curr_segment, node_map,
                       reverse_topo_order, &curr_engine);
",0,train
74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique.

Add a unique graph sequence number to TRTEngineOp node names to avoid name
collision. Since the TRTEngineOp node names are used as the cache keys for the
resource cache objects for the operation, this can avoid mapping two different
TRTEngineOp nodes to the same cache objects.

Fix affected tests.

PiperOrigin-RevId: 304524561
Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",tf_trt_integration_test_base.py,"@@ -522,25 +522,6 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     logging.info(""Writing graph to %s/%s"", temp_dir, graph_name)
     graph_io.write_graph(gdef, temp_dir, graph_name)
 
-  # Remove the graph sequence number prefix from the name only if the name has
-  # a prefix TRTEngineOp_n_. When expecting_prefix is true, assert such a
-  # prefix exists.
-  def _RemoveGraphSequenceNumberImpl(self, name, expecting_prefix):
-    match = re.search(r""TRTEngineOp_\d+_"", name)
-    has_prefix = match and name.startswith(match.group(0))
-    assert (not expecting_prefix) or has_prefix
-    if has_prefix:
-      parts = name.split(""_"", maxsplit=2)
-      assert len(parts) == 3
-      return parts[0] + ""_"" + parts[2]
-    return name
-
-  def _RemoveGraphSequenceNumber(self, name):
-    return self._RemoveGraphSequenceNumberImpl(name, True)
-
-  def _MayRemoveGraphSequenceNumber(self, name):
-    return self._RemoveGraphSequenceNumberImpl(name, False)
-
   def _VerifyConnections(self, expected_engines, original_gdef, converted_gdef):
     old_to_new_node_map = {
         self._ToString(node.name): self._ToString(node.name)
@@ -598,14 +579,11 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     # Compute the actual mapping from each node to its input nodes.
     actual_input_map = {}
     for node in converted_gdef.node:
-      name_str = node.name
-      if node.op == ""TRTEngineOp"":
-        name_str = self._RemoveGraphSequenceNumber(name_str)
+      name_str = self._ToString(node.name)
       actual_input_map[name_str] = set()
       input_set = actual_input_map[name_str]
       for inp in node.input:
         (prefix, node_name) = _InputName(inp)
-        node_name = self._MayRemoveGraphSequenceNumber(node_name)
         input_set.add(prefix + node_name)
 
     self.assertEqual(
@@ -650,8 +628,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
         self.assertIn(function_name, functions)
         if not IsQuantizationWithCalibration and not is_dynamic_engine:
           self.assertTrue(len(node.attr[""serialized_segment""].s), node.name)
-        self.assertIn(
-            self._RemoveGraphSequenceNumber(node.name), expected_engines)
+        self.assertIn(node.name, expected_engines)
         self.assertEqual(
             self._ToBytes(run_params.precision_mode),
             node.attr[""precision_mode""].s, node.name)
@@ -685,8 +662,7 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
         node.name for node in gdef_to_verify.node if node.op == ""TRTEngineOp""
     ]
     for func in gdef_to_verify.library.function:
-      if not re.search(r""TRTEngineOp_\d+_\d+_native_segment"",
-                       func.signature.name):
+      if not re.search(r""TRTEngineOp_\d+_native_segment"", func.signature.name):
         for node in func.node_def:
           all_op_names.append(node.name)
           if node.op == ""TRTEngineOp"":
@@ -694,12 +670,9 @@ class TfTrtIntegrationTestBase(test_util.TensorFlowTestCase):
     # Remove the function name prefix.
     def _Canonicalize(names):
       return set(self._ToString(name.split(""/"")[-1]) for name in names)
-    # Remove the graph sequence number prefix from all the names.
-    def _RemoveGraphSequenceNumber(names):
-      return set(self._RemoveGraphSequenceNumber(name) for name in names)
 
     all_op_names = _Canonicalize(all_op_names)
-    trt_op_names = _RemoveGraphSequenceNumber(_Canonicalize(trt_op_names))
+    trt_op_names = _Canonicalize(trt_op_names)
 
     if isinstance(expected_engines, dict):
       # For simplicity we don't verify the connections inside the engine in
",0,train
74ee9cb1effdee27fca298d7979676064b2c8c8e,tensorflow/tensorflow,"Make TRTEngineOp node names unique.

Add a unique graph sequence number to TRTEngineOp node names to avoid name
collision. Since the TRTEngineOp node names are used as the cache keys for the
resource cache objects for the operation, this can avoid mapping two different
TRTEngineOp nodes to the same cache objects.

Fix affected tests.

PiperOrigin-RevId: 304524561
Change-Id: I6a7f8c5f484f883f6c3d02df4967bbed5f758467",trt_convert_test.py,"@@ -20,7 +20,6 @@ from __future__ import print_function
 
 import gc
 import os
-import re
 import tempfile
 
 from absl.testing import parameterized
@@ -311,24 +310,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       converter.save(output_saved_model_dir=output_saved_model_dir)
     return output_graph_def
 
-  # Remove the graph sequence number prefix from the name only if the name has
-  # a prefix TRTEngineOp_n_.
-  def _MayRemoveGraphSequenceNumber(self, name):
-    prefix = re.search(r""TRTEngineOp_\d+_"", name)
-    if prefix and name.startswith(prefix.group(0)):
-      parts = name.split(""_"", maxsplit=2)
-      assert len(parts) == 3
-      return parts[0] + ""_"" + parts[2]
-    return name
-
-  # Return the unique TRTEngineOp in the given graph def.
-  def _GetUniqueTRTEngineOp(self, graph_def):
-    trt_engine_nodes = [
-        node for node in graph_def.node if node.op == ""TRTEngineOp""
-    ]
-    assert len(trt_engine_nodes) == 1
-    return trt_engine_nodes[0]
-
   def _TestTrtGraphConverter(self,
                              device,
                              output_saved_model_dir=None,
@@ -349,10 +330,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
       graph_defs_to_verify.append(saved_model_graph_def)
 
     for graph_def in graph_defs_to_verify:
-      node_name_to_op = {
-          self._MayRemoveGraphSequenceNumber(node.name): node.op
-          for node in graph_def.node
-      }
+      node_name_to_op = {node.name: node.op for node in graph_def.node}
       self.assertEqual(
           {
               ""input1"": ""Placeholder"",
@@ -456,13 +434,13 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     trt_op_names = []
     for node in graph_def.node:
       if node.op == ""TRTEngineOp"":
-        trt_op_names.append(self._MayRemoveGraphSequenceNumber(node.name))
+        trt_op_names.append(node.name)
         if check_fn:
           check_fn(node)
     for func in graph_def.library.function:
       for node in func.node_def:
         if node.op == ""TRTEngineOp"":
-          trt_op_names.append(self._MayRemoveGraphSequenceNumber(node.name))
+          trt_op_names.append(node.name)
           if check_fn:
             check_fn(node)
     self.assertEqual(1, len(trt_op_names))
@@ -495,15 +473,11 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     # Verify the converted GraphDef and ConcreteFunction.
     self._CheckTrtOps(converter._converted_func)  # pylint: disable=protected-access
 
-    trt_engine_name = self._GetUniqueTRTEngineOp(
-        converter._converted_graph_def).name
-
     # Save the converted model without any TRT engine cache.
     output_saved_model_dir = self.mkdtemp()
     converter.save(output_saved_model_dir)
     unexpected_asset_file = os.path.join(
-        output_saved_model_dir,
-        ""assets/trt-serialized-engine."" + trt_engine_name)
+        output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"")
     self.assertFalse(os.path.exists(unexpected_asset_file))
 
     # Run the converted function to populate the engine cache.
@@ -516,8 +490,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     output_saved_model_dir = self.mkdtemp()
     converter.save(output_saved_model_dir)
     expected_asset_file = os.path.join(
-        output_saved_model_dir,
-        ""assets/trt-serialized-engine."" + trt_engine_name)
+        output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"")
     self.assertTrue(os.path.exists(expected_asset_file))
     self.assertTrue(os.path.getsize(expected_asset_file))
 
@@ -593,9 +566,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
 
     converter.convert(calibration_input_fn=_CalibrationInputFn)
 
-    trt_engine_name = self._GetUniqueTRTEngineOp(
-        converter._converted_graph_def).name
-
     def _CheckFn(node):
       self.assertTrue(len(node.attr[""calibration_data""].s), node.name)
 
@@ -613,8 +583,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     output_saved_model_dir = self.mkdtemp()
     converter.save(output_saved_model_dir)
     expected_asset_file = os.path.join(
-        output_saved_model_dir,
-        ""assets/trt-serialized-engine."" + trt_engine_name)
+        output_saved_model_dir, ""assets/trt-serialized-engine.TRTEngineOp_0"")
     self.assertTrue(os.path.exists(expected_asset_file))
     self.assertTrue(os.path.getsize(expected_asset_file))
 
@@ -666,9 +635,6 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     converter = self._CreateConverterV2(input_saved_model_dir)
     converter.convert()
 
-    trt_engine_name = self._GetUniqueTRTEngineOp(
-        converter._converted_graph_def).name
-
     def _InputFn():
       yield np_input1, np_input2
 
@@ -679,7 +645,7 @@ class TrtConvertTest(test_util.TensorFlowTestCase, parameterized.TestCase):
     def _DestroyCache():
       with ops.device(""GPU:0""):
         handle = gen_trt_ops.create_trt_resource_handle(
-            resource_name=trt_engine_name)
+            resource_name=""TRTEngineOp_0"")
         gen_resource_variable_ops.destroy_resource_op(
             handle, ignore_lookup_error=False)
 
",0,train
2b559a9a086f7e8e79557c642c6d4f5115f855c5,tensorflow/tensorflow,"Improves constant shape inference for resource variables.

PiperOrigin-RevId: 223367586",resource_variable_ops_test.py,"@@ -29,6 +29,7 @@ from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import ops
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.framework import test_util
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
@@ -137,6 +138,14 @@ class ResourceVariableOpsTest(test_util.TensorFlowTestCase):
     self.evaluate(v[0].assign(2.0))
     self.assertAllEqual(self.evaluate(v), [2.0, 2.0])
 
+  @test_util.run_in_graph_and_eager_modes
+  def testVariableShape(self):
+    v = resource_variable_ops.ResourceVariable([1., 1.])
+    self.assertAllEqual(
+        tensor_util.constant_value(
+            resource_variable_ops.variable_shape(v.handle)),
+        [2])
+
   def testDifferentAssignGraph(self):
     with ops.Graph().as_default():
       v = resource_variable_ops.ResourceVariable(1.0)
",0,train
2b559a9a086f7e8e79557c642c6d4f5115f855c5,tensorflow/tensorflow,"Improves constant shape inference for resource variables.

PiperOrigin-RevId: 223367586",resource_variable_ops.py,"@@ -26,6 +26,7 @@ from tensorflow.core.framework import variable_pb2
 from tensorflow.python import pywrap_tensorflow
 from tensorflow.python.eager import context
 from tensorflow.python.eager import tape
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import cpp_shape_inference_pb2
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -64,6 +65,7 @@ def eager_safe_variable_handle(shape, dtype, shared_name, name, graph_mode):
                                                    name=name,
                                                    container=container)
   if graph_mode:
+    handle._handle_data = get_resource_handle_data(handle)  # pylint: disable=protected-access
     return handle
 
   # We do not want two distinct ResourceVariable objects for the same
@@ -1410,13 +1412,23 @@ def _ReadGrad(_, grad):
   return grad
 
 
+def variable_shape(handle, out_type=dtypes.int32):
+  if getattr(
+      handle, ""_handle_data"", None) is None or not handle._handle_data.is_set:
+    return gen_resource_variable_ops.variable_shape(handle, out_type=out_type)
+  shape_proto = handle._handle_data.shape_and_type[0].shape
+  if shape_proto.unknown_rank or any(x.size == -1 for x in shape_proto.dim):
+    return gen_resource_variable_ops.variable_shape(handle, out_type=out_type)
+  return constant_op.constant([x.size for x in shape_proto.dim], dtype=out_type)
+
+
 @ops.RegisterGradient(""ResourceGather"")
 def _GatherGrad(op, grad):
   """"""Gradient for gather op.""""""
   # Build appropriately shaped IndexedSlices
   handle = op.inputs[0]
   indices = op.inputs[1]
-  params_shape = gen_resource_variable_ops.variable_shape(handle)
+  params_shape = variable_shape(handle)
   size = array_ops.expand_dims(array_ops.size(indices), 0)
   values_shape = array_ops.concat([size, params_shape[1:]], 0)
   values = array_ops.reshape(grad, values_shape)
",0,train
4721480639b185cc9ce2eb1dbbcd25984a068453,tensorflow/tensorflow,Fix spelling in docstring for predict,training.py,"@@ -859,7 +859,7 @@ class Model(network.Network, version_utils.ModelVersionSelector):
           (Dataset, generator, Sequence) is given in the `Unpacking behavior
           for iterator-like inputs` section of `Model.fit`.
         batch_size: Integer or `None`.
-            Number of samples per gradient update.
+            Number of samples per batch.
             If unspecified, `batch_size` will default to 32.
             Do not specify the `batch_size` if your data is in the
             form of symbolic tensors, dataset,
",0,test
b76fbd5d4d3ed92209e124746850004099687219,tensorflow/tensorflow,"remove left-over debug printf statement

PiperOrigin-RevId: 266378933",fully_connected.cc,"@@ -61,7 +61,6 @@ TfLiteStatus CalculateOpData(TfLiteContext* context,
     int exponent;
     QuantizeMultiplier(real_multiplier, &data->output_multiplier, &exponent);
     data->output_shift = -exponent;
-    printf(""%d \n"", data->output_multiplier);
     TF_LITE_ENSURE_STATUS(CalculateActivationRangeQuantized(
         context, params->activation, output, &data->output_activation_min,
         &data->output_activation_max));
",0,train
3acc8eaf602b3e9a009f54e1e0164644dd793831,tensorflow/tensorflow,"Add sanity check for resize-bilinear input shape.

PiperOrigin-RevId: 245618186",resize_bilinear.cc,"@@ -40,9 +40,12 @@ TfLiteStatus ResizeOutputTensor(TfLiteContext* context,
                                 const TfLiteTensor* input,
                                 const TfLiteTensor* size,
                                 TfLiteTensor* output) {
+  const int32* size_data = GetTensorData<int32>(size);
+  // Sanity check, the up/down sampling size should always be positive.
+  TF_LITE_ENSURE(context, size_data[0] > 0);
+  TF_LITE_ENSURE(context, size_data[1] > 0);
   TfLiteIntArray* output_size = TfLiteIntArrayCreate(4);
   output_size->data[0] = input->dims->data[0];
-  const int32* size_data = GetTensorData<int32>(size);
   output_size->data[1] = size_data[0];
   output_size->data[2] = size_data[1];
   output_size->data[3] = input->dims->data[3];
",0,train
7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum

PiperOrigin-RevId: 400872318
Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",legalize_tf_with_tf2xla.cc,"@@ -266,7 +266,6 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) {
     TypeID::get<TF::XlaConvV2Op>(),
     TypeID::get<TF::XlaDynamicSliceOp>(),
     TypeID::get<TF::XlaDynamicUpdateSliceOp>(),
-    TypeID::get<TF::XlaEinsumOp>(),
     TypeID::get<TF::XlaKeyValueSortOp>(),
     TypeID::get<TF::XlaPadOp>(),
     TypeID::get<TF::XlaSortOp>(),
",0,train
7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum

PiperOrigin-RevId: 400872318
Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",xla_legalize_tf.cc,"@@ -164,6 +164,7 @@ const llvm::DenseSet<mlir::TypeID> &MlirPreferredOps() {
     TypeID::get<TF::TanhGradOp>(),
     TypeID::get<TF::XlaDotOp>(),
     TypeID::get<TF::XlaDotV2Op>(),
+    TypeID::get<TF::XlaEinsumOp>(),
     TypeID::get<TF::XlogyOp>(),
     TypeID::get<TF::ZetaOp>(),
 
",0,train
7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum

PiperOrigin-RevId: 400872318
Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",randomized_tests.cc,"@@ -3497,6 +3497,56 @@ TEST_F(OpTest, XlaDotV2) {
   });
 }
 
+TEST_F(OpTest, XlaEinsum) {
+  Repeatedly([this]() {
+    std::string equation;
+    std::vector<int64> lhs_dims, rhs_dims;
+
+    enum EinsumType { matmul, batchmatmul, dot, outer };
+    int op_kind = Choose<int>({matmul, batchmatmul, dot, outer});
+    switch (op_kind) {
+      case matmul:
+      case batchmatmul: {
+        std::vector<int64> dims;
+        if (op_kind == matmul) {
+          equation = ""ij,jk->ik"";
+          dims = RandomDims(2, 2);
+        } else {
+          equation = ""...ij,...jk->...ik"";
+          dims = RandomDims(2);
+        }
+        int64_t ndims = dims.size();
+        int64_t inner_dim = RandomDim();
+        lhs_dims = dims;
+        rhs_dims = dims;
+        lhs_dims[ndims - 1] = inner_dim;
+        rhs_dims[ndims - 2] = inner_dim;
+        break;
+      }
+      case dot: {
+        equation = ""i,i->"";
+        std::vector<int64> dims = RandomDims(1, 1);
+        lhs_dims = dims;
+        rhs_dims = dims;
+        break;
+      }
+      case outer: {
+        equation = ""i,j->ij"";
+        lhs_dims = RandomDims(1, 1);
+        rhs_dims = RandomDims(1, 1);
+        break;
+      }
+    }
+
+    auto dtype = Choose<DataType>(kAllXlaTypes);
+    return ExpectTfAndXlaOutputsAreClose(OpTestBuilder(""XlaEinsum"")
+                                             .RandomInput(dtype, lhs_dims)
+                                             .RandomInput(dtype, rhs_dims)
+                                             .Attr(""equation"", equation)
+                                             .Attr(""T"", dtype));
+  });
+}
+
 TEST_F(OpTest, ZerosLike) {
   GTEST_SKIP() << ""b/201095155"";
   Repeatedly([this]() {
",0,train
7cb3e1328e859187bd59879477f2dfb820fb98d4,tensorflow/tensorflow,"Add lowering for tf.XlaEinSum

PiperOrigin-RevId: 400872318
Change-Id: I45d2e1bc35d98880e4eb668ea926072b1696a788",einsum_op.cc,"@@ -30,32 +30,8 @@ constexpr std::array<DataType, 9> kEinsumTypes = {
     {DT_INT32, DT_INT64, DT_UINT64, DT_HALF, DT_BFLOAT16, DT_FLOAT, DT_DOUBLE,
      DT_COMPLEX64, DT_COMPLEX128}};
 
-// Kernel which compiles XlaEinsum, an einsum op accepting two inputs.
-class XlaEinsumOp : public XlaOpKernel {
- public:
-  explicit XlaEinsumOp(OpKernelConstruction* ctx) : XlaOpKernel(ctx) {
-    OP_REQUIRES_OK(ctx, ctx->GetAttr(""equation"", &equation_));
-  }
-
-  ~XlaEinsumOp() override = default;
-
-  void Compile(XlaOpKernelContext* ctx) override {
-    xla::XlaOp lhs = ctx->Input(0);
-    if (equation_.find(',') == equation_.npos) {
-      ctx->SetOutput(0, xla::Einsum(lhs, equation_));
-    } else {
-      xla::XlaOp rhs = ctx->Input(1);
-      ctx->SetOutput(0, xla::Einsum(lhs, rhs, equation_));
-    }
-  }
-
- private:
-  string equation_;
-  TF_DISALLOW_COPY_AND_ASSIGN(XlaEinsumOp);
-};
-
 REGISTER_XLA_OP(Name(""XlaEinsum"").TypeConstraint(""T"", kEinsumTypes),
-                XlaEinsumOp);
+                MlirXlaOpKernel);
 REGISTER_XLA_OP(Name(""Einsum"").TypeConstraint(""T"", kEinsumTypes),
                 MlirXlaOpKernel);
 
",0,train
87c225ef0e8b1eac47dac471c8b6307ebd1f79be,tensorflow/tensorflow,"Add verifier for HLO Iota op.

Also fixes a bug in tf.RandomShuffle legalization caught by verifier.

PiperOrigin-RevId: 296109247
Change-Id: Icea818f51a6eab91f65efb65aa07f9639d9704a6",hlo_ops.cc,"@@ -202,6 +202,20 @@ OpFoldResult IotaOp::fold(ArrayRef<Attribute> operands) {
   return DenseIntElementsAttr::get(output_type, values);
 }
 
+static LogicalResult Verify(IotaOp op) {
+  auto shape = op.getType().cast<ShapedType>();
+  if (!shape.hasRank()) return success();
+
+  if (shape.getRank() == 0)
+    return op.emitOpError() << ""does not support scalars."";
+
+  auto iota_dimension = op.iota_dimension().getSExtValue();
+  if (iota_dimension >= shape.getRank() || iota_dimension < 0)
+    return op.emitOpError() << ""iota dimension cannot go beyond the output ""
+                               ""rank or be negative."";
+  return success();
+}
+
 //===----------------------------------------------------------------------===//
 // AbsOp
 //===----------------------------------------------------------------------===//
",0,train
87c225ef0e8b1eac47dac471c8b6307ebd1f79be,tensorflow/tensorflow,"Add verifier for HLO Iota op.

Also fixes a bug in tf.RandomShuffle legalization caught by verifier.

PiperOrigin-RevId: 296109247
Change-Id: Icea818f51a6eab91f65efb65aa07f9639d9704a6",legalize_tf.cc,"@@ -3362,7 +3362,7 @@ class ConvertRandomShuffleOp : public OpRewritePattern<TF::RandomShuffleOp> {
     auto indices_type =
         RankedTensorType::get({first_dim_size}, rewriter.getIntegerType(32));
     Value indices = rewriter.create<xla_hlo::IotaOp>(
-        op.getLoc(), indices_type, rewriter.getI64IntegerAttr(first_dim_size));
+        op.getLoc(), indices_type, rewriter.getI64IntegerAttr(0));
 
     // Generate random numbers to be used as swaps for the indices.
     Value swaps = CreateRngUniform32(op.getLoc(), first_dim_size, 0,
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",curl_http_request.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include ""tensorflow/core/platform/cloud/curl_http_request.h""
 
+#include ""third_party/absl/strings/string_view.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/gtl/map_util.h""
 #include ""tensorflow/core/lib/strings/scanner.h""
@@ -24,13 +25,12 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/core/public/version.h""
 
+using absl::string_view;
+
 namespace tensorflow {
 
 namespace {
 
-// Set to 1 to enable verbose debug output from curl.
-constexpr uint64 kVerboseOutput = 0;
-
 // Proxy to the real libcurl implementation.
 class LibCurlProxy : public LibCurl {
  public:
@@ -114,6 +114,29 @@ class LibCurlProxy : public LibCurl {
     return ::curl_easy_strerror(errornum);
   }
 };
+
+int CurlDebugCallback(CURL* handle, curl_infotype type, char* data, size_t size,
+                      void* userptr) {
+  switch (type) {
+    case CURLINFO_HEADER_IN:
+      LOG(INFO) << ""< "" << string_view(data, size);
+      break;
+
+    case CURLINFO_HEADER_OUT:
+      LOG(INFO) << ""> "" << string_view(data, size);
+      break;
+
+    case CURLINFO_TEXT:
+      LOG(INFO) << ""* "" << string_view(data, size);
+      break;
+
+    default:
+      // We are not currently interested in the other CURLINFO_* types.
+      break;
+  }
+
+  return 0;
+}
 }  // namespace
 
 CurlHttpRequest::CurlHttpRequest() : CurlHttpRequest(LibCurlProxy::Load()) {}
@@ -129,7 +152,6 @@ CurlHttpRequest::CurlHttpRequest(LibCurl* libcurl, Env* env)
   //       default in //third_party:curl.BUILD and can be customized via an
   //       environment variable.
 
-  libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, kVerboseOutput);
   libcurl_->curl_easy_setopt(
       curl_, CURLOPT_USERAGENT,
       strings::StrCat(""TensorFlow/"", TF_VERSION_STRING).c_str());
@@ -164,6 +186,18 @@ CurlHttpRequest::~CurlHttpRequest() {
   }
 }
 
+void CurlHttpRequest::SetVerboseLogging(bool enabled) {
+  if (enabled) {
+    libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, static_cast<uint64>(1));
+    libcurl_->curl_easy_setopt(curl_, CURLOPT_DEBUGFUNCTION,
+                               reinterpret_cast<void*>(CurlDebugCallback));
+  } else {
+    libcurl_->curl_easy_setopt(curl_, CURLOPT_VERBOSE, static_cast<uint64>(0));
+    libcurl_->curl_easy_setopt(curl_, CURLOPT_DEBUGFUNCTION,
+                               static_cast<void*>(nullptr));
+  }
+}
+
 string CurlHttpRequest::EscapeString(const string& str) {
   char* out_char_str = libcurl_->curl_easy_escape(curl_, str.c_str(), 0);
   string out_str(out_char_str);
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",curl_http_request.h,"@@ -140,6 +140,8 @@ class CurlHttpRequest : public HttpRequest {
 
   void SetTimeouts(uint32 connection, uint32 inactivity, uint32 total) override;
 
+  void SetVerboseLogging(bool enabled) override;
+
  private:
   /// A write callback in the form which can be accepted by libcurl.
   static size_t WriteCallback(const void* ptr, size_t size, size_t nmemb,
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",curl_http_request_test.cc,"@@ -318,6 +318,33 @@ TEST(CurlHttpRequestTest, GetRequest_Direct) {
   EXPECT_EQ(200, http_request.GetResponseCode());
 }
 
+TEST(CurlHttpRequestTest, GetRequest_Verbose) {
+  FakeLibCurl libcurl(""get response"", 200);
+  CurlHttpRequest http_request(&libcurl);
+
+  std::vector<char> scratch(kTestContent.begin(), kTestContent.end());
+  scratch.reserve(100);
+
+  http_request.SetVerboseLogging(true);
+  http_request.SetUri(""http://www.testuri.com"");
+  http_request.AddAuthBearerHeader(""fake-bearer"");
+  http_request.SetRange(100, 199);
+  http_request.SetResultBuffer(&scratch);
+  TF_EXPECT_OK(http_request.Send());
+
+  EXPECT_EQ(""get response"", string(scratch.begin(), scratch.end()));
+
+  // Check interactions with libcurl.
+  EXPECT_TRUE(libcurl.is_initialized_);
+  EXPECT_EQ(""http://www.testuri.com"", libcurl.url_);
+  EXPECT_EQ(""100-199"", libcurl.range_);
+  EXPECT_EQ("""", libcurl.custom_request_);
+  EXPECT_EQ(1, libcurl.headers_->size());
+  EXPECT_EQ(""Authorization: Bearer fake-bearer"", (*libcurl.headers_)[0]);
+  EXPECT_FALSE(libcurl.is_post_);
+  EXPECT_EQ(200, http_request.GetResponseCode());
+}
+
 TEST(CurlHttpRequestTest, GetRequest_Empty) {
   FakeLibCurl libcurl("""", 200);
   CurlHttpRequest http_request(&libcurl);
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",gcs_dns_cache_test.cc,"@@ -56,6 +56,7 @@ class TestHttpRequest : public HttpRequest {
 
   void SetTimeouts(uint32 connection, uint32 inactivity,
                    uint32 total) override {}
+  void SetVerboseLogging(bool enabled) override {}
 
   std::map<string, string> resolve_overrides_;
 };
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",gcs_file_system.cc,"@@ -25,6 +25,7 @@ limitations under the License.
 #ifdef _WIN32
 #include <io.h>  //for _mktemp
 #endif
+#include ""third_party/absl/strings/numbers.h""
 #include ""include/json/json.h""
 #include ""tensorflow/core/lib/core/errors.h""
 #include ""tensorflow/core/lib/gtl/map_util.h""
@@ -117,6 +118,9 @@ constexpr char kReadRequestTimeout[] = ""GCS_READ_REQUEST_TIMEOUT_SECS"";
 // The environment variable to configure the overall request timeout for
 // upload requests.
 constexpr char kWriteRequestTimeout[] = ""GCS_WRITE_REQUEST_TIMEOUT_SECS"";
+// If set to true, then each HTTP request will log verbose output.
+// This is for debugging only.
+constexpr char kLogHttpRequestVerbose[] = ""GCS_LOG_HTTP_REQUEST_VERBOSE"";
 
 // TODO: DO NOT use a hardcoded path
 Status GetTmpFilename(string* filename) {
@@ -604,6 +608,10 @@ bool GetEnvVar(const char* varname, bool (*convert)(StringPiece, T*),
   return convert(env_value, value);
 }
 
+bool SimpleAtob(StringPiece text, bool* result) {
+  return absl::SimpleAtob(absl::string_view(text.data(), text.size()), result);
+}
+
 }  // namespace
 
 GcsFileSystem::GcsFileSystem()
@@ -684,6 +692,11 @@ GcsFileSystem::GcsFileSystem()
   if (GetEnvVar(kWriteRequestTimeout, strings::safe_strtou32, &timeout_value)) {
     timeouts_.write = timeout_value;
   }
+
+  bool log_verbose = false;
+  if (GetEnvVar(kLogHttpRequestVerbose, SimpleAtob, &log_verbose)) {
+    log_http_request_verbose_ = log_verbose;
+  }
 }
 
 GcsFileSystem::GcsFileSystem(
@@ -1389,6 +1402,10 @@ Status GcsFileSystem::CreateHttpRequest(std::unique_ptr<HttpRequest>* request) {
 
   new_request->AddAuthBearerHeader(auth_token);
 
+  if (log_http_request_verbose_) {
+    new_request->SetVerboseLogging(true);
+  }
+
   *request = std::move(new_request);
   return Status::OK();
 }
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",gcs_file_system.h,"@@ -195,6 +195,9 @@ class GcsFileSystem : public FileSystem {
   /// The initial delay for exponential backoffs when retrying failed calls.
   const int64 initial_retry_delay_usec_ = 1000000L;
 
+  /// Controls whether we enable verbose logging in CurlHttpRequests.
+  bool log_http_request_verbose_ = false;
+
   TF_DISALLOW_COPY_AND_ASSIGN(GcsFileSystem);
 };
 
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",http_request.h,"@@ -140,6 +140,13 @@ class HttpRequest {
   virtual void SetTimeouts(uint32 connection, uint32 inactivity,
                            uint32 total) = 0;
 
+  /// \brief Enables verbose logging for this HTTP request.
+  ///
+  /// The implementation is free to do whatever it wants with this request.
+  /// Generally, the implementation should use this as a hint to emit debug
+  /// logging somewhere.
+  virtual void SetVerboseLogging(bool enabled) = 0;
+
   TF_DISALLOW_COPY_AND_ASSIGN(HttpRequest);
 };
 
",0,train
d69515a21cae5bb002ec75783ef973ebf253fdec,tensorflow/tensorflow,"Add the capability of verbose logging for HTTP requests to the TensorFlow GCS client.

This change allows the GCS client within TensorFlow to log verbose information about HTTP requests to the standard Google logger.  (Actually, to the TensorFlow variant of the standard Google logger.)

This capability is disabled by default. It is enabled by setting the environment variable GCS_LOG_HTTP_REQUEST_VERBOSE=1 (or =true).

PiperOrigin-RevId: 179729641",http_request_fake.h,"@@ -167,6 +167,8 @@ class FakeHttpRequest : public CurlHttpRequest {
                                        inactivity, "" "", total, ""\n"");
   }
 
+  virtual void SetVerboseLogging(bool enabled) override {}
+
  private:
   string actual_request() const {
     string s;
",0,train
70743f654dc34f1765879f65b28d30e9d09c6954,tensorflow/tensorflow,"Delete test that loads weights between two models of different types.

This test assumes the following:
1. The list of layers is the same between the two models --> This isn't always the case because functional models include the input layer in the list, while sequential models don't.
or
2. The checkpointed weights are loaded in a specific order (this was the cause of the flakiness)

PiperOrigin-RevId: 288607470
Change-Id: Ic8db57a65f4f4910e8d404ab108ff4d686c56f2b",models_test.py,"@@ -28,7 +28,6 @@ from tensorflow.python import keras
 from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
-from tensorflow.python.framework import tensor_spec
 from tensorflow.python.keras import backend as K
 from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import metrics
@@ -338,37 +337,6 @@ class CheckpointingTests(keras_parameterized.TestCase):
     model.load_weights(save_prefix)
     self.assertEqual(12., self.evaluate(beta1_power))
 
-  @keras_parameterized.run_with_all_model_types(exclude_models=['subclass'])
-  def test_layer_tracking(self):
-    with self.cached_session():
-      model = _get_model(input_shape=(4,))
-
-      if testing_utils.get_model_type() == 'subclass':
-        # Subclassed model must be built separately.
-        model._set_inputs(tensor_spec.TensorSpec((None, 4)))
-
-      # Ensure that checkpoints are compatible with another model with the same
-      # layers, even if the model isn't built until after initialization.
-      layers = _get_layers(input_shape=None, add_input_layer=False)
-      model2 = models.Sequential(layers)
-      # Build model by calling it.
-      model2.predict_on_batch(np.random.random((10, 4)))
-
-      model_path = os.path.join(self.get_temp_dir(), 'model_ckpt')
-      model.save_weights(model_path)
-      model2_path = os.path.join(self.get_temp_dir(), 'model2_ckpt')
-      model2.save_weights(model2_path)
-
-      # Check that the checkpoints are compatible with both models.
-      model.load_weights(model2_path)
-      self.assertAllClose(self.evaluate(model.weights),
-                          self.evaluate(model2.weights))
-
-      model.load_weights(model_path)
-      model2.load_weights(model_path)
-      self.assertAllClose(self.evaluate(model.weights),
-                          self.evaluate(model2.weights))
-
 
 @keras_parameterized.run_all_keras_modes
 class TestModelBackend(keras_parameterized.TestCase):
",0,train
d63e3ea9a26fc049c654a966d0ebc56bc2747729,tensorflow/tensorflow,"Rollback of ""Replace a few calls of Session `run` with `evaluate`"" for
distribute_coordinator_test to fix breakage.

PiperOrigin-RevId: 222017627",distribute_coordinator_test.py,"@@ -235,7 +235,7 @@ class DistributeCoordinatorTestBase(test.TestCase):
         result = math_ops.add_n(xs)
 
       variables.global_variables_initializer().run()
-      result_value = self.evaluate(result)
+      result_value = sess.run(result)
     self.assertEqual(result_value, expected)
     if result_value == expected:
       self._result_correct += 1
@@ -294,7 +294,7 @@ class DistributeCoordinatorTestBase(test.TestCase):
           if len(uninit_vars) == 0:
             break
 
-      self.evaluate(train_op)
+      sess.run(train_op)
 
       # Synchronize workers after one step to make sure they all have finished
       # training.
@@ -327,7 +327,7 @@ class DistributeCoordinatorTestBase(test.TestCase):
 
     # The monitored session will run init or ready ops.
     with monitored_session.MonitoredSession() as sess:
-      self.evaluate(train_op)
+      sess.run(train_op)
 
       # Synchronize workers after one step to make sure they all have finished
       # training.
",0,train
5ea9724314362fe80760cf226addc7e4a2539493,tensorflow/tensorflow,"Minor typo fix.
Change: 117611495",convolutional.py,"@@ -150,7 +150,7 @@ def main(argv=None):  # pylint: disable=unused-argument
       shape=(EVAL_BATCH_SIZE, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS))
 
   # The variables below hold all the trainable weights. They are passed an
-  # initial value which will be assigned when when we call:
+  # initial value which will be assigned when we call:
   # {tf.initialize_all_variables().run()}
   conv1_weights = tf.Variable(
       tf.truncated_normal([5, 5, NUM_CHANNELS, 32],  # 5x5 filter, depth 32.
",0,train
7954fb8fd1104dd6e78781a895aea1022357da72,tensorflow/tensorflow,"Do not fail when TPUReplicateMetadata op is missing.

The rationale behind this change is that functionalizing control flow in TF v1 models adds functions to the function library with nodes that have the '_tpu_replicate' attribute. The TPU cluster formation pass throws an error on these functions when it does not find the TPUReplicateMetadata op associated with a _tpu_replicate attribute.

PiperOrigin-RevId: 292986754
Change-Id: I203c4e62db96c835bdad3f669500674dc4fce8c5",tpu_cluster_formation.cc,"@@ -414,10 +414,12 @@ LogicalResult FormClustersInBlock(Block* block,
     auto cluster_metadata = metadata_map.find(cluster.getFirst());
 
     // No TPUReplicateMetadata for a `_tpu_replicate` attribute.
-    if (cluster_metadata == metadata_map.end())
-      return cluster_ops.front()->emitError()
-             << ""TPUReplicateMetadata for associated '"" << kTPUReplicateAttr
-             << ""' attribute '"" << cluster.getFirst() << ""' is missing"";
+    if (cluster_metadata == metadata_map.end()) {
+      cluster_ops.front()->emitWarning()
+          << ""TPUReplicateMetadata for associated '"" << kTPUReplicateAttr
+          << ""' attribute '"" << cluster.getFirst() << ""' is missing"";
+      continue;
+    }
 
     llvm::SmallSetVector<Operation*, 8> preceding_users =
         CollectClusterPrecedingUsers(block, cluster_ops);
",0,test
f6e707ca2d5514fc21aa10c83e509f7077c73f4d,tensorflow/tensorflow,"Supporting conversion of argument attributes along with their types.

This fixes a bug: previously, during conversion, function argument
attributes were neither being passed through nor converted. This fix
extends DialectConversion to allow for simultaneous conversion of the
function type and the argument attributes.

This was important when lowering MLIR to LLVM where attribute
information (e.g. noalias) needs to be preserved in MLIR(LLVMDialect).

In the longer run it seems reasonable to convert the function
attributes, the function type, and the argument attributes together, but that
requires a small refactoring in Function.h to aggregate these three
fields in an inner struct, which will require some discussion.

PiperOrigin-RevId: 236709409",Function.h,"@@ -46,6 +46,10 @@ class Function : public llvm::ilist_node_with_parent<Function, Module> {
 public:
   Function(Location location, StringRef name, FunctionType type,
            ArrayRef<NamedAttribute> attrs = {});
+  Function(Location location, StringRef name, FunctionType type,
+           ArrayRef<NamedAttribute> attrs,
+           ArrayRef<NamedAttributeList> argAttrs);
+
   ~Function();
 
   /// The source location the function was defined or derived from.
@@ -198,6 +202,10 @@ public:
     argAttrs[index].setAttrs(getContext(), attributes);
   }
 
+  /// Return all argument attributes of this function.
+  MutableArrayRef<NamedAttributeList> getAllArgAttrs() { return argAttrs; }
+  ArrayRef<NamedAttributeList> getAllArgAttrs() const { return argAttrs; }
+
   /// Return the specified attribute if present, null otherwise.
   Attribute getAttr(Identifier name) const { return attrs.get(name); }
   Attribute getAttr(StringRef name) const { return attrs.get(name); }
",0,train
f6e707ca2d5514fc21aa10c83e509f7077c73f4d,tensorflow/tensorflow,"Supporting conversion of argument attributes along with their types.

This fixes a bug: previously, during conversion, function argument
attributes were neither being passed through nor converted. This fix
extends DialectConversion to allow for simultaneous conversion of the
function type and the argument attributes.

This was important when lowering MLIR to LLVM where attribute
information (e.g. noalias) needs to be preserved in MLIR(LLVMDialect).

In the longer run it seems reasonable to convert the function
attributes, the function type, and the argument attributes together, but that
requires a small refactoring in Function.h to aggregate these three
fields in an inner struct, which will require some discussion.

PiperOrigin-RevId: 236709409",DialectConversion.h,"@@ -191,7 +191,9 @@ protected:
   /// The default behavior of this function is to call convertType on individual
   /// function operands and results, and then create a new MLIR function type
   /// from those.
-  virtual FunctionType convertFunctionSignatureType(FunctionType t);
+  virtual std::pair<FunctionType, std::vector<NamedAttributeList>>
+  convertFunctionSignatureType(FunctionType t,
+                               ArrayRef<NamedAttributeList> argAttrs);
 };
 
 } // end namespace mlir
",0,train
bf0b5b619d633fcff14cc11243297537e83d77d2,tensorflow/tensorflow,Add an extra check in case the rank is static,py_builtins.py,"@@ -478,12 +478,16 @@ def _tf_sorted(iterable, key, reverse):
     direction = 'DESCENDING'
   if key is not UNSPECIFIED:
     mapped = parallel_ops.vectorized_map(key, iterable)
+    if mapped.shape.rank is not None and mapped.shape.rank != 1:
+      raise ValueError('sort only supports 1D tensors')
     with ops.control_dependencies(
-        [check_ops.assert_rank_v2(mapped, 1, 'only support 1-D tensor')]):
+        [check_ops.assert_rank_v2(mapped, 1, 'sort only supports 1D tensors')]):
       order = sort_ops.argsort(mapped, direction=direction)
       return array_ops.gather_v2(iterable, order)
+  if iterable.shape.rank is not None and iterable.shape.rank != 1:
+    raise ValueError('sort only supports 1D tensors')
   with ops.control_dependencies(
-      [check_ops.assert_rank_v2(iterable, 1, 'only support 1-D tensor')]):
+      [check_ops.assert_rank_v2(iterable, 1, 'sort only supports 1D tensors')]):
     return sort_ops.sort(iterable, direction=direction)
 
 
",0,train
6b8469f225837eff0ecc6a92cc74d1605f3d4dae,tensorflow/tensorflow,"Add go_backwards support for keras fused lstm

PiperOrigin-RevId: 297287353
Change-Id: Idaebe3d0c84fc8be03651233a4af7c9cd46a23ca",lstm_utils.cc,"@@ -95,6 +95,14 @@ Value Transpose2D(OpBuilder* builder, Value value_to_transpose,
   return Transpose(builder, value_to_transpose, perm, type, location);
 }
 
+Value Reverse(OpBuilder* builder, Value value_to_reverse, int axis,
+              RankedTensorType type, mlir::Location location) {
+  auto axis_op = CreateI32SplatConst(builder, {1}, axis, location);
+  // The result type will be the same as the input.
+  return builder->create<TF::ReverseV2Op>(location, type, value_to_reverse,
+                                          axis_op);
+}
+
 ArrayRef<int64_t> GetRankedTensorShape(Value value) {
   return value.getType().cast<RankedTensorType>().getShape();
 }
@@ -615,6 +623,16 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) {
     final_input_type = final_inputs.getType().dyn_cast<RankedTensorType>();
   }
 
+  // Handle go_backwards:
+  // LSTM with Keras semantics will reverse the input sequence if go_backwards is set.
+  auto go_backwards_attr = func_op.getAttrOfType<BoolAttr>(""tf.go_backwards"");
+
+  if (go_backwards_attr != nullptr && go_backwards_attr.getValue()) {
+    // We assume input is already in {time, batch, size} layout.
+    final_inputs =
+        Reverse(builder, final_inputs, 0, final_input_type, func_op.getLoc());
+  }
+
   int batch = final_input_type.getDimSize(1);
   int time = final_input_type.getDimSize(0);
 
",0,train
f72bf79d9da541165f88161bd0a973c085ec924d,tensorflow/tensorflow,"Automated rollback of commit 05b15600b3c4472ec79aa865ea1d313c87b68a21

PiperOrigin-RevId: 273779697",training.py,"@@ -291,6 +291,22 @@ class Model(network.Network):
     self._experimental_run_tf_function = kwargs.pop(
         'experimental_run_tf_function', True)
 
+    # Prepare Session arguments (legacy).
+    kwargs.pop('cloning', None)  # Legacy DistStrat argument, never used.
+    allowed_kwargs = {'feed_dict', 'fetches', 'options', 'run_metadata'}
+    unknown_kwargs = set(kwargs.keys()) - allowed_kwargs
+    if unknown_kwargs:
+      raise TypeError(
+          'Invalid keyword argument(s) in `compile`: %s' % (unknown_kwargs,))
+    self._function_kwargs = kwargs
+    if self._function_kwargs:
+      self._experimental_run_tf_function = False
+      if self.run_eagerly:
+        raise ValueError(
+            'Session keyword arguments are not supported '
+            'when `run_eagerly=True`. You passed the following '
+            'Session arguments: %s' % (self._function_kwargs,))
+
     self._set_optimizer(optimizer)
     is_any_optimizer_v1 = any(isinstance(opt, optimizers.Optimizer)
                               for opt in nest.flatten(self.optimizer))
@@ -416,8 +432,6 @@ class Model(network.Network):
       # Functions for train, test and predict will
       # be compiled lazily when required.
       # This saves time when the user is not using all functions.
-      self._function_kwargs = kwargs
-
       self.train_function = None
       self.test_function = None
       self.predict_function = None
",0,train
f72bf79d9da541165f88161bd0a973c085ec924d,tensorflow/tensorflow,"Automated rollback of commit 05b15600b3c4472ec79aa865ea1d313c87b68a21

PiperOrigin-RevId: 273779697",training_test.py,"@@ -244,6 +244,38 @@ class CompileTest(keras_parameterized.TestCase):
           run_eagerly=testing_utils.should_run_eagerly(),
           experimental_run_tf_function=testing_utils.should_run_tf_function())
 
+  @keras_parameterized.run_all_keras_modes
+  def test_compile_with_session_kwargs(self):
+    model = testing_utils.get_small_sequential_mlp(
+        num_hidden=10, num_classes=2, input_dim=3)
+
+    # Test that unknown arguments are not accepted
+    with self.assertRaisesRegexp(
+        TypeError,
+        r'Invalid keyword argument'):
+      model.compile(
+          optimizer='adam',
+          loss='mse',
+          foo=True)
+
+    if testing_utils.should_run_eagerly():
+      # Test that Session kwargs cannot be used with run_eagerly
+      with self.assertRaisesRegexp(
+          ValueError,
+          r'not supported when `run_eagerly=True`'):
+        model.compile(
+            optimizer='adam',
+            loss='mse',
+            run_eagerly=True,
+            feed_dict={})
+    else:
+      # Test that Session kwargs trigger legacy path execution
+      model.compile(
+          optimizer='adam',
+          loss='mse',
+          feed_dict={})
+      self.assertFalse(model._experimental_run_tf_function)
+
 
 class TrainingTest(keras_parameterized.TestCase):
 
",0,train
87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails.

All queue ops except for RandomShuffleQueue do not unref correctly when
Initialize() fails. Since the caller does not free the queue when the returned
status is a failure, this is a memory leak.

Fixing all other queue ops as done in RandomShuffleQueue.
Change: 133673012",fifo_queue_op.cc,"@@ -50,8 +50,13 @@ class FIFOQueueOp : public QueueOp {
     return [this](QueueInterface** ret) {
       FIFOQueue* queue = new FIFOQueue(capacity_, component_types_,
                                        component_shapes_, cinfo_.name());
-      *ret = queue;
-      return queue->Initialize();
+      Status s = queue->Initialize();
+      if (s.ok()) {
+        *ret = queue;
+      } else {
+        queue->Unref();
+      }
+      return s;
     };
   }
 
",0,train
87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails.

All queue ops except for RandomShuffleQueue do not unref correctly when
Initialize() fails. Since the caller does not free the queue when the returned
status is a failure, this is a memory leak.

Fixing all other queue ops as done in RandomShuffleQueue.
Change: 133673012",padding_fifo_queue_op.cc,"@@ -42,7 +42,8 @@ namespace tensorflow {
 // tensor of handles to Queues in the corresponding device.
 class PaddingFIFOQueueOp : public QueueOp {
  public:
-  explicit PaddingFIFOQueueOp(OpKernelConstruction* context) : QueueOp(context) {
+  explicit PaddingFIFOQueueOp(OpKernelConstruction* context)
+      : QueueOp(context) {
     OP_REQUIRES_OK(context, context->GetAttr(""shapes"", &component_shapes_));
     for (const auto& shape : component_shapes_) {
       OP_REQUIRES(context, shape.dims() >= 0,
@@ -56,8 +57,13 @@ class PaddingFIFOQueueOp : public QueueOp {
     return [this](QueueInterface** ret) {
       PaddingFIFOQueue* queue = new PaddingFIFOQueue(
           capacity_, component_types_, component_shapes_, cinfo_.name());
-      *ret = queue;
-      return queue->Initialize();
+      Status s = queue->Initialize();
+      if (s.ok()) {
+        *ret = queue;
+      } else {
+        queue->Unref();
+      }
+      return s;
     };
   }
 
",0,train
87b040bcdf94d3b0799ad433c97e636b55f2d27b,tensorflow/tensorflow,"Free queues when Initialize() fails.

All queue ops except for RandomShuffleQueue do not unref correctly when
Initialize() fails. Since the caller does not free the queue when the returned
status is a failure, this is a memory leak.

Fixing all other queue ops as done in RandomShuffleQueue.
Change: 133673012",priority_queue_op.cc,"@@ -53,8 +53,13 @@ class PriorityQueueOp : public QueueOp {
     return [this](QueueInterface** ret) {
       PriorityQueue* queue = new PriorityQueue(
           capacity_, component_types_, component_shapes_, cinfo_.name());
-      *ret = queue;
-      return queue->Initialize();
+      Status s = queue->Initialize();
+      if (s.ok()) {
+        *ret = queue;
+      } else {
+        queue->Unref();
+      }
+      return s;
     };
   }
 
",0,train
90393adbc0366515b9903407c9aa1a70799508c6,tensorflow/tensorflow,"Fix the forwardprop docstring

Labels were incorrectly broadcasting against predictions in the regression examples

Fixes #46848.

PiperOrigin-RevId: 355742564
Change-Id: I532d3f19ed38e1e9c06a0b1d009bd867e0d25983",forwardprop.py,"@@ -234,12 +234,13 @@ class ForwardAccumulator():
   Consider a simple linear regression:
 
   >>> x = tf.constant([[2.0, 3.0], [1.0, 4.0]])
+  >>> targets = tf.constant([[1.], [-1.]])
   >>> dense = tf.keras.layers.Dense(1)
   >>> dense.build([None, 2])
   >>> with tf.autodiff.ForwardAccumulator(
   ...    primals=dense.kernel,
   ...    tangents=tf.constant([[1.], [0.]])) as acc:
-  ...   loss = tf.reduce_sum((dense(x) - tf.constant([1., -1.])) ** 2.)
+  ...   loss = tf.reduce_sum((dense(x) - targets) ** 2.)
   >>> acc.jvp(loss)
   <tf.Tensor: shape=(), dtype=float32, numpy=...>
 
@@ -258,9 +259,10 @@ class ForwardAccumulator():
   invocations:
 
   >>> x = tf.constant([[2.0, 3.0], [1.0, 4.0]])
+  >>> targets = tf.constant([[1.], [-1.]])
   >>> dense = tf.keras.layers.Dense(1)
   >>> dense.build([None, 2])
-  >>> loss_fn = lambda: tf.reduce_sum((dense(x) - tf.constant([1., -1.])) ** 2.)
+  >>> loss_fn = lambda: tf.reduce_sum((dense(x) - targets) ** 2.)
   >>> kernel_fprop = []
   >>> with tf.autodiff.ForwardAccumulator(
   ...     dense.kernel, tf.constant([[1.], [0.]])) as acc:
",0,train
abd645085b1dd1496df847b05a1934d471a2f2c0,tensorflow/tensorflow,"Use the correct device ordinal to check whether the device the executable was
built for is equivalent to the device it will run on.

Before this patch, if the device to run on was provided via a stream without
setting the device ordinal in the ExecutableRunOptions, we would check the
default device against the device the executable was built for.

PiperOrigin-RevId: 206892902",local_client.cc,"@@ -101,11 +101,14 @@ Status LocalExecutable::ValidateExecutionOptions(
     }
   }
 
-  // Verify that the device the executable was built for is equivalent to the
-  // device it will run on.
-  int run_device_ordinal = run_options.device_ordinal() == -1
-                               ? backend_->default_device_ordinal()
-                               : run_options.device_ordinal();
+  // Verify that the device the executable was built for is equivalent
+  // to the device it will run on.
+  int run_device_ordinal = run_options.device_ordinal();
+  if (run_device_ordinal == -1) {
+    run_device_ordinal = run_options.stream() != nullptr
+                             ? run_options.stream()->parent()->device_ordinal()
+                             : backend_->default_device_ordinal();
+  }
   TF_ASSIGN_OR_RETURN(bool devices_equivalent,
                       backend_->devices_equivalent(
                           run_device_ordinal, build_options_.device_ordinal()));
",0,test
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,normalization.py,"@@ -1270,15 +1270,12 @@ class LayerNormalization(Layer):
 
       inputs = array_ops.reshape(inputs, squeezed_shape)
 
-      def _set_const_tensor(val, dtype, shape):
-        return array_ops.fill(shape, constant_op.constant(val, dtype=dtype))
-
       # self.gamma and self.beta have the wrong shape for fused_batch_norm, so
       # we cannot pass them as the scale and offset parameters. Therefore, we
       # create two constant tensors in correct shapes for fused_batch_norm and
       # later construct a separate calculation on the scale and offset.
-      scale = _set_const_tensor(1.0, self.dtype, [pre_dim])
-      offset = _set_const_tensor(0.0, self.dtype, [pre_dim])
+      scale = array_ops.ones([pre_dim], dtype=self.dtype)
+      offset = array_ops.zeros([pre_dim], dtype=self.dtype)
 
       # Compute layer normalization using the fused_batch_norm function.
       outputs, _, _ = nn.fused_batch_norm(
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,array_grad.py,"@@ -77,10 +77,11 @@ def _ConcatGradHelper(op, grad, start_value_index, end_value_index, dim_index):
     # with 0's everywhere and 1 in the concat dim position.
     # Note: Can't use sparse_to_dense since it isn't GPU-capable (for now)
     mask = array_ops.concat([
-        array_ops.fill(array_ops.expand_dims(concat_dim, 0), 0), [1],
-        array_ops.fill(shape_of_shape - concat_dim - 1, 0)
+        array_ops.zeros(array_ops.expand_dims(concat_dim, 0),
+                        dtype=dtypes.int32), [1],
+        array_ops.zeros(shape_of_shape - concat_dim - 1, dtype=dtypes.int32)
     ], 0)
-    begin = array_ops.fill(shape_of_shape, 0)
+    begin = array_ops.zeros(shape_of_shape, dtype=dtypes.int32)
     return mask, begin
 
   def _ExtractInputShapes(inputs):
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,embedding_ops.py,"@@ -525,8 +525,8 @@ def embedding_lookup_sparse(params,
       embeddings = array_ops.gather(embeddings, idx)
 
       # Reshape weights to allow broadcast
-      ones = array_ops.fill(
-          array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0), 1)
+      ones_shape = array_ops.expand_dims(array_ops.rank(embeddings) - 1, 0)
+      ones = array_ops.ones(ones_shape, dtype=dtypes.int32)
       bcast_weights_shape = array_ops.concat([array_ops.shape(weights), ones],
                                              0)
 
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,gradients_util.py,"@@ -28,7 +28,6 @@ from tensorflow.python import pywrap_tfe
 from tensorflow.python.eager import backprop
 from tensorflow.python.eager import backprop_util
 from tensorflow.python.eager import context
-from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import function as framework_function
 from tensorflow.python.framework import ops
@@ -172,9 +171,8 @@ def _DefaultGradYs(grad_ys,
               ""Gradients of complex tensors must set grad_ys (y.dtype = %r)"" %
               y.dtype)
         new_grad_ys.append(
-            array_ops.fill(
-                array_ops.shape(y),
-                constant_op.constant(1, dtype=y.dtype, name=""grad_ys_%d"" % i)))
+            array_ops.ones(
+                array_ops.shape(y), dtype=y.dtype, name=""grad_ys_%d"" % i))
         continue
       if y.dtype.is_floating or y.dtype.is_integer:
         if not grad_y.dtype.is_floating and not grad_y.dtype.is_integer:
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,image_ops_impl.py,"@@ -5548,7 +5548,8 @@ def non_max_suppression_padded_v2(boxes,
         array_ops.gather(array_ops.reshape(sorted_indices, [-1]),
                          gather_idx),
         [batch_size, -1])
-  invalid_index = array_ops.fill([batch_size, max_output_size], 0)
+  invalid_index = array_ops.zeros(
+      [batch_size, max_output_size], dtype=dtypes.int32)
   idx_index = array_ops.expand_dims(math_ops.range(max_output_size), 0)
   num_valid_expanded = array_ops.expand_dims(num_valid, 1)
   idx = array_ops.where(idx_index < num_valid_expanded,
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,math_grad.py,"@@ -330,9 +330,10 @@ def _SegmentMeanGrad(op, grad):
   input_rank = array_ops.rank(op.inputs[0])
   ones_shape = array_ops.concat([
       array_ops.shape(op.inputs[1]),
-      array_ops.fill(array_ops.expand_dims(input_rank - 1, 0), 1)
+      array_ops.ones(
+          array_ops.expand_dims(input_rank - 1, 0), dtype=dtypes.int32)
   ], 0)
-  ones = array_ops.fill(ones_shape, constant_op.constant(1, dtype=grad.dtype))
+  ones = array_ops.ones(ones_shape, dtype=grad.dtype)
   scaled_grad = math_ops.divide(grad, math_ops.segment_sum(ones, op.inputs[1]))
   return array_ops.gather(scaled_grad, op.inputs[1]), None
 
",0,train
87bdd515ecd1991cb11d5dac654f47033f520768,tensorflow/tensorflow,Prefer tf.ones and tf.zeros over tf.fill,math_ops.py,"@@ -4169,7 +4169,7 @@ def reduced_shape(input_shape, axes):
       ],  # [1, 2]
       [
           input_shape,  # [2, 3, 5, 7]
-          array_ops.fill(axes_shape, 1)
+          array_ops.ones(axes_shape, dtype=dtypes.int32)
       ])  # [1, 1]
 
 
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",bitmap_helpers.h,"@@ -31,10 +31,12 @@ void resize(T* out, uint8_t* in, int image_height, int image_width,
             int wanted_channels, Settings* s);
 
 // explicit instantiation
-template void resize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int, int,
-                              int, Settings*);
 template void resize<float>(float*, unsigned char*, int, int, int, int, int,
                             int, Settings*);
+template void resize<int8_t>(int8_t*, unsigned char*, int, int, int, int, int,
+                             int, Settings*);
+template void resize<uint8_t>(uint8_t*, unsigned char*, int, int, int, int, int,
+                              int, Settings*);
 
 }  // namespace label_image
 }  // namespace tflite
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",bitmap_helpers_impl.h,"@@ -82,10 +82,19 @@ void resize(T* out, uint8_t* in, int image_height, int image_width,
   auto output_number_of_pixels = wanted_height * wanted_width * wanted_channels;
 
   for (int i = 0; i < output_number_of_pixels; i++) {
-    if (s->input_floating)
-      out[i] = (output[i] - s->input_mean) / s->input_std;
-    else
-      out[i] = (uint8_t)output[i];
+    switch (s->input_type) {
+      case kTfLiteFloat32:
+        out[i] = (output[i] - s->input_mean) / s->input_std;
+        break;
+      case kTfLiteInt8:
+        out[i] = output[i] - 128;
+        break;
+      case kTfLiteUInt8:
+        out[i] = output[i];
+        break;
+      default:
+        break;
+    }
   }
 }
 
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",get_top_n.h,"@@ -27,10 +27,12 @@ void get_top_n(T* prediction, int prediction_size, size_t num_results,
                bool input_floating);
 
 // explicit instantiation so that we can use them otherwhere
-template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
-                                 std::vector<std::pair<float, int>>*, bool);
 template void get_top_n<float>(float*, int, size_t, float,
-                               std::vector<std::pair<float, int>>*, bool);
+                               std::vector<std::pair<float, int>>*, int);
+template void get_top_n<int8_t>(int8_t*, int, size_t, float,
+                                std::vector<std::pair<float, int>>*, int);
+template void get_top_n<uint8_t>(uint8_t*, int, size_t, float,
+                                 std::vector<std::pair<float, int>>*, int);
 
 }  // namespace label_image
 }  // namespace tflite
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",get_top_n_impl.h,"@@ -30,19 +30,30 @@ extern bool input_floating;
 template <class T>
 void get_top_n(T* prediction, int prediction_size, size_t num_results,
                float threshold, std::vector<std::pair<float, int>>* top_results,
-               bool input_floating) {
+               int input_type) {
   // Will contain top N results in ascending order.
   std::priority_queue<std::pair<float, int>, std::vector<std::pair<float, int>>,
                       std::greater<std::pair<float, int>>>
       top_result_pq;
 
   const long count = prediction_size;  // NOLINT(runtime/int)
+  float value = 0.0;
+
   for (int i = 0; i < count; ++i) {
-    float value;
-    if (input_floating)
-      value = prediction[i];
-    else
-      value = prediction[i] / 255.0;
+    switch (input_type) {
+      case kTfLiteFloat32:
+        value = prediction[i];
+        break;
+      case kTfLiteInt8:
+        // value = prediction[i] / 128.0;
+        value = (prediction[i] + 128) / 256.0;
+        break;
+      case kTfLiteUInt8:
+        value = prediction[i] / 255.0;
+        break;
+      default:
+        break;
+    }
     // Only add it if it beats the threshold and has a chance at being in
     // the top N.
     if (value < threshold) {
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",label_image.cc,"@@ -221,13 +221,18 @@ void RunInference(Settings* s) {
   int wanted_width = dims->data[2];
   int wanted_channels = dims->data[3];
 
-  switch (interpreter->tensor(input)->type) {
+  s->input_type = interpreter->tensor(input)->type;
+  switch (s->input_type) {
     case kTfLiteFloat32:
-      s->input_floating = true;
       resize<float>(interpreter->typed_tensor<float>(input), in.data(),
                     image_height, image_width, image_channels, wanted_height,
                     wanted_width, wanted_channels, s);
       break;
+    case kTfLiteInt8:
+      resize<int8_t>(interpreter->typed_tensor<int8_t>(input), in.data(),
+                     image_height, image_width, image_channels, wanted_height,
+                     wanted_width, wanted_channels, s);
+      break;
     case kTfLiteUInt8:
       resize<uint8_t>(interpreter->typed_tensor<uint8_t>(input), in.data(),
                       image_height, image_width, image_channels, wanted_height,
@@ -238,7 +243,6 @@ void RunInference(Settings* s) {
                  << interpreter->tensor(input)->type << "" yet"";
       exit(-1);
   }
-
   auto profiler =
       absl::make_unique<profiling::Profiler>(s->max_profiling_buffer_entries);
   interpreter->SetProfiler(profiler.get());
@@ -290,16 +294,22 @@ void RunInference(Settings* s) {
   switch (interpreter->tensor(output)->type) {
     case kTfLiteFloat32:
       get_top_n<float>(interpreter->typed_output_tensor<float>(0), output_size,
-                       s->number_of_results, threshold, &top_results, true);
+                       s->number_of_results, threshold, &top_results,
+                       s->input_type);
+      break;
+    case kTfLiteInt8:
+      get_top_n<int8_t>(interpreter->typed_output_tensor<int8_t>(0),
+                        output_size, s->number_of_results, threshold,
+                        &top_results, s->input_type);
       break;
     case kTfLiteUInt8:
       get_top_n<uint8_t>(interpreter->typed_output_tensor<uint8_t>(0),
                          output_size, s->number_of_results, threshold,
-                         &top_results, false);
+                         &top_results, s->input_type);
       break;
     default:
       LOG(FATAL) << ""cannot handle output type ""
-                 << interpreter->tensor(input)->type << "" yet"";
+                 << interpreter->tensor(output)->type << "" yet"";
       exit(-1);
   }
 
",0,train
e305ac4b75a9523bf047fdaef75159f13bd04b86,tensorflow/tensorflow,"[tflite] add int8 input/output to label_image

More and more models, such as MobilenetV3's EdgeTPU ones, are using
post-training full integer quantization. With this patch, I can get
reasonable results.

./label_image_int8 -m mobilenet_edgetpu_224_1.0_int8.tflite
Loaded model mobilenet_edgetpu_224_1.0_int8.tflite
resolved reporter
INFO: Initialized TensorFlow Lite runtime.
invoked
average time: 15.363 ms
0.867188: 653 military uniform
0.0390625: 835 suit
0.015625: 458 bow tie
0.0078125: 907 Windsor tie
0.00390625: 716 pickelhaube",label_image.h,"@@ -26,7 +26,7 @@ struct Settings {
   bool verbose = false;
   bool accel = false;
   bool old_accel = false;
-  bool input_floating = false;
+  int input_type = kTfLiteFloat32;
   bool profiling = false;
   bool allow_fp16 = false;
   bool gl_backend = false;
@@ -37,7 +37,6 @@ struct Settings {
   tflite::FlatBufferModel* model;
   string input_bmp_name = ""./grace_hopper.bmp"";
   string labels_file_name = ""./labels.txt"";
-  string input_layer_type = ""uint8_t"";
   int number_of_threads = 4;
   int number_of_results = 5;
   int max_profiling_buffer_entries = 1024;
",0,train
cdb5cd1786f295e699789cc822bca2e52a4cb81c,tensorflow/tensorflow,"Do not use fused batch norm in the 5D case.

https://github.com/tensorflow/tensorflow/commit/27d26a8d86bceda282ad9ba3e3116a00759d4ebc added support for using fused batch norm for 5D tensors, but this caused a regression in UNet. It's unclear why, but perhaps it is due to the fact that fused batch norm uses Bessel's correction and nonfused batch norm does not.

PiperOrigin-RevId: 342728653
Change-Id: I23c705c73ac4f55c1c799d1530d1e9c6a9928ea0",normalization.py,"@@ -248,6 +248,7 @@ class BatchNormalizationBase(Layer):
     axis = [self.axis] if isinstance(self.axis, int) else self.axis
     # Axis -3 is equivalent to 1, and axis -1 is equivalent to 3, because the
     # input rank is required to be 4 (which is checked later).
+    # TODO(b/173253101): Once the input rank can be 5, update this check.
     if len(axis) > 1 or axis[0] not in (-3, -1, 1, 3):
       raise ValueError('Passing `fused=True` is only supported when axis is 1 '
                        'or 3. Got axis %s' % (axis,))
@@ -331,16 +332,19 @@ class BatchNormalizationBase(Layer):
       # TODO(yaozhang): if input is not 4D, reshape it to 4D and reshape the
       # output back to its original shape accordingly.
       if self._USE_V2_BEHAVIOR:
+        # TODO(b/173253101): Using fused in the 5D case is currently disabled
+        # due to a regression on UNet, so it is currently only supported in
+        # the 4D case.
         if self.fused is None:
-          self.fused = ndims in (4, 5)
-        elif self.fused and ndims not in (4, 5):
+          self.fused = ndims == 4
+        elif self.fused and ndims != 4:
           raise ValueError('Batch normalization layers with `fused=True` only '
                            'support 4D or 5D input tensors. '
                            'Received tensor with shape: %s' %
                            (tuple(input_shape),))
       else:
         assert self.fused is not None
-        self.fused = (ndims in (4, 5) and self._fused_can_be_used())
+        self.fused = (ndims == 4 and self._fused_can_be_used())
       # TODO(chrisying): fused batch norm is currently not supported for
       # multi-axis batch norm and by extension virtual batches. In some cases,
       # it might be possible to use fused batch norm but would require reshaping
",0,train
cdb5cd1786f295e699789cc822bca2e52a4cb81c,tensorflow/tensorflow,"Do not use fused batch norm in the 5D case.

https://github.com/tensorflow/tensorflow/commit/27d26a8d86bceda282ad9ba3e3116a00759d4ebc added support for using fused batch norm for 5D tensors, but this caused a regression in UNet. It's unclear why, but perhaps it is due to the fact fused batch norm uses Bessel's correction and nonfused batch norm does not.

PiperOrigin-RevId: 342728653
Change-Id: I23c705c73ac4f55c1c799d1530d1e9c6a9928ea0",normalization_test.py,"@@ -241,6 +241,31 @@ class BatchNormalizationTest(keras_parameterized.TestCase):
     self.assertAllClose(model.bn.moving_mean.numpy(), [0.047], atol=3e-3)
     self.assertAllClose(model.bn.moving_variance.numpy(), [0.9], atol=3e-2)
 
+  @combinations.generate(combinations.combine(mode=['eager']))
+  def test_bessels_correction(self):
+    # Bessel's correction is currently only used in the fused case. In the
+    # future, it may be used in the nonfused case as well.
+
+    x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1])
+    layer = normalization_v2.BatchNormalization(
+        momentum=0.5, moving_variance_initializer='zeros')
+    layer(x, training=True)
+    self.assertTrue(layer.fused)
+    # Since fused is used, Bessel's correction is used. The variance of [0, 2]
+    # is 2 with Bessel's correction. Since the momentum is 0.5, the variance is
+    # 2 * 0.5 == 1.
+    self.assertAllEqual(self.evaluate(layer.moving_variance), [1.])
+
+    x = constant_op.constant([0., 2.], shape=[2, 1, 1, 1, 1])
+    layer = normalization_v2.BatchNormalization(
+        momentum=0.5, moving_variance_initializer='zeros')
+    layer(x, training=True)
+    self.assertFalse(layer.fused)
+    # Since fused is not used, Bessel's correction is not used. The variance of
+    # [0, 2] is 1 without Bessel's correction. Since the momentum is 0.5, the
+    # variance is 1 * 0.5 == 0.5.
+    self.assertAllEqual(self.evaluate(layer.moving_variance), [0.5])
+
 
 class BatchNormalizationV1Test(keras_parameterized.TestCase):
 
@@ -291,6 +316,12 @@ class BatchNormalizationV2Test(keras_parameterized.TestCase):
     norm(inp)
     self.assertEqual(norm.fused, False)
 
+    norm = normalization_v2.BatchNormalization()
+    self.assertIsNone(norm.fused)
+    inp = keras.layers.Input(shape=(4, 4, 4, 4))
+    norm(inp)
+    self.assertEqual(norm.fused, False)
+
     norm = normalization_v2.BatchNormalization(virtual_batch_size=2)
     self.assertEqual(norm.fused, False)
     inp = keras.layers.Input(shape=(4, 4, 4))
",0,train
193ff560137f885a18398e83d4d490b0f9ea610a,tensorflow/tensorflow,"[XLA] [NFC] Add more information to buffer assignment: print parameter and output shape

PiperOrigin-RevId: 285440385
Change-Id: Ia993b9fd0218820403e4d9edb2da44e72c8677fd",buffer_assignment.cc,"@@ -298,6 +298,38 @@ static bool CompareHloValuesById(const HloValue* a, const HloValue* b) {
   return a->id() < b->id();
 }
 
+// Returns parameter instruction corresponding to the allocation or nullptr.
+static const HloInstruction* GetEntryParameterInstruction(
+    const BufferAllocation& alloc) {
+  for (const auto& p : alloc.assigned_buffers()) {
+    const HloValue* value = p.first;
+    const HloInstruction* instr = value->instruction();
+    if (instr->opcode() == HloOpcode::kParameter &&
+        instr->parent() == instr->parent()->parent()->entry_computation()) {
+      return instr;
+    }
+  }
+  return nullptr;
+}
+
+// Returns root module output instruction corresponding to the allocation or
+// nullptr.
+static const HloInstruction* GetOutputInstruction(
+    const BufferAllocation& alloc) {
+  for (const auto& p : alloc.assigned_buffers()) {
+    const HloValue* value = p.first;
+    for (const HloPosition& position : value->positions()) {
+      const HloInstruction* instr = position.instruction;
+      if (position.index.empty() &&
+          instr->parent()->root_instruction() == instr &&
+          instr->parent()->IsEntryComputation()) {
+        return instr;
+      }
+    }
+  }
+  return nullptr;
+}
+
 string BufferAllocation::ToString() const {
   string output;
   StrAppendFormat(&output, ""allocation %d: %p, size %d"", index_, this, size());
@@ -305,8 +337,15 @@ string BufferAllocation::ToString() const {
     StrAppend(&output, "", color "", color().value());
   }
   if (is_entry_computation_parameter()) {
-    StrAppend(&output, "", parameter "", parameter_number(), "" at ShapeIndex "",
-              param_shape_index().ToString());
+    const HloInstruction* param = GetEntryParameterInstruction(*this);
+    CHECK(param);
+    StrAppend(&output, "", parameter "", parameter_number(), "", shape |"",
+              param->shape().ToString(/*print_layout=*/false),
+              ""| at ShapeIndex "", param_shape_index().ToString());
+  }
+  if (const HloInstruction* instr = GetOutputInstruction(*this)) {
+    StrAppend(&output, "", output shape is |"",
+              instr->shape().ToString(/*print_layout=*/false), ""|"");
   }
   if (is_constant()) {
     StrAppend(&output, "", constant"");
",0,train
452952e289084468d06431db433ce5fbd031dfac,tensorflow/tensorflow,"Automated rollback of commit 31df1ce7dee077a5acaba2ddd43959665a8ae323

PiperOrigin-RevId: 235552900",callbacks.py,"@@ -1222,8 +1222,6 @@ class TensorBoard(Callback):
           with self._train_writer.as_default():
             with summary_ops_v2.always_record_summaries():
               summary_ops_v2.graph(K.get_graph())
-              if self.model._is_graph_network:  # pylint: disable=protected-access
-                summary_ops_v2.keras_model('keras', self.model, step=0)
 
   def _close_writers(self):
     """"""Close all remaining open file writers owned by this callback.
",0,train
a5ac44a0da3fb5e325195577149f27a4dae9ae4a,tensorflow/tensorflow,Add GetNumberOfEngineInputs function,utils.cc,"@@ -165,5 +165,21 @@ string GetLoadedTensorRTVersion() {
   return absl::StrCat(major, ""."", minor, ""."", patch);
 }
 
+int GetNumberOfEngineInputs(
+  const nvinfer1::ICudaEngine *engine) {
+  int n_bindings = engine->getNbBindings();
+  int n_input = 0;
+  for (int i=0; i < n_bindings; i++) {
+     if (engine->bindingIsInput(i)) n_input++;
+  }
+  // According to TensorRT 7 doc: ""If the engine has been built for K profiles,
+  // the first getNbBindings() / K bindings are used by profile number 0, the
+  // following getNbBindings() / K bindings are used by profile number 1 etc.""
+  // Therefore, to get the number of input tensors, we need to divide by
+  // the number of profiles.
+  int n_profiles = engine->getNbOptimizationProfiles();
+  return n_input / n_profiles;
+}
+
 }  // namespace tensorrt
 }  // namespace tensorflow
",0,train
a5ac44a0da3fb5e325195577149f27a4dae9ae4a,tensorflow/tensorflow,Add GetNumberOfEngineInputs function,utils.h,"@@ -106,6 +106,11 @@ string GetLinkedTensorRTVersion();
 // TensorRT library version information {Maj, Min, Patch}.
 string GetLoadedTensorRTVersion();
 
+// Returns the number of inputs for the engine, which also corresponds to the
+// number of input tensors for the network. This can differ from the number of
+// input bindings, because each profile has a set of bindings.
+int GetNumberOfEngineInputs(const nvinfer1::ICudaEngine *engine);
+
 #endif  // GOOGLE_CUDA && GOOGLE_TENSORRT
 
 }  // namespace tensorrt
",0,train
63c1befb8930f8f5f34ae9f2b10e8ae870493d86,tensorflow/tensorflow,Improve docs for tf.nn.depthwise_conv2d_native,nn_ops.cc,"@@ -831,11 +831,13 @@ a different filter to each input channel (expanding from 1 channel to
 `channel_multiplier` channels for each), then concatenates the results
 together. Thus, the output has `in_channels * channel_multiplier` channels.
 
+```
 for k in 0..in_channels-1
   for q in 0..channel_multiplier-1
     output[b, i, j, k * channel_multiplier + q] =
       sum_{di, dj} input[b, strides[1] * i + di, strides[2] * j + dj, k] *
                         filter[di, dj, k, q]
+```
 
 Must have `strides[0] = strides[3] = 1`.  For the most common case of the same
 horizontal and vertices strides, `strides = [1, stride, stride, 1]`.
",0,train
1e1f1aa3c2a505c6d44051291b7bf1b05a0923f7,tensorflow/tensorflow,Update network_test.py,network_test.py,"@@ -131,8 +131,8 @@ class NetworkConstructionTest(keras_parameterized.TestCase):
   def test_get_layer(self):
     # create a simple network
     x = input_layer_lib.Input(shape=(32,))
-    dense_a = keras.layers.Dense(4, name='dense_a')
-    dense_b = keras.layers.Dense(2, name='dense_b')
+    dense_a = layers.Dense(4, name='dense_a')
+    dense_b = layers.Dense(2, name='dense_b')
     y = dense_b(dense_a(x))
     network = network_lib.Network(x, y, name='dense_network')
 
",0,test
b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 324233556
Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",device_name_utils.h,"@@ -46,8 +46,8 @@ namespace tensorflow {
 class DeviceNameUtils {
  public:
   // Returns a fully qualified device name given the parameters.
-  static string FullName(const string& job, int replica, int task,
-                         const string& type, int id);
+  static std::string FullName(const std::string& job, int replica, int task,
+                              const std::string& type, int id);
 
   struct ParsedName {
     void Clear() {
@@ -79,13 +79,13 @@ class DeviceNameUtils {
     }
 
     bool has_job = false;
-    string job;
+    std::string job;
     bool has_replica = false;
     int replica = 0;
     bool has_task = false;
     int task = 0;
     bool has_type = false;
-    string type;
+    std::string type;
     bool has_id = false;
     int id = 0;
   };
@@ -107,7 +107,7 @@ class DeviceNameUtils {
   // an error and *canonical_name is set to """".
   static Status CanonicalizeDeviceName(StringPiece fullname,
                                        StringPiece basename,
-                                       string* canonical_name);
+                                       std::string* canonical_name);
 
   // Returns true if ""name"" specifies any non-trivial constraint on the device.
   static bool HasSomeDetails(const ParsedName& name) {
@@ -163,11 +163,11 @@ class DeviceNameUtils {
   static const ParsedName AddressSpace(const ParsedName& name);
 
   // Returns the local device given its ""type"" and ""id"".
-  static string LocalName(StringPiece type, int id);
+  static std::string LocalName(StringPiece type, int id);
 
   // Returns a short local device name (cpu:0, gpu:1, etc) based on
   // the given fullname.
-  static string LocalName(StringPiece fullname);
+  static std::string LocalName(StringPiece fullname);
 
   // If ""name"" is a valid local device name (cpu:0, gpu:1, etc.),
   // fills in parsed.type and parsed.id accordingly. Returns true iff
@@ -181,13 +181,14 @@ class DeviceNameUtils {
   // component into *device.  This function will still return true if
   // the task component is empty, but it requires the relative device
   // component to be fully specified.
-  static bool SplitDeviceName(StringPiece name, string* task, string* device);
+  static bool SplitDeviceName(StringPiece name, std::string* task,
+                              std::string* device);
 
   // Get the task name from ParsedName. Return false if the task component is
   // not fully specified.
-  static bool GetTaskName(const ParsedName& pn, string* task);
+  static bool GetTaskName(const ParsedName& pn, std::string* task);
 
-  static string ParsedNameToString(const ParsedName& pn);
+  static std::string ParsedNameToString(const ParsedName& pn);
 
   // Returns canonical and legacy full names for the given parsed
   // device name 'pn'. The returned string names are often useful to
@@ -202,8 +203,8 @@ class DeviceNameUtils {
 
   // Returns name of the CPU:0 device on the same host as the device
   // `device_name`.
-  static Status DeviceNameToCpuDeviceName(const string& device_name,
-                                          string* host_device_name);
+  static Status DeviceNameToCpuDeviceName(const std::string& device_name,
+                                          std::string* host_device_name);
 };
 
 std::ostream& operator<<(std::ostream& os,
",0,train
b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 324233556
Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",padding.h,"@@ -53,12 +53,12 @@ Status CheckValidPadding(Padding padding_type,
 
 // Return the string containing the list of valid padding types, that can be
 // used as an Attr() in REGISTER_OP.
-string GetPaddingAttrString();
+std::string GetPaddingAttrString();
 
 // Like GetPaddingAttrString(), but also includes EXPLICIT.
-string GetPaddingAttrStringWithExplicit();
+std::string GetPaddingAttrStringWithExplicit();
 
-string GetExplicitPaddingsAttrString();
+std::string GetExplicitPaddingsAttrString();
 
 // Sets padding value based on the given string padding value.
 Status GetPaddingFromString(StringPiece str_value, Padding* value);
",0,train
b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 324233556
Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",tensor_format.h,"@@ -97,18 +97,18 @@ enum FilterTensorFormat {
 
 // Parse tensor format from the given string.
 // Return true if the parsing succeeds, and false if it fails.
-bool FormatFromString(const string& format_str, TensorFormat* format);
+bool FormatFromString(const std::string& format_str, TensorFormat* format);
 
 // Parse tensor format from the given string.
 // Return true if the parsing succeeds, and false if it fails.
-bool FilterFormatFromString(const string& format_str,
+bool FilterFormatFromString(const std::string& format_str,
                             FilterTensorFormat* format);
 
 // Convert a tensor format into string.
-string ToString(TensorFormat format);
+std::string ToString(TensorFormat format);
 
 // Convert a filter tensor format into string.
-string ToString(FilterTensorFormat format);
+std::string ToString(FilterTensorFormat format);
 
 // Returns the number of spatial dims of a tensor of rank 'num_dims' and tensor
 // format 'format'.
@@ -504,13 +504,13 @@ inline void GetExplicitPaddingForDim(
 }
 
 // Return the string that specifies the data format for convnet operations.
-string GetConvnetDataFormatAttrString();
-string GetConvnet3dDataFormatAttrString();
+std::string GetConvnetDataFormatAttrString();
+std::string GetConvnet3dDataFormatAttrString();
 
 // Return the string that specifies the filter format for convnet operations.
-string GetConvnetFilterFormatAttrString();
-string GetConvnet3dFilterFormatAttrString();
-string GetConvnetDataFormat2D3DAttrString();
+std::string GetConvnetFilterFormatAttrString();
+std::string GetConvnet3dFilterFormatAttrString();
+std::string GetConvnetDataFormat2D3DAttrString();
 
 // Returns a tensor shape for the specified format and dimension sizes.
 // Works for both 2D and 3D operations. The output shapes are as follows:
",0,train
b51bba348602ef9e9d4e6269d019a40ec3d74c30,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 324233556
Change-Id: I5537b9bd231c0ea1f267406a9b19ec943928e8cd",util.h,"@@ -49,12 +49,12 @@ class MovingAverage {
 
 // Returns a string printing bytes in ptr[0..n).  The output looks
 // like ""00 01 ef cd cd ef"".
-string PrintMemory(const char* ptr, size_t n);
+std::string PrintMemory(const char* ptr, size_t n);
 
 // Given a flattened index into a tensor, computes a string s so that
 // StrAppend(""tensor"", s) is a Python indexing expression.  E.g.,
 // ""tensor"", ""tensor[i]"", ""tensor[i, j]"", etc.
-string SliceDebugString(const TensorShape& shape, const int64 flat);
+std::string SliceDebugString(const TensorShape& shape, const int64 flat);
 
 // disable MKL in runtime
 #ifdef INTEL_MKL
",0,train
eb2f6d0410c70a383b60505cea518758d910a006,tensorflow/tensorflow,"VLOG(2) instead of VLOG(1) for detailed op printouts.

PiperOrigin-RevId: 157291238",virtual_scheduler.cc,"@@ -475,9 +475,9 @@ Costs VirtualScheduler::Summary() const {
   }
 
   // Also log the op description and their corresponding counts.
-  VLOG(1) << ""Node description, counts, cost:"";
+  VLOG(2) << ""Node description, counts, cost:"";
   for (const auto& item : op_counts_) {
-    VLOG(1) << ""Node: "" << item.first << "", Count: "" << item.second
+    VLOG(2) << ""Node: "" << item.first << "", Count: "" << item.second
             << "", Individual Cost: "" << op_costs_.at(item.first);
   }
 
",0,train
d2ce989ee65ce40d8cba8e446eaf64f8a5105adf,tensorflow/tensorflow,"Update tensorflow/core/kernels/data/experimental/snapshot_util.cc

Co-authored-by: Mihai Maruseac <mihai.maruseac@gmail.com>",snapshot_util.cc,"@@ -514,8 +514,8 @@ class Reader::NestedDataset : public DatasetBase {
     Status GetNextInternal(IteratorContext* ctx,
                            std::vector<Tensor>* out_tensors,
                            bool* end_of_sequence) override {
-      const int64 dataset_datasets_size = dataset()->datasets_.size();
-      *end_of_sequence = dataset_datasets_size == index_;
+      const int64 num_datasets = dataset()->datasets_.size();
+      *end_of_sequence = num_datasets == index_;
       if (!*end_of_sequence) {
         Tensor tensor(DT_VARIANT, TensorShape({}));
 
",0,train
c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface.

The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons.

PiperOrigin-RevId: 246651256",composite_tensor.py,"@@ -102,7 +102,7 @@ class CompositeTensor(object):
     """"""Returns True if this tensor's components belong to a TF graph.""""""
     raise NotImplementedError(""CompositeTensor._is_symbolic_tensor"")
 
-  def consumers(self):
+  def _consumers(self):
     """"""Returns a list of `Operation`s that consume this `CompositeTensor`.
 
     Returns:
",0,train
c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface.

The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons.

PiperOrigin-RevId: 246651256",ops.py,"@@ -1771,6 +1771,9 @@ class IndexedSlices(_TensorLike, composite_tensor.CompositeTensor):
   def _is_graph_tensor(self):
     return hasattr(self._values, ""graph"")
 
+  def consumers(self):
+    return self._consumers()
+
 
 IndexedSlicesValue = collections.namedtuple(
     ""IndexedSlicesValue"", [""values"", ""indices"", ""dense_shape""])
",0,train
c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface.

The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons.

PiperOrigin-RevId: 246651256",sparse_tensor.py,"@@ -260,6 +260,9 @@ class SparseTensor(_TensorLike, composite_tensor.CompositeTensor):
   def _is_graph_tensor(self):
     return hasattr(self._values, ""graph"")
 
+  def consumers(self):
+    return self._consumers()
+
 
 SparseTensorValue = collections.namedtuple(""SparseTensorValue"",
                                            [""indices"", ""values"", ""dense_shape""])
",0,train
c3830c42bd7ac47eb130b09bfdb75e900f767474,tensorflow/tensorflow,"Renaming `consumers` to `_consumers` in the `CompositeTensor` interface to avoid leaking it into the public API of classes that implement the interface.

The consequence of this change is that subclasses of `CompositeTensors` do not necessarily provide the `consumers` method. The existing subclasses -- `SparseTensor`, `RaggedTensor`, and `IndexedSlices` -- do, for legacy reasons.

PiperOrigin-RevId: 246651256",ragged_tensor.py,"@@ -1863,6 +1863,9 @@ class RaggedTensor(composite_tensor.CompositeTensor):
   def _is_graph_tensor(self):
     return hasattr(self._values, ""graph"")
 
+  def consumers(self):
+    return self._consumers()
+
 
 def is_ragged(value):
   """"""Returns true if `value` is a ragged tensor or ragged tensor value.""""""
",0,train
1d3d92dfdecd38daf068583b39aef7811a604601,tensorflow/tensorflow,"No-op refactor and comment fix.

PiperOrigin-RevId: 221786311",train.py,"@@ -1071,8 +1071,19 @@ def get_sequential_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)):
   return get_hooks
 
 
+def _num_joint_steps(train_steps):
+  g_steps = train_steps.generator_train_steps
+  d_steps = train_steps.discriminator_train_steps
+  # Get the number of each type of step that should be run.
+  num_d_and_g_steps = min(g_steps, d_steps)
+  num_g_steps = g_steps - num_d_and_g_steps
+  num_d_steps = d_steps - num_d_and_g_steps
+
+  return num_d_and_g_steps, num_g_steps, num_d_steps
+
+
 def get_joint_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)):
-  """"""Returns a hooks function for sequential GAN training.
+  """"""Returns a hooks function for joint GAN training.
 
   When using these train hooks, IT IS RECOMMENDED TO USE `use_locking=True` ON
   ALL OPTIMIZERS TO AVOID RACE CONDITIONS.
@@ -1105,12 +1116,7 @@ def get_joint_train_hooks(train_steps=namedtuples.GANTrainSteps(1, 1)):
   Returns:
     A function that takes a GANTrainOps tuple and returns a list of hooks.
   """"""
-  g_steps = train_steps.generator_train_steps
-  d_steps = train_steps.discriminator_train_steps
-  # Get the number of each type of step that should be run.
-  num_d_and_g_steps = min(g_steps, d_steps)
-  num_g_steps = g_steps - num_d_and_g_steps
-  num_d_steps = d_steps - num_d_and_g_steps
+  num_d_and_g_steps, num_g_steps, num_d_steps = _num_joint_steps(train_steps)
 
   def get_hooks(train_ops):
     g_op = train_ops.generator_train_op
",0,test
0c46c7bcb05b1502bda869db371bee198d5be28a,tensorflow/tensorflow,"Switch to doxygen-friendly comments in generated C++ op code.
Change: 144859450",cc_op_gen.cc,"@@ -76,9 +76,9 @@ string ToGuard(const std::string& path) {
 }
 
 // Change:     Into:
-//   ABC         // ABC
-//               //
-//   DEF         // DEF
+//   ABC         /// ABC
+//               ///
+//   DEF         /// DEF
 string MakeComment(StringPiece text, StringPiece indent) {
   string ret;
   while (!text.empty()) {
@@ -89,9 +89,9 @@ string MakeComment(StringPiece text, StringPiece indent) {
       if (text[newline] != ' ') last_non_space = newline;
     }
     if (last_non_space == -1) {
-      strings::StrAppend(&ret, indent, ""//\n"");
+      strings::StrAppend(&ret, indent, ""///\n"");
     } else {
-      strings::StrAppend(&ret, indent, ""// "",
+      strings::StrAppend(&ret, indent, ""/// "",
                          text.substr(0, last_non_space + 1), ""\n"");
     }
     text.remove_prefix(newline + 1);
",0,train
4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline.

PiperOrigin-RevId: 349334274
Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization.cc,"@@ -65,6 +65,12 @@ Status MapParallelization::OptimizeAndCollectStats(Cluster* cluster,
   }
   MutableGraphView graph(output);
 
+  // If the GrapplerItem is derived from a FunctionDef, we don't optimize it,
+  // because we only want to enable extra map parallelism on the main dataset
+  // pipeline.
+  if (graph_utils::IsItemDerivedFromFunctionDef(item, graph))
+    return Status::OK();
+
   absl::flat_hash_set<string> nodes_to_delete;
   FunctionLibraryDefinition function_library(OpRegistry::Global(),
                                              item.graph.library());
",0,train
4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline.

PiperOrigin-RevId: 349334274
Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization_test.cc,"@@ -57,12 +57,15 @@ TEST_P(AutotuneSetting, MapParallelizationTest) {
        NDef(""stop"", ""Const"", {}, {{""value"", 10}, {""dtype"", DT_INT32}}),
        NDef(""step"", ""Const"", {}, {{""value"", 1}, {""dtype"", DT_INT32}}),
        NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}),
-       MakeMapNode(""map"", ""range"", stateless_fun_name)},
+       MakeMapNode(""map"", ""range"", stateless_fun_name),
+       NDef(""Sink"", ""Identity"", {""map""}, {})},
       // FunctionLib
       {
           test::function::XTimesTwo(),
       });
 
+  item.fetch.push_back(""Sink"");
+
   GraphDef output;
   TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, autotune));
   EXPECT_EQ(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output),
@@ -72,6 +75,39 @@ TEST_P(AutotuneSetting, MapParallelizationTest) {
 
 INSTANTIATE_TEST_SUITE_P(Test, AutotuneSetting, ::testing::Values(false, true));
 
+class FromFunctionDef : public ::testing::TestWithParam<string> {};
+
+TEST_P(FromFunctionDef, MapParallelizationTest) {
+  const string op = GetParam();
+  bool from_function_def = (op == ""_Retval"");
+
+  using test::function::NDef;
+  GrapplerItem item;
+  item.graph = test::function::GDef(
+      {NDef(""start"", ""Const"", {}, {{""value"", 0}, {""dtype"", DT_INT32}}),
+       NDef(""stop"", ""Const"", {}, {{""value"", 10}, {""dtype"", DT_INT32}}),
+       NDef(""step"", ""Const"", {}, {{""value"", 1}, {""dtype"", DT_INT32}}),
+       NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}),
+       MakeMapNode(""map"", ""range"", stateless_fun_name),
+       NDef(""Sink"", op, {""map""}, {})},
+      // FunctionLib
+      {
+          test::function::XTimesTwo(),
+      });
+
+  item.fetch.push_back(""Sink"");
+
+  GraphDef output;
+  TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true));
+  EXPECT_EQ(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output),
+            !from_function_def);
+  EXPECT_EQ(graph_utils::ContainsGraphNodeWithName(""map"", output),
+            from_function_def);
+}
+
+INSTANTIATE_TEST_SUITE_P(Test, FromFunctionDef,
+                         ::testing::Values(""Identity"", ""_Retval""));
+
 TEST(ParallelizeAssert, MapParallelizationTest) {
   using test::function::NDef;
   GrapplerItem item;
@@ -83,13 +119,16 @@ TEST(ParallelizeAssert, MapParallelizationTest) {
        NDef(""range"", ""RangeDataset"", {""start"", ""stop"", ""step""}, {}),
        MakeMapNode(""map1"", ""range"", stateful_fun_name),
        MakeMapNode(""map2"", ""map1"", stateless_fun_name),
-       NDef(""cache"", ""CacheDataset"", {""map2"", ""filename""}, {})},
+       NDef(""cache"", ""CacheDataset"", {""map2"", ""filename""}, {}),
+       NDef(""Sink"", ""Identity"", {""cache""}, {})},
       // FunctionLib
       {
           test::function::XTimesTwo(),
           test::function::RandomUniform(),
       });
 
+  item.fetch.push_back(""Sink"");
+
   GraphDef output;
   TF_ASSERT_OK(OptimizeWithMapParallelization(item, &output, true));
   EXPECT_TRUE(graph_utils::ContainsNodeWithOp(""ParallelMapDataset"", output));
",0,train
4316054cc62b030832cd0fd3cdd175e92f232ebf,tensorflow/tensorflow,"[tf.data] Only enables optimization `map_parallelization` on the main dataset pipeline.

PiperOrigin-RevId: 349334274
Change-Id: I40dd2650a59dc4677de33d0a62aee4b104412450",map_parallelization_test.py,"@@ -108,24 +108,49 @@ class MapParallelizationTest(test_base.DatasetTestBase, parameterized.TestCase):
           combinations.combine(apply_autotune=[None, True, False])))
   def testAutotuneOption(self, apply_autotune):
     next_nodes = [""ParallelMap""] if (apply_autotune is not False) else [""Map""]  # pylint: disable=g-bool-id-comparison
+    dataset = dataset_ops.Dataset.range(4).apply(
+        testing.assert_next(next_nodes)).map(lambda x: x + 2)
+
+    options = dataset_ops.Options()
+    options.experimental_optimization.apply_default_optimizations = False
+    options.experimental_optimization.map_parallelization = True
+    if apply_autotune is not None:
+      options.experimental_optimization.autotune = apply_autotune
+    dataset = dataset.with_options(options)
+    self.assertDatasetProduces(dataset, expected_output=[2, 3, 4, 5])
+
+  @combinations.generate(test_base.default_test_combinations())
+  def testNoParallelizationInsideInterleave(self):
 
     def func(i):
-      ds = dataset_ops.Dataset.range(i).apply(
-          testing.assert_next(next_nodes)).map(lambda x: x + 1)
+      ds = dataset_ops.Dataset.range(i).apply(testing.assert_next(
+          [""Map""])).map(lambda x: x + 1)
       return ds
 
     dataset = dataset_ops.Dataset.range(1, 4).interleave(
-        map_func=func, cycle_length=4, block_length=5)
-    dataset = dataset.apply(
-        testing.assert_next(next_nodes)).map(lambda x: x * 2)
+        map_func=func, cycle_length=2, block_length=2)
+    options = dataset_ops.Options()
+    options.experimental_optimization.apply_default_optimizations = False
+    options.experimental_optimization.map_parallelization = True
+    dataset = dataset.with_options(options)
+
+    self.assertDatasetProduces(dataset, expected_output=[1, 1, 2, 1, 2, 3])
 
+  @combinations.generate(test_base.default_test_combinations())
+  def testNoParallelizationInsideFlatMap(self):
+
+    def func(i):
+      ds = dataset_ops.Dataset.range(i).apply(testing.assert_next(
+          [""Map""])).map(lambda x: x + 1)
+      return ds
+
+    dataset = dataset_ops.Dataset.range(1, 4).flat_map(map_func=func)
     options = dataset_ops.Options()
     options.experimental_optimization.apply_default_optimizations = False
     options.experimental_optimization.map_parallelization = True
-    if apply_autotune is not None:
-      options.experimental_optimization.autotune = apply_autotune
     dataset = dataset.with_options(options)
-    self.assertDatasetProduces(dataset, expected_output=[2, 2, 4, 2, 4, 6])
+
+    self.assertDatasetProduces(dataset, expected_output=[1, 1, 2, 1, 2, 3])
 
 
 if __name__ == ""__main__"":
",0,train
21f5c12e3d9c5b0c2f4c45c70a3da08b4edf212d,tensorflow/tensorflow,"Fix PrepareForStrCat() for types that are AlphaNum constructible but not implicitly convertible.

In the latest dev version of Eigen, `Eigen::half` is implicitly
convertible to float.  This makes `std::is_constructible<AlphaNum,Eigen::half>` true (using the
float constructor), but since `Eigen::half` is not implicitly convertible to `AlphaNum` directly,
this leads to the compile error:
```
./third_party/tensorflow/core/platform/errors.h:107:1: error: no matching function for call to 'PrepareForStrCat'
...

./third_party/tensorflow/core/platform/errors.h:54:33: note: candidate function not viable: no known conversion from 'Eigen::half' to 'const strings::AlphaNum' for 1st argument
inline const strings::AlphaNum& PrepareForStrCat(const strings::AlphaNum& a) {
                                ^
./third_party/tensorflow/core/platform/errors.h:49:1: note: candidate template ignored: requirement '!std::is_constructible_v<tensorflow::strings::AlphaNum, Eigen::half>' was not satisfied [with T = Eigen::half]
PrepareForStrCat(const T& t) {
^
```
The same error occurs for any type implicitly convertible to int/float/double/string/etc...

To fix this, we need to change the condition to `!is_convertible<T, AlphaNum>`, so that
if `T` *is* implicitly convertible to `AlphaNum`, it will use the `AlphaNum` version,
otherwise it will use the stream operator version.

See [MR !278](https://gitlab.com/libeigen/eigen/-/merge_requests/278).

PiperOrigin-RevId: 343357980
Change-Id: Ibad29a54105a70c473ca4c9b205fce8356d155ab",errors.h,"@@ -44,7 +44,7 @@ namespace internal {
 // Eventually absl::strings will have native support for this and we will be
 // able to completely remove PrepareForStrCat().
 template <typename T>
-typename std::enable_if<!std::is_constructible<strings::AlphaNum, T>::value,
+typename std::enable_if<!std::is_convertible<T, strings::AlphaNum>::value,
                         std::string>::type
 PrepareForStrCat(const T& t) {
   std::stringstream ss;
",0,train
bee0d8ead2c4445546c089fd59c5c9ff98bbae0a,tensorflow/tensorflow,"Add support for legalizing mhlo.slice to lmhlo.slice

PiperOrigin-RevId: 330153599
Change-Id: I8b62f003b20742ab11fce19f50e38039be898606",map_hlo_to_lhlo_op.h,"@@ -69,6 +69,7 @@ MAP_HLO_TO_LHLO(RsqrtOp);
 MAP_HLO_TO_LHLO(SelectOp);
 MAP_HLO_TO_LHLO(SignOp);
 MAP_HLO_TO_LHLO(SinOp);
+MAP_HLO_TO_LHLO(SliceOp);
 MAP_HLO_TO_LHLO(SqrtOp);
 MAP_HLO_TO_LHLO(SubOp);
 MAP_HLO_TO_LHLO(TanhOp);
",0,train
bee0d8ead2c4445546c089fd59c5c9ff98bbae0a,tensorflow/tensorflow,"Add support for legalizing mhlo.slice to lmhlo.slice

PiperOrigin-RevId: 330153599
Change-Id: I8b62f003b20742ab11fce19f50e38039be898606",hlo_legalize_to_lhlo.cc,"@@ -497,6 +497,7 @@ void populateHLOToLHLOConversionPattern(
       HloToLhloOpConverter<mhlo::ReshapeOp>,
       HloToLhloOpConverter<mhlo::SelectOp>,
       HloToLhloOpConverter<mhlo::SignOp>,
+      HloToLhloOpConverter<mhlo::SliceOp>,
       HloToLhloOpConverter<mhlo::SqrtOp>,
       HloToLhloOpConverter<mhlo::SubOp>,
       HloToLhloOpConverter<mhlo::TanhOp>,
",0,train
d0c647ff2f6f3398252c9831c8b49e8a2c3c8db5,tensorflow/tensorflow,"Fix misleading comment.

PiperOrigin-RevId: 188450336",ir_array.h,"@@ -76,8 +76,7 @@ class IrArray {
           llvm::IRBuilder<>* ir_builder);
 
     // Constructs an index from the given multi-dimensional index and the shape
-    // that it indexes into. Also, computes the linear index according to
-    // ""shape"".
+    // that it indexes into.
     //
     // Precondition: ""shape"" has a layout.
     Index(tensorflow::gtl::ArraySlice<llvm::Value*> multidim,
",0,train
23ddb5c69a6c98e8654b6114b1aa33606460638a,tensorflow/tensorflow,"Move TODO into comment.

PiperOrigin-RevId: 223398784",op_hint.py,"@@ -104,9 +104,9 @@ class OpHint(object):
   that make up the pseudo op. A similar process is done to any output that
   is to be exported from the current op.
 
-  TODO(aselle): When TensorFlow functions functionality works for arbitrary
-  constructs, this mechanism can be retired and changed to use python defun's.
   """"""
+  # TODO(aselle): When TensorFlow functions functionality works for arbitrary
+  # constructs, this mechanism can be retired and changed to use python defun's.
 
   # Attr constants that are used for representation in the GraphDef. These
   # will be used on every Identity op that is involved in a total OpHint.
",0,train
010506f4feb93ff210fe92d5b48b8b6da56fea9b,tensorflow/tensorflow,"Fix docstring typos in tf.distributions.bijectors.Bijector.

PiperOrigin-RevId: 171756150",bijector_impl.py,"@@ -158,7 +158,7 @@ class Bijector(object):
   # Evaluate forward transformation.
   fwd_x = my_bijector.forward(x)
   x == my_bijector.inverse(fwd_x)
-  x != my_bijector.forward(fwd_x)  # Not equal because g(x) != g(g(x)).
+  x != my_bijector.forward(fwd_x)  # Not equal because x != g(g(x)).
   ```
 
   - Computing a log-likelihood:
@@ -275,7 +275,7 @@ class Bijector(object):
       implies `g^{-1}` is differentiable in the image of `g`.
       Applying the chain rule to `y = g(x) = g(g^{-1}(y))` yields
       `I = g'(g^{-1}(y))*g^{-1}'(y)`.
-      The same theorem also implies `g{-1}'` is non-singular therefore:
+      The same theorem also implies `g^{-1}'` is non-singular therefore:
       `inv[ g'(g^{-1}(y)) ] = g^{-1}'(y)`.
       The claim follows from [properties of determinant](
   https://en.wikipedia.org/wiki/Determinant#Multiplicativity_and_matrix_groups).
",0,train
5ef5f683cafbf3d961cf900ea848621dee393625,tensorflow/tensorflow,Deprecated tf.Session removed in input_data.py,input_data.py,"@@ -122,7 +122,7 @@ def load_wav_file(filename):
   Returns:
     Numpy array holding the sample data as floats between -1.0 and 1.0.
   """"""
-  with tf.Session(graph=tf.Graph()) as sess:
+  with tf.compat.v1.Session(graph=tf.Graph()) as sess:
     wav_filename_placeholder = tf.placeholder(tf.string, [])
     wav_loader = io_ops.read_file(wav_filename_placeholder)
     wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
@@ -139,7 +139,7 @@ def save_wav_file(filename, wav_data, sample_rate):
     wav_data: 2D array of float PCM-encoded audio data.
     sample_rate: Samples per second to encode in the file.
   """"""
-  with tf.Session(graph=tf.Graph()) as sess:
+  with tf.compat.v1.Session(graph=tf.Graph()) as sess:
     wav_filename_placeholder = tf.placeholder(tf.string, [])
     sample_rate_placeholder = tf.placeholder(tf.int32, [])
     wav_data_placeholder = tf.placeholder(tf.float32, [None, 1])
@@ -349,7 +349,7 @@ class AudioProcessor(object):
     background_dir = os.path.join(self.data_dir, BACKGROUND_NOISE_DIR_NAME)
     if not os.path.exists(background_dir):
       return self.background_data
-    with tf.Session(graph=tf.Graph()) as sess:
+    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
       wav_filename_placeholder = tf.placeholder(tf.string, [])
       wav_loader = io_ops.read_file(wav_filename_placeholder)
       wav_decoder = contrib_audio.decode_wav(wav_loader, desired_channels=1)
@@ -654,7 +654,7 @@ class AudioProcessor(object):
     words_list = self.words_list
     data = np.zeros((sample_count, desired_samples))
     labels = []
-    with tf.Session(graph=tf.Graph()) as sess:
+    with tf.compat.v1.Session(graph=tf.Graph()) as sess:
       wav_filename_placeholder = tf.placeholder(tf.string, [])
       wav_loader = io_ops.read_file(wav_filename_placeholder)
       wav_decoder = contrib_audio.decode_wav(
",0,train
795652118936b28b2ae3c1da63fd2efb5f445adc,tensorflow/tensorflow,"Reduce conservatism for Send/Recv ops

Split SendRecv effect into Send effect and Recv effect since we don't need any
dependencies between Send and Recv ops. Note that we do need dependencies
between different Send ops and different Recv ops unless we know that they
generate different rendezvous keys.

PiperOrigin-RevId: 425389580
Change-Id: Iccaca22d3abce26f9a9dfe1b10e9089211833823",tf_side_effects.h,"@@ -68,8 +68,12 @@ struct GeneratorOp : public ::mlir::SideEffects::Resource::Base<GeneratorOp> {
   StringRef getName() final { return ""<Default Generator>""; }
 };
 
-struct SendRecv : public ::mlir::SideEffects::Resource::Base<SendRecv> {
-  StringRef getName() final { return ""<SendRecv>""; }
+struct Send : public ::mlir::SideEffects::Resource::Base<Send> {
+  StringRef getName() final { return ""<Send>""; }
+};
+
+struct Recv : public ::mlir::SideEffects::Resource::Base<Recv> {
+  StringRef getName() final { return ""<Recv>""; }
 };
 
 struct RandomGenerator
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",NestedMatcher.h,"@@ -63,8 +63,8 @@ struct NestedMatch {
   ArrayRef<NestedMatch> getMatchedChildren() { return matchedChildren; }
 
 private:
-  friend class NestedPattern;
-  friend class NestedPatternContext;
+  friend struct NestedPattern;
+  friend struct NestedPatternContext;
 
   /// Underlying global bump allocator managed by a NestedPatternContext.
   static llvm::BumpPtrAllocator *&allocator();
@@ -116,8 +116,8 @@ struct NestedPattern {
   unsigned getDepth() const;
 
 private:
-  friend class NestedPatternContext;
-  friend class NestedMatch;
+  friend struct NestedPatternContext;
+  friend struct NestedMatch;
   friend struct State;
 
   /// Underlying global bump allocator managed by a NestedPatternContext.
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",Utils.h,"@@ -39,7 +39,7 @@ class AffineForOp;
 class Block;
 class FlatAffineConstraints;
 class Location;
-class MemRefAccess;
+struct MemRefAccess;
 class Operation;
 class Value;
 
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",Builders.h,"@@ -239,7 +239,7 @@ private:
 
 /// Base class for ValueHandle, OperationHandle and BlockHandle.
 /// Not meant to be used outside of these classes.
-struct CapturableHandle {
+class CapturableHandle {
 protected:
   CapturableHandle() = default;
 };
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",Helpers.h,"@@ -113,7 +113,8 @@ private:
 /// Assigning to an IndexedValue emits an actual `Store` operation, while
 /// converting an IndexedValue to a ValueHandle emits an actual `Load`
 /// operation.
-template <typename Load, typename Store> struct TemplatedIndexedValue {
+template <typename Load, typename Store> class TemplatedIndexedValue {
+public:
   explicit TemplatedIndexedValue(Type t) : base(t) {}
   explicit TemplatedIndexedValue(Value *v)
       : TemplatedIndexedValue(ValueHandle(v)) {}
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",AffineExpr.h,"@@ -36,11 +36,11 @@ class IntegerSet;
 
 namespace detail {
 
-class AffineExprStorage;
-class AffineBinaryOpExprStorage;
-class AffineDimExprStorage;
-class AffineSymbolExprStorage;
-class AffineConstantExprStorage;
+struct AffineExprStorage;
+struct AffineBinaryOpExprStorage;
+struct AffineDimExprStorage;
+struct AffineSymbolExprStorage;
+struct AffineConstantExprStorage;
 
 } // namespace detail
 
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",Location.h,"@@ -34,12 +34,12 @@ class Identifier;
 
 namespace detail {
 
-class LocationStorage;
-class UnknownLocationStorage;
-class FileLineColLocationStorage;
-class NameLocationStorage;
-class CallSiteLocationStorage;
-class FusedLocationStorage;
+struct LocationStorage;
+struct UnknownLocationStorage;
+struct FileLineColLocationStorage;
+struct NameLocationStorage;
+struct CallSiteLocationStorage;
+struct FusedLocationStorage;
 
 } // namespace detail
 
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",Operation.h,"@@ -34,7 +34,7 @@ class BlockAndValueMapping;
 class Location;
 class MLIRContext;
 class OperandIterator;
-class OperationState;
+struct OperationState;
 class ResultIterator;
 class ResultTypeIterator;
 
",0,train
800706fbdb725c3db0f7fc41e1f348bba0c6e5c3,tensorflow/tensorflow,"Be consistent w.r.t. struct/class in forward declaration

    This addresses compiler warnings.

--

PiperOrigin-RevId: 246386939",StandardTypes.h,"@@ -22,7 +22,7 @@
 #include ""mlir/Support/LLVM.h""
 
 namespace llvm {
-class fltSemantics;
+struct fltSemantics;
 } // namespace llvm
 
 namespace mlir {
",0,train
a0fc1302d25c7f14c7893374a97751ab97373e9a,tensorflow/tensorflow,"Change zip(...)[1] to list(zip(...))[1], for python 3 compatibility.

PiperOrigin-RevId: 167654035",backprop.py,"@@ -186,7 +186,7 @@ def _aggregate_grads(gradients):
       ret.append(g_list[0])
     else:
       # TODO(xpan): Aggregate IndexedSlices.
-      ret.append((g_list[0][0], math_ops.add_n(zip(*g_list)[1])))
+      ret.append((g_list[0][0], math_ops.add_n(list(zip(*g_list))[1])))
   return ret
 
 
",0,test
b8b7f916f79dce0f6d7c65aabf7f6b0c8092574a,tensorflow/tensorflow,"Fix fp16 tf.linalg.band_part bug.

The issue was that some output values were uninitialized when they should have been 0.

Unfortunately, I could not get this to reproduce in a unit test, but it was occurring in the Transformer Keras model.

PiperOrigin-RevId: 252733927",matrix_band_part_op.cc,"@@ -148,7 +148,8 @@ struct MatrixBandPartFunctor<CPUDevice, Scalar> {
     const bool in_place = input.data() == output.data();
     auto compute_shard = [=, &input, &output](int64 begin, int64 end) {
       if (!in_place) {
-        std::fill(output.data() + begin * n, output.data() + end * n, Scalar());
+        std::fill(output.data() + begin * n, output.data() + end * n,
+                  Scalar(0));
       }
       const int64 batch_begin = begin / m;
       const int64 batch_end = (end + m - 1) / m;
@@ -167,11 +168,11 @@ struct MatrixBandPartFunctor<CPUDevice, Scalar> {
           if (in_place) {
             if (band_start > 0) {
               std::fill(&output(batch, row, 0), &output(batch, row, band_start),
-                        Scalar());
+                        Scalar(0));
             }
             if (band_end < n) {
               std::fill(&output(batch, row, band_end), &output(batch, row, n),
-                        Scalar());
+                        Scalar(0));
             }
           } else {
             if (band_start < band_end) {
",0,train
b8b7f916f79dce0f6d7c65aabf7f6b0c8092574a,tensorflow/tensorflow,"Fix fp16 tf.linalg.band_part bug.

The issue was that some output values were uninitialized when they should have been 0.

Unfortunately, I could not get this to reproduce in a unit test, but it was occurring in the Transformer Keras model.

PiperOrigin-RevId: 252733927",matrix_band_part_op_gpu.cu.cc,"@@ -42,7 +42,7 @@ __global__ void MatrixBandPartKernel(const int num_threads,
     const int band_start = (num_lower_diags < 0 ? 0 : row - num_lower_diags);
     const int band_end = (num_upper_diags < 0 ? n : row + num_upper_diags + 1);
     if (col < band_start || col >= band_end) {
-      output_ptr[index] = Scalar();
+      output_ptr[index] = Scalar(0);
     } else {
       output_ptr[index] = input_ptr[index];
     }
",0,train
9664ba19296e58f4437feab4d4b2789cc1e38fd4,tensorflow/tensorflow,"[XLA/GPU] Fix row reduction codegen: we only need 32 bytes of shared memory

PiperOrigin-RevId: 297881063
Change-Id: I40a924779b56c0b50ebf4b66fa7bf9202a833b19",ir_emitter_unnested.cc,"@@ -2156,9 +2156,9 @@ void IrEmitterUnnested::EmitPrologueForReduction(
         reduce_inst->shape().element_type(), module_);
     llvm::Type* buffer_type = [&] {
       if (reduction_info->IsRowReduction()) {
-        // Allocate __shared__ cache[num_partial_results][num_threads].
+        // Allocate __shared__ cache[num_partial_results][kWarpSize].
         return llvm::ArrayType::get(
-            llvm::ArrayType::get(primitive_type, num_threads_x),
+            llvm::ArrayType::get(primitive_type, kWarpSize),
             num_partial_results);
       } else {
         // Allocate __shared__
",0,train
9664ba19296e58f4437feab4d4b2789cc1e38fd4,tensorflow/tensorflow,"[XLA/GPU] Fix row reduction codegen: we only need 32 bytes of shared memory

PiperOrigin-RevId: 297881063
Change-Id: I40a924779b56c0b50ebf4b66fa7bf9202a833b19",gpu_kernel_tiling_test.cc,"@@ -815,6 +815,30 @@ ENTRY %primitive_computation_svd.38 (constant_5: f32[3,29,29], fusion.3: pred[3]
   EXPECT_TRUE(RunAndCompareNoHloPasses(kHloString, ErrorSpec{0.001}));
 }
 
+TEST_F(GpuKernelTilingTest, RowReductionCorrectShmemUsage) {
+  const char *const kHloString = R""(
+  HloModule RowReduce
+
+  Sum {
+    x.1 = f32[] parameter(0)
+    y.1 = f32[] parameter(1)
+    ROOT add.1 = f32[] add(x.1, y.1)
+  }
+
+  ENTRY reduce.1 {
+    parameter = f32[1048576] parameter(0)
+    init_value = f32[] constant(0)
+    ROOT reduce = f32[] reduce(parameter, init_value), dimensions={0}, to_apply=Sum
+  }
+  )"";
+  auto hlo_module = ParseAndReturnVerifiedModule(kHloString).ValueOrDie();
+  auto expected_ir = R""(
+; CHECK: shared_cache_{{[0-9]*}} = private addrspace({{[0-9]*}}) global [1 x [32 x float]]
+  )"";
+  CompileAndVerifyIr(std::move(hlo_module), expected_ir,
+                     /*match_optimized_ir=*/true);
+}
+
 }  // namespace
 }  // namespace gpu
 }  // namespace xla
",0,train
866bade85dec9a231350201fb147f9381eee55c7,tensorflow/tensorflow,"Remove deprecation on int64 type.

I have half a million lines of log-spew in my CI from this and clearly there has been ~no effort in the codebase to actually stop using it. Please do not deprecate such a critical type until at least its own codebase has substantially migrated off of it.

PiperOrigin-RevId: 399093429
Change-Id: I807d9e6c79c4c33f4a50f88ba28fe5fda194ddd4",integral_types.h,"@@ -26,7 +26,7 @@ namespace tensorflow {
 typedef signed char int8;
 typedef short int16;
 typedef int int32;
-[[deprecated(""Use int64_t instead."")]] typedef ::std::int64_t int64;
+typedef ::std::int64_t int64;
 
 typedef unsigned char uint8;
 typedef unsigned short uint16;
",0,train
dce7bc4c68929bf890912471a8c1ebb0d86ce044,tensorflow/tensorflow,"TFLite GPU Delegate: Implement unit tests for slice operation.

PiperOrigin-RevId: 252670406",slice_test.cc,"@@ -0,0 +1,174 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/delegates/gpu/gl/kernels/slice.h""
+
+#include <vector>
+
+#include <gmock/gmock.h>
+#include <gtest/gtest.h>
+#include ""tensorflow/lite/delegates/gpu/common/operations.h""
+#include ""tensorflow/lite/delegates/gpu/gl/kernels/test_util.h""
+
+using ::testing::FloatNear;
+using ::testing::Pointwise;
+
+namespace tflite {
+namespace gpu {
+namespace gl {
+namespace {
+
+TEST(SliceTest, Identity) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 1, 2, 2);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 2, 2);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 0, 0);
+  attr.ends = HWC(1, 2, 2);
+  attr.strides = HWC(1, 1, 1);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 2, 3, 4}));
+}
+
+TEST(SliceTest, NegativeEnds) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 1, 2, 2);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 2, 2);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 0, 0);
+  attr.ends = HWC(1, -1, -1);
+  attr.strides = HWC(1, 1, 1);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 2, 3, 4}));
+}
+
+TEST(SliceTest, NegativeEndsNonZeroStarts) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 1, 2, 2);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 1, 1);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 1, 0);
+  attr.ends = HWC(0, 1, 1);
+  attr.strides = HWC(1, 1, 1);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {3}));
+}
+
+TEST(SliceTest, StridesByHeight) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 4, 1, 1);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 2, 1, 1);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 0, 0);
+  attr.ends = HWC(-1, -1, -1);
+  attr.strides = HWC(2, 1, 1);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {1, 3}));
+}
+
+TEST(SliceTest, StridesByWidth) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 1, 4, 1);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 2, 1);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 1, 0);
+  attr.ends = HWC(-1, -1, -1);
+  attr.strides = HWC(1, 2, 1);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {2, 4}));
+}
+
+TEST(SliceTest, StridesByChannels) {
+  TensorRefFloat32 input;
+  input.type = DataType::FLOAT32;
+  input.ref = 0;
+  input.shape = BHWC(1, 1, 1, 4);
+
+  TensorRefFloat32 output;
+  output.type = DataType::FLOAT32;
+  output.ref = 1;
+  output.shape = BHWC(1, 1, 1, 1);
+
+  SliceAttributes attr;
+  attr.starts = HWC(0, 0, 2);
+  attr.ends = HWC(-1, -1, -1);
+  attr.strides = HWC(1, 1, 3);
+
+  SingleOpModel model({ToString(OperationType::SLICE), attr}, {input},
+                      {output});
+  ASSERT_TRUE(model.PopulateTensor(0, {1, 2, 3, 4}));
+  ASSERT_TRUE(model.Invoke(*NewSliceNodeShader()));
+  EXPECT_THAT(model.GetOutput(0), Pointwise(FloatNear(1e-6), {3}));
+}
+
+}  // namespace
+}  // namespace gl
+}  // namespace gpu
+}  // namespace tflite
",0,train
1dbe0671a05257244fd9eae5701092c24540d872,tensorflow/tensorflow,"[XLA] fix bug in conditional_code_motion.cc by checking whether an instruction is dead before removing it.
The bug has to do with the fact that instructions in alternative branches (other than branch(0)) of a conditional may be placed into boundaries to move out multiple times, if they happen to be identical to those in branch(0) and are shared multiple times (while those in branch(0) are not shared). The fix avoids deleting them if they have already been deleted, or if they still have uses inside their conditional branch.

PiperOrigin-RevId: 330045927
Change-Id: I7a786eaa77085dd65609cc8639019874140474c0",conditional_code_motion.cc,"@@ -97,6 +97,17 @@ class BoundaryVisitor {
   absl::flat_hash_set<HloInstruction*> visited_;
 };
 
+template <class OpCollection>
+int64 CountNonLeafOps(const OpCollection& ops) {
+  absl::flat_hash_set<HloInstruction*> op_set;
+  for (auto op : ops) {
+    if (!op_set.contains(op) && op->opcode() != HloOpcode::kConstant) {
+      op_set.insert(op);
+    }
+  }
+  return op_set.size();
+}
+
 // Returns estimation of potential reuses carried by a given pair of
 // instructions.  Use different integers to classify different levels
 // of reuses This is used as a placeholder only, assuming all
@@ -120,7 +131,7 @@ int64 ReusesCarriedBy(HloInstruction* op, HloInstruction* user) {
       return 10;
     default:
       // Assume fusion will not happen anyway if user count > 1)
-      if (op->user_count() > 1) {
+      if (CountNonLeafOps(op->users()) > 1) {
         return 0;
       }
       return 10;
@@ -508,8 +519,16 @@ StatusOr<bool> ConditionalCodeMotion::MoveInstructionOut(
     VLOG(2) << ""computation is :"" << computation->ToString() << ""\n"";
     // Remove hoisted instructions from the branches.
     for (auto b2 : to_move_out) {
-      VLOG(2) << ""Removing boundary:"" << b2.ToString() << ""\n"";
-      TF_RETURN_IF_ERROR(computation->RemoveInstruction(b2.operands()[i]));
+      auto instr_to_remove = b2.operands()[i];
+      // Double check to make sure it is safe to delete the instruction.
+      // Complications may arise due to some operations in the alternative
+      // branches (branches 1..n) being placed into the boundaries multiple
+      // times.
+      if (!computation->IsMarkedAsDead(instr_to_remove) &&
+          instr_to_remove->user_count() == 0) {
+        VLOG(2) << ""Removing boundary:"" << b2.ToString() << ""\n"";
+        TF_RETURN_IF_ERROR(computation->RemoveInstruction(instr_to_remove));
+      }
     }
   }
   // Change conditional instruction shape to the shape of the new root.
@@ -847,17 +866,6 @@ class GroupConnectedBoundaries {
     }
     return b2;
   }
-  int64 CountNonLeafOps(const xla::HloInstruction::InstructionVector& ops) {
-    int64 count = 0;
-    absl::flat_hash_set<HloInstruction*> op_set;
-    for (auto op : ops) {
-      if (!op_set.contains(op) && op->opcode() != HloOpcode::kConstant) {
-        count++;
-        op_set.insert(op);
-      }
-    }
-    return count;
-  }
   // This function is reused both for moving the boundary outside or into a
   // conditional. As the result, the readability is somewhat compromised.
   // It might be nice to refactor this function to factor the outside-inside
",0,train
1dbe0671a05257244fd9eae5701092c24540d872,tensorflow/tensorflow,"[XLA] fix bug in conditional_code_motion.cc by checking whether an instruction is dead before removing it.
The bug has to do with the fact that instructions in alternative branches (other than branch(0)) of a conditional may be placed into boundaries to move out multiple times, if they happen to be identical to those in branch(0) and are shared multiple times (while those in branch(0) are not shared). The fix avoids deleting them if they have already been deleted, or if they still have uses inside their conditional branch.

PiperOrigin-RevId: 330045927
Change-Id: I7a786eaa77085dd65609cc8639019874140474c0",conditional_code_motion_test.cc,"@@ -828,6 +828,99 @@ ENTRY main {
                               op::GetTupleElement(op::Conditional(), 1))));
 }
 
+TEST_F(ConditionalCodeMotionTest, MoveReplicatedTupleEntryOut) {
+  absl::string_view hlo_string =
+      R""(
+HloModule RemoveIdenticalInstruction
+
+%add.64 (x.139: bf16[], y.139: bf16[]) -> bf16[] {
+  %x.139 = bf16[]{:T(512)} parameter(0)
+  %y.139 = bf16[]{:T(512)} parameter(1)
+  ROOT %add.44073 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.139, bf16[]{:T(512)} %y.139)
+}
+
+%add.181 (x.256: bf16[], y.256: bf16[]) -> bf16[] {
+  %x.256 = bf16[]{:T(512)} parameter(0)
+  %y.256 = bf16[]{:T(512)} parameter(1)
+  ROOT %add.44842 = bf16[]{:T(512)} add(bf16[]{:T(512)} %x.256, bf16[]{:T(512)} %y.256)
+}
+
+on_true {
+  arg_tuple.1 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(0)
+  get-tuple-element.11 = bf16[2,54,168,128] get-tuple-element(arg_tuple.1), index=0
+  get-tuple-element.12 = bf16[2,52,168,128] get-tuple-element(arg_tuple.1), index=1
+  convolution.1 = bf16[3,3,128,128] convolution(bf16[2,54,168,128]
+    get-tuple-element.11, bf16[2,52,168,128]
+    get-tuple-element.12), window={size=52x168 pad=0_0x1_1},
+    dim_labels=f01b_i01o->01bf
+  all-reduce.1 = bf16[3,3,128,128]
+    all-reduce(bf16[3,3,128,128] %convolution.1),
+    channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true,
+    to_apply=%add.64
+  convert.1 = f32[3,3,128,128] convert(bf16[3,3,128,128] %all-reduce.1)
+  all-reduce.3 = bf16[3,3,128,128]
+    all-reduce(bf16[3,3,128,128] %convolution.1),
+    channel_id=188, replica_groups={{0,1}}, use_global_device_ids=true,
+    to_apply=%add.64
+  convert.3 = f32[3,3,128,128] convert(bf16[3,3,128,128] %all-reduce.3)
+  ROOT tuple.1 = (f32[3,3,128,128], f32[3,3,128,128]) tuple(convert.1, convert.3)
+}
+
+on_false {
+  arg_tuple.2 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(0)
+  get-tuple-element.21 = bf16[2,86,104,128]
+    get-tuple-element(arg_tuple.2), index=0
+  get-tuple-element.22 = bf16[2,84,104,128]
+    get-tuple-element(arg_tuple.2), index=1
+  convolution.2 = bf16[3,3,128,128]
+    convolution(bf16[2,86,104,128] get-tuple-element.21, bf16[2,84,104,128]
+    get-tuple-element.22), window={size=84x104 pad=0_0x1_1},
+    dim_labels=f01b_i01o->01bf
+  all-reduce.2 = bf16[3,3,128,128]
+    all-reduce(bf16[3,3,128,128] %convolution.2),
+    channel_id=485, replica_groups={{0,1}}, use_global_device_ids=true,
+    to_apply=%add.181
+  convert.2 = f32[3,3,128,128]
+    convert(bf16[3,3,128,128] %all-reduce.2)
+  ROOT tuple.2 = (f32[3,3,128,128], f32[3,3,128,128]) tuple(convert.2, convert.2)
+}
+
+ENTRY main {
+  pred.1 = pred[] parameter(0)
+  arg_tuple.3 = (bf16[2,54,168,128], bf16[2,52,168,128]) parameter(1)
+  arg_tuple.4 = (bf16[2,86,104,128], bf16[2,84,104,128]) parameter(2)
+  conditional = (f32[3,3,128,128], f32[3,3,128,128])
+    conditional(pred.1, arg_tuple.3, arg_tuple.4), true_computation=on_true,
+    false_computation=on_false
+  get-first-index = f32[3,3,128,128]
+    get-tuple-element(conditional), index=0
+  add.1 = f32[3,3,128,128] add(f32[3,3,128,128] get-first-index, f32[3,3,128,128] get-first-index)
+  ROOT result = (f32[3,3,128,128]) tuple(add.1)
+}
+)"";
+  auto module = ParseAndReturnVerifiedModule(hlo_string).ValueOrDie();
+  ConditionalCodeMotion pass(true, true);
+  ASSERT_TRUE(pass.Run(&*module).ValueOrDie());
+  const HloInstruction* conditional =
+      FindInstruction(module.get(), ""conditional"");
+  const HloComputation* on_true = conditional->branch_computation(0);
+  ASSERT_EQ(on_true->instruction_count(), 5);
+  const HloComputation* on_false = conditional->branch_computation(1);
+  ASSERT_EQ(on_false->instruction_count(), 5);
+
+  // Checks if conditional shape has changed.
+  ASSERT_TRUE(ShapeUtil::Compatible(
+      conditional->shape(), ShapeUtil::MakeTupleShape({ShapeUtil::MakeShape(
+                                BF16, {3, 3, 128, 128})})));
+  HloInstruction* root = module->entry_computation()->root_instruction();
+  EXPECT_THAT(
+      root,
+      AllOf(op::Tuple(op::Add(
+          op::Convert(op::AllReduce(op::GetTupleElement(op::Conditional()))),
+          op::Convert(
+              op::AllReduce(op::GetTupleElement(op::Conditional())))))));
+}
+
 }  // namespace conditional_opt
 
 }  // namespace xla
",0,train
491fb62d90f080d4daf32b5539ec9b4a2de71c6c,tensorflow/tensorflow,"Add cost estimator tests for the BiasAdd, ReLU, and Conv2D operations.

PiperOrigin-RevId: 186705930",op_level_cost_estimator.cc,"@@ -245,6 +245,8 @@ OpLevelCostEstimator::OpLevelCostEstimator() {
       {""Add"", Eigen::internal::functor_traits<
                   Eigen::internal::scalar_sum_op<float>>::Cost},
       {""ApproximateEqual"", 1},
+      {""BiasAdd"", Eigen::internal::functor_traits<
+                      Eigen::internal::scalar_sum_op<float>>::Cost},
       {""Div"", Eigen::internal::functor_traits<
                   Eigen::internal::scalar_quotient_op<float>>::Cost},
       {""Equal"", 1},
",0,train
491fb62d90f080d4daf32b5539ec9b4a2de71c6c,tensorflow/tensorflow,"Add cost estimator tests for the BiasAdd, ReLU, and Conv2D operations.

PiperOrigin-RevId: 186705930",op_level_cost_estimator_test.cc,"@@ -99,47 +99,81 @@ OpContext DescribeBatchMatMul(const std::vector<int>& dims_a,
 // Wrangles the minimum number of proto fields to set up a 4D Tensor for cost
 // estimation purposes.
 void DescribeTensor4D(int dim0, int dim1, int dim2, int dim3,
-                      OpInfo* op_features) {
-  auto input = op_features->add_inputs();
-  auto shape = input->mutable_shape();
+                      OpInfo::TensorProperties* tensor) {
+  auto shape = tensor->mutable_shape();
   shape->add_dim()->set_size(dim0);
   shape->add_dim()->set_size(dim1);
   shape->add_dim()->set_size(dim2);
   shape->add_dim()->set_size(dim3);
-  input->set_dtype(DT_FLOAT);
+  tensor->set_dtype(DT_FLOAT);
 }
 
-// Returns an OpInfo for Conv2D with the minimum set of fields set up.
+// DescribeConvolution constructs an OpContext for a Conv2D applied to an input
+// tensor with shape (batch, ix, iy, iz1) and a kernel tensor with shape
+// (kx, ky, iz2, oz).
 OpContext DescribeConvolution(int batch, int ix, int iy, int iz1, int iz2,
                               int kx, int ky, int oz) {
   OpContext op_context;
   SetCpuDevice(&op_context.op_info);
   op_context.op_info.set_op(""Conv2D"");
 
-  DescribeTensor4D(batch, ix, iy, iz1, &op_context.op_info);
-  DescribeTensor4D(kx, ky, iz2, oz, &op_context.op_info);
+  DescribeTensor4D(batch, ix, iy, iz1, op_context.op_info.add_inputs());
+  DescribeTensor4D(kx, ky, iz2, oz, op_context.op_info.add_inputs());
+
+  return op_context;
+}
+
+// DescribeUnaryOp constructs an OpContext for the given operation applied to
+// a 4-tensor with shape (size1, 1, 1, 1).
+OpContext DescribeUnaryOp(const string& op, int size1) {
+  OpContext op_context;
+  SetCpuDevice(&op_context.op_info);
+  op_context.op_info.set_op(op);
+
+  DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_inputs());
+  DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_outputs());
+
   return op_context;
 }
 
-OpContext DescribeOp(const string& op, int size1, int size2) {
+// DescribeBinaryOp constructs an OpContext for the given operation applied to
+// a 4-tensor with dimensions (size1, 1, 1, 1) and a 4-tensor with dimensions
+// (2 * size1, size2, 1, 1).
+//
+// The choice of dimension here is arbitrary, and is used strictly to test the
+// cost model for applying elementwise operations to tensors with unequal
+// dimension values.
+OpContext DescribeBinaryOp(const string& op, int size1, int size2) {
   OpContext op_context;
   SetCpuDevice(&op_context.op_info);
   op_context.op_info.set_op(op);
 
-  DescribeTensor4D(size1, 1, 1, 1, &op_context.op_info);
-  DescribeTensor4D(2 * size1, size2, 1, 1, &op_context.op_info);
+  DescribeTensor4D(size1, 1, 1, 1, op_context.op_info.add_inputs());
+  DescribeTensor4D(2 * size1, size2, 1, 1, op_context.op_info.add_inputs());
+  DescribeTensor4D(2 * size1, size2, 1, 1, op_context.op_info.add_outputs());
 
-  auto output = op_context.op_info.add_outputs();
-  auto shape = output->mutable_shape();
-  shape->add_dim()->set_size(2 * size1);
-  shape->add_dim()->set_size(size2);
-  shape->add_dim()->set_size(1);
-  shape->add_dim()->set_size(1);
-  output->set_dtype(DT_FLOAT);
+  return op_context;
+}
 
+// DescribeBiasAdd constructs an OpContext for a BiasAdd applied to a 4-tensor
+// with dimensions (1, 1, size2, size1) and a bias with dimension (size1),
+// according to the constraint that the bias must be 1D with size equal to that
+// of the last dimension of the input value.
+OpContext DescribeBiasAdd(int size1, int size2) {
+  OpContext op_context;
   SetCpuDevice(&op_context.op_info);
+  op_context.op_info.set_op(""BiasAdd"");
+
+  DescribeTensor4D(1, 1, size2, size1, op_context.op_info.add_inputs());
+  DescribeTensor4D(1, 1, size2, size1, op_context.op_info.add_outputs());
+
+  auto bias = op_context.op_info.add_inputs();
+  bias->mutable_shape()->add_dim()->set_size(size1);
+  bias->set_dtype(DT_FLOAT);
+
   return op_context;
 }
+
 }  // namespace
 
 class OpLevelCostEstimatorTest : public ::testing::Test {
@@ -166,8 +200,24 @@ class OpLevelCostEstimatorTest : public ::testing::Test {
   OpLevelCostEstimator estimator_;
 };
 
+TEST_F(OpLevelCostEstimatorTest, BiasAddExecutionTime) {
+  auto cost = PredictCosts(DescribeBiasAdd(1000, 10));
+  EXPECT_EQ(Costs::Duration(8400), cost.memory_time);
+  EXPECT_EQ(Costs::Duration(1000), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(9400), cost.execution_time);
+  EXPECT_FALSE(cost.inaccurate);
+}
+
+TEST_F(OpLevelCostEstimatorTest, Conv2DExecutionTime) {
+  auto cost = PredictCosts(DescribeConvolution(16, 19, 19, 48, 48, 5, 5, 256));
+  EXPECT_EQ(Costs::Duration(233780), cost.memory_time);
+  EXPECT_EQ(Costs::Duration(354877440), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(355111220), cost.execution_time);
+  EXPECT_FALSE(cost.inaccurate);
+}
+
 TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
-  auto cost = PredictCosts(DescribeOp(""Dummy"", 1000, 1));
+  auto cost = PredictCosts(DescribeBinaryOp(""Dummy"", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
   EXPECT_EQ(Costs::Duration(0), cost.compute_time);
   EXPECT_EQ(Costs::Duration(2000), cost.execution_time);
@@ -176,7 +226,7 @@ TEST_F(OpLevelCostEstimatorTest, DummyExecutionTime) {
 
 TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
   SetComputeMemoryOverlap(true);
-  auto cost = PredictCosts(DescribeOp(""Dummy"", 1000, 1));
+  auto cost = PredictCosts(DescribeBinaryOp(""Dummy"", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
   EXPECT_EQ(Costs::Duration(0), cost.compute_time);
   EXPECT_EQ(Costs::Duration(2000), cost.execution_time);  // max(2000, 200)
@@ -185,7 +235,7 @@ TEST_F(OpLevelCostEstimatorTest, ExecutionTimeSumOrMax) {
 }
 
 TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) {
-  auto cost = PredictCosts(DescribeOp(""Mul"", 1000, 1));
+  auto cost = PredictCosts(DescribeBinaryOp(""Mul"", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
   EXPECT_EQ(Costs::Duration(200), cost.compute_time);
   EXPECT_EQ(Costs::Duration(2200), cost.execution_time);
@@ -193,7 +243,7 @@ TEST_F(OpLevelCostEstimatorTest, MulExecutionTime) {
 }
 
 TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) {
-  auto cost = PredictCosts(DescribeOp(""Mul"", 1000, 2));
+  auto cost = PredictCosts(DescribeBinaryOp(""Mul"", 1000, 2));
   EXPECT_EQ(Costs::Duration(3600), cost.memory_time);
   EXPECT_EQ(Costs::Duration(400), cost.compute_time);
   EXPECT_EQ(Costs::Duration(4000), cost.execution_time);
@@ -201,13 +251,21 @@ TEST_F(OpLevelCostEstimatorTest, MulBroadcastExecutionTime) {
 }
 
 TEST_F(OpLevelCostEstimatorTest, ModExecutionTime) {
-  auto cost = PredictCosts(DescribeOp(""Mod"", 1000, 1));
+  auto cost = PredictCosts(DescribeBinaryOp(""Mod"", 1000, 1));
   EXPECT_EQ(Costs::Duration(2000), cost.memory_time);
   EXPECT_EQ(Costs::Duration(1600), cost.compute_time);
   EXPECT_EQ(Costs::Duration(3600), cost.execution_time);
   EXPECT_FALSE(cost.inaccurate);
 }
 
+TEST_F(OpLevelCostEstimatorTest, ReluExecutionTime) {
+  auto cost = PredictCosts(DescribeUnaryOp(""Relu"", 1000));
+  EXPECT_EQ(Costs::Duration(800), cost.memory_time);
+  EXPECT_EQ(Costs::Duration(100), cost.compute_time);
+  EXPECT_EQ(Costs::Duration(900), cost.execution_time);
+  EXPECT_FALSE(cost.inaccurate);
+}
+
 TEST_F(OpLevelCostEstimatorTest, UnknownOrPartialShape) {
   EXPECT_FALSE(PredictCosts(DescribeMatMul(2, 4, 7, 7)).inaccurate);
   EXPECT_TRUE(PredictCosts(DescribeMatMul(-1, 4, 7, 7)).inaccurate);
",0,train
6a20edf95fcaf45c46385eaf649e814a571737ed,tensorflow/tensorflow,"backward compatibility: Disallow changes to an OpDef attribute's default value.

PiperOrigin-RevId: 180611380",op_compatibility_test.cc,"@@ -163,6 +163,18 @@ class OpCompatibilityTest : public OpsTestBase {
 
     ExpectIncompatible(old_op_def, *new_op_def, compatibility_error);
   }
+
+  void ExpectDefaultChangeFailure(const OpDef& old_op_def,
+                                  const string& compatibility_error) {
+    // This should be all that is needed to get compatibility.
+    const OpDef* new_op_def = RegisteredOpDef();
+    AddDefaultsToNodeDef(*new_op_def, node_def());
+
+    // Validate that the NodeDef is valid.
+    TF_ASSERT_OK(ValidateNodeDef(*node_def(), *new_op_def));
+
+    ExpectIncompatible(old_op_def, *new_op_def, compatibility_error);
+  }
 };
 
 // Should be compatible if the Op hasn't changed (sanity check).
@@ -260,40 +272,6 @@ TEST_F(OpCompatibilityTest, AttrOrder) {
   EXPECT_EQ(""attr_order = AttrOrder[a=7, b=true]()"", Result());
 }
 
-// Should be able to add a default to an attr.
-REGISTER_OP(""AddDefault"").Output(""ndef: string"").Attr(""a: int = 1234"");
-REGISTER_KERNEL_BUILDER(Name(""AddDefault"").Device(DEVICE_CPU), TestKernel);
-
-TEST_F(OpCompatibilityTest, AddDefault) {
-  OpRegistrationData old_op;
-  TF_ASSERT_OK(OpDefBuilder(""AddDefault"")
-                   .Output(""ndef: string"")
-                   .Attr(""a: int"")
-                   .Finalize(&old_op));
-  TF_ASSERT_OK(NodeDefBuilder(""add_default"", &old_op.op_def)
-                   .Attr(""a"", 765)
-                   .Finalize(node_def()));
-  ExpectSuccess(old_op.op_def);
-  EXPECT_EQ(""add_default = AddDefault[a=765]()"", Result());
-}
-
-// Should be able to remove a default from an attr, *as long as that
-// attr has always existed*.
-REGISTER_OP(""RemoveDefault"").Output(""ndef: string"").Attr(""a: int"");
-REGISTER_KERNEL_BUILDER(Name(""RemoveDefault"").Device(DEVICE_CPU), TestKernel);
-
-TEST_F(OpCompatibilityTest, RemoveDefault) {
-  OpRegistrationData old_op;
-  TF_ASSERT_OK(OpDefBuilder(""RemoveDefault"")
-                   .Output(""ndef: string"")
-                   .Attr(""a: int = 91"")
-                   .Finalize(&old_op));
-  TF_ASSERT_OK(
-      NodeDefBuilder(""remove_default"", &old_op.op_def).Finalize(node_def()));
-  ExpectSuccess(old_op.op_def);
-  EXPECT_EQ(""remove_default = RemoveDefault[a=91]()"", Result());
-}
-
 // Should be able to make an input/output polymorphic.
 // Changing from int32 -> T (where T: type = DT_INT32 by default).
 REGISTER_OP(""TypePolymorphic"")
@@ -1054,9 +1032,56 @@ TEST_F(OpCompatibilityTest, RenameOutputListFails) {
                       ""Output signature mismatch 'old:T' vs. 'new:T'"");
 }
 
-// Changing an attr's default is not technically illegal, but should
-// be forbidden if it the attr ever didn't exist since it likely
-// affects semantics.
+// Should not be able to add a default to an attr.
+REGISTER_OP(""AddDefault"").Output(""ndef: string"").Attr(""a: int = 1234"");
+REGISTER_KERNEL_BUILDER(Name(""AddDefault"").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, AddDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder(""AddDefault"")
+                   .Output(""ndef: string"")
+                   .Attr(""a: int"")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(NodeDefBuilder(""add_default"", &old_op.op_def)
+                   .Attr(""a"", 765)
+                   .Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def,
+      ""Attr 'a' has added/removed it's default; from no default to 1234"");
+}
+
+// Should not be able to remove a default from an attr.
+REGISTER_OP(""RemoveDefault"").Output(""ndef: string"").Attr(""a: int"");
+REGISTER_KERNEL_BUILDER(Name(""RemoveDefault"").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, RemoveDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder(""RemoveDefault"")
+                   .Output(""ndef: string"")
+                   .Attr(""a: int = 91"")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(
+      NodeDefBuilder(""remove_default"", &old_op.op_def).Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def,
+      ""Attr 'a' has added/removed it's default; from 91 to no default"");
+}
+
+// Should not be able to change a default for an attr.
+REGISTER_OP(""ChangeDefault"").Output(""ndef: string"").Attr(""a: int = 1"");
+REGISTER_KERNEL_BUILDER(Name(""ChangeDefault"").Device(DEVICE_CPU), TestKernel);
+
+TEST_F(OpCompatibilityTest, ChangeDefault) {
+  OpRegistrationData old_op;
+  TF_ASSERT_OK(OpDefBuilder(""ChangeDefault"")
+                   .Output(""ndef: string"")
+                   .Attr(""a: int = 2"")
+                   .Finalize(&old_op));
+  TF_ASSERT_OK(
+      NodeDefBuilder(""change_default"", &old_op.op_def).Finalize(node_def()));
+  ExpectDefaultChangeFailure(
+      old_op.op_def, ""Attr 'a' has changed it's default value; from 2 to 1"");
+}
 
 }  // namespace
 }  // namespace tensorflow
",0,test
6a20edf95fcaf45c46385eaf649e814a571737ed,tensorflow/tensorflow,"backward compatibility: Disallow changes to an OpDef attribute's default value.

PiperOrigin-RevId: 180611380",op_def_util.cc,"@@ -449,6 +449,11 @@ string AllowedStr(const OpDef::AttrDef& attr) {
   return SummarizeAttrValue(attr.allowed_values());
 }
 
+string DefaultAttrStr(const OpDef::AttrDef& attr) {
+  if (!attr.has_default_value()) return ""no default"";
+  return SummarizeAttrValue(attr.default_value());
+}
+
 bool HigherMinimum(const OpDef::AttrDef& old_attr,
                    const OpDef::AttrDef& new_attr) {
   // Anything -> no restriction : not more restrictive.
@@ -610,6 +615,16 @@ Status OpDefCompatible(const OpDef& old_op, const OpDef& new_op) {
     VALIDATE(!HigherMinimum(old_attr, *new_attr), ""Attr '"", old_attr.name(),
              ""' has a higher minimum; from "", MinStr(old_attr), "" to "",
              MinStr(*new_attr));
+    VALIDATE(old_attr.has_default_value() == new_attr->has_default_value(),
+             ""Attr '"", old_attr.name(), ""' has added/removed it's default; "",
+             ""from "", DefaultAttrStr(old_attr), "" to "",
+             DefaultAttrStr(*new_attr));
+    VALIDATE(!old_attr.has_default_value() ||
+                 AreAttrValuesEqual(old_attr.default_value(),
+                                    new_attr->default_value()),
+             ""Attr '"", old_attr.name(), ""' has changed it's default value; "",
+             ""from "", DefaultAttrStr(old_attr), "" to "",
+             DefaultAttrStr(*new_attr));
   }
 
   for (const auto& new_attr : new_op.attr()) {
",0,test
3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite

PiperOrigin-RevId: 397205785
Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_module.cc,"@@ -29,8 +29,8 @@ namespace TFL {
 namespace tac {
 namespace {
 // TODO(b/177376459): We should make this configureable.
-void AddExportTFLPass(mlir::OpPassManager* pass_manager) {
-  pass_manager->addPass(mlir::createInlinerPass());
+void AddExportTFLPass(mlir::OpPassManager* pass_manager, bool enable_inliner) {
+  if (enable_inliner) pass_manager->addPass(mlir::createInlinerPass());
   pass_manager->addPass(mlir::createSymbolDCEPass());
   pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
   pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCSEPass());
@@ -46,19 +46,21 @@ void TacModule::AddTACPass(mlir::OpPassManager* pass_manager,
       /*fold_all_constants=*/false));
   pass_manager->addPass(
       mlir::TFL::tac::CreateAlternativeSubgraphPass(device_specs));
-  // After we creat the alternative subgraph, we can still do canonicalization
-  // legalization & other optimizations as long as we're not inlining the
-  // function.
-  // And in fact, we probably need to do the proper legalization, for the
-  // compute cost to work. (in case we added some TF ops)
-  pass_manager->addPass(mlir::TFL::CreatePrepareTFPass(
-      /*unfold_batch_matmul=*/true,
-      /*allow_bf16_and_f16_type_legalization=*/false));
-  pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
-  pass_manager->addPass(
-      mlir::TFL::CreateLegalizeTFPass(/*run_tfl_runtime_verification=*/true));
-  pass_manager->addPass(
-      mlir::TFL::CreateOptimizePass(/*enable_canonicalization=*/true));
+  if (options_.legalize_to_tflite_ops) {
+    // After we creat the alternative subgraph, we can still do canonicalization
+    // legalization & other optimizations as long as we're not inlining the
+    // function.
+    // And in fact, we probably need to do the proper legalization, for the
+    // compute cost to work. (in case we added some TF ops)
+    pass_manager->addPass(mlir::TFL::CreatePrepareTFPass(
+        /*unfold_batch_matmul=*/true,
+        /*allow_bf16_and_f16_type_legalization=*/false));
+    pass_manager->addNestedPass<mlir::FuncOp>(mlir::createCanonicalizerPass());
+    pass_manager->addPass(
+        mlir::TFL::CreateLegalizeTFPass(/*run_tfl_runtime_verification=*/true));
+    pass_manager->addPass(
+        mlir::TFL::CreateOptimizePass(/*enable_canonicalization=*/true));
+  }
 
   pass_manager->addPass(mlir::TFL::tac::CreateComputeCostPass());
   pass_manager->addPass(mlir::TFL::tac::CreatePickSubgraphsPass());
@@ -79,7 +81,7 @@ absl::Status TacModule::RunTacPasses(mlir::ModuleOp* module, bool debug_mode) {
                        mlir::OpPassManager::Nesting::Implicit);
   AddTACPass(&pm, options_.hardware_backends);
   if (!debug_mode) {
-    AddExportTFLPass(&pm);
+    AddExportTFLPass(&pm, options_.enable_inliner);
   }
 
   mlir::StatusScopedDiagnosticHandler statusHandler(module->getContext(),
",0,train
3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite

PiperOrigin-RevId: 397205785
Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_module.h,"@@ -48,6 +48,10 @@ class TacModule {
     // This will output different alternative subgraphs in mlir format for debug
     // purpose.
     bool debug_mode = false;
+    // Whether to enable inliner passes or not.
+    bool enable_inliner = false;
+    // Whether to legalize ops to TFLite ops before exporting.
+    bool legalize_to_tflite_ops = false;
   };
 
   virtual ~TacModule() {}
",0,train
3ac00edd723ea54d42b015273913f8f616b4cbea,tensorflow/tensorflow,"[lite] Update TAC and add options for inlining and legalizing to TFLite

PiperOrigin-RevId: 397205785
Change-Id: Ie0940174b4e995b3e2eb27571d4369b9d6870a2d",tac_translate.cc,"@@ -126,6 +126,8 @@ absl::Status TargetAwareConversionMain() {
   if (!output_mlir || inline_subgraphs) {
     options.debug_mode = false;
   }
+  options.enable_inliner = true;
+  options.legalize_to_tflite_ops = true;
   mlir::TFL::tac::TacModule tac_module(options);
   mlir::DialectRegistry registry;
   mlir::RegisterAllTensorFlowDialects(registry);
",0,train
1f10b08b375f6d9c4800dc4183ef836b3d729605,tensorflow/tensorflow,"Allow using DNN to only train the embeddings and using the tree model for the final prediction.

PiperOrigin-RevId: 197462585",dnn_tree_combined_estimator.py,"@@ -45,6 +45,7 @@ from tensorflow.python.training import training_util
 
 _DNN_LEARNING_RATE = 0.001
 
+
 def _get_optimizer(optimizer):
   if callable(optimizer):
     return optimizer()
@@ -73,6 +74,7 @@ def _dnn_tree_combined_model_fn(features,
                                 dnn_input_layer_partitioner=None,
                                 dnn_input_layer_to_tree=True,
                                 dnn_steps_to_train=10000,
+                                predict_with_tree_only=False,
                                 tree_feature_columns=None,
                                 tree_center_bias=False,
                                 use_core_versions=False):
@@ -108,6 +110,8 @@ def _dnn_tree_combined_model_fn(features,
     as a feature to the tree.
     dnn_steps_to_train: Number of steps to train dnn for before switching
       to gbdt.
+    predict_with_tree_only: Whether to use only the tree model output as the
+      final prediction.
     tree_feature_columns: An iterable containing all the feature columns
       used by the model's boosted trees. If dnn_input_layer_to_tree is
       set to True, these features are in addition to dnn_feature_columns.
@@ -132,8 +136,7 @@ def _dnn_tree_combined_model_fn(features,
   dnn_parent_scope = ""dnn""
   dnn_partitioner = dnn_input_layer_partitioner or (
       partitioned_variables.min_max_variable_partitioner(
-          max_partitions=config.num_ps_replicas,
-          min_slice_size=64 << 20))
+          max_partitions=config.num_ps_replicas, min_slice_size=64 << 20))
 
   with variable_scope.variable_scope(
       dnn_parent_scope,
@@ -171,8 +174,7 @@ def _dnn_tree_combined_model_fn(features,
       _add_hidden_layer_summary(net, hidden_layer_scope.name)
       previous_layer = net
     with variable_scope.variable_scope(
-        ""logits"",
-        values=(previous_layer,)) as logits_scope:
+        ""logits"", values=(previous_layer,)) as logits_scope:
       dnn_logits = layers.fully_connected(
           previous_layer,
           head.logits_dimension,
@@ -190,8 +192,7 @@ def _dnn_tree_combined_model_fn(features,
           optimizer=_get_optimizer(dnn_optimizer),
           name=dnn_parent_scope,
           variables=ops.get_collection(
-              ops.GraphKeys.TRAINABLE_VARIABLES,
-              scope=dnn_parent_scope),
+              ops.GraphKeys.TRAINABLE_VARIABLES, scope=dnn_parent_scope),
           # Empty summaries to prevent optimizers from logging training_loss.
           summaries=[])
 
@@ -230,7 +231,10 @@ def _dnn_tree_combined_model_fn(features,
         update_op = state_ops.assign_add(global_step, 1).op
         return update_op
 
-  tree_train_logits = dnn_logits + tree_logits
+  if predict_with_tree_only:
+    tree_train_logits = tree_logits
+  else:
+    tree_train_logits = dnn_logits + tree_logits
 
   def _no_train_op_fn(loss):
     """"""Returns a no-op.""""""
@@ -288,10 +292,10 @@ def _dnn_tree_combined_model_fn(features,
   finalized_trees, attempted_trees = gbdt_model.get_number_of_trees_tensor()
 
   model_fn_ops.training_hooks.extend([
-      trainer_hooks.SwitchTrainOp(
-          dnn_train_op, dnn_steps_to_train, tree_train_op),
-      trainer_hooks.StopAfterNTrees(
-          num_trees, attempted_trees, finalized_trees)])
+      trainer_hooks.SwitchTrainOp(dnn_train_op, dnn_steps_to_train,
+                                  tree_train_op),
+      trainer_hooks.StopAfterNTrees(num_trees, attempted_trees, finalized_trees)
+  ])
 
   return model_fn_ops
 
@@ -318,6 +322,7 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator):
                dnn_input_layer_partitioner=None,
                dnn_input_layer_to_tree=True,
                dnn_steps_to_train=10000,
+               predict_with_tree_only=False,
                tree_feature_columns=None,
                tree_center_bias=False,
                use_core_versions=False):
@@ -360,6 +365,8 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator):
       as a feature to the tree.
       dnn_steps_to_train: Number of steps to train dnn for before switching
         to gbdt.
+      predict_with_tree_only: Whether to use only the tree model output as the
+        final prediction.
       tree_feature_columns: An iterable containing all the feature columns
         used by the model's boosted trees. If dnn_input_layer_to_tree is
         set to True, these features are in addition to dnn_feature_columns.
@@ -377,16 +384,32 @@ class DNNBoostedTreeCombinedClassifier(estimator.Estimator):
 
     def _model_fn(features, labels, mode, config):
       return _dnn_tree_combined_model_fn(
-          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
-          tree_learner_config, num_trees, tree_examples_per_layer, config,
-          dnn_optimizer, dnn_activation_fn, dnn_dropout,
-          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
-          dnn_steps_to_train, tree_feature_columns, tree_center_bias,
-          use_core_versions)
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_feature_columns=dnn_feature_columns,
+          tree_learner_config=tree_learner_config,
+          num_trees=num_trees,
+          tree_examples_per_layer=tree_examples_per_layer,
+          config=config,
+          dnn_optimizer=dnn_optimizer,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          dnn_input_layer_partitioner=dnn_input_layer_partitioner,
+          dnn_input_layer_to_tree=dnn_input_layer_to_tree,
+          dnn_steps_to_train=dnn_steps_to_train,
+          predict_with_tree_only=predict_with_tree_only,
+          tree_feature_columns=tree_feature_columns,
+          tree_center_bias=tree_center_bias,
+          use_core_versions=use_core_versions)
 
     super(DNNBoostedTreeCombinedClassifier, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir,
-        config=config, feature_engineering_fn=feature_engineering_fn)
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_engineering_fn=feature_engineering_fn)
 
 
 class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
@@ -410,6 +433,7 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
                dnn_input_layer_partitioner=None,
                dnn_input_layer_to_tree=True,
                dnn_steps_to_train=10000,
+               predict_with_tree_only=False,
                tree_feature_columns=None,
                tree_center_bias=False,
                use_core_versions=False):
@@ -452,6 +476,8 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
       as a feature to the tree.
       dnn_steps_to_train: Number of steps to train dnn for before switching
         to gbdt.
+      predict_with_tree_only: Whether to use only the tree model output as the
+        final prediction.
       tree_feature_columns: An iterable containing all the feature columns
         used by the model's boosted trees. If dnn_input_layer_to_tree is
         set to True, these features are in addition to dnn_feature_columns.
@@ -474,16 +500,32 @@ class DNNBoostedTreeCombinedRegressor(estimator.Estimator):
 
     def _model_fn(features, labels, mode, config):
       return _dnn_tree_combined_model_fn(
-          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
-          tree_learner_config, num_trees, tree_examples_per_layer, config,
-          dnn_optimizer, dnn_activation_fn, dnn_dropout,
-          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
-          dnn_steps_to_train, tree_feature_columns, tree_center_bias,
-          use_core_versions)
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_feature_columns=dnn_feature_columns,
+          tree_learner_config=tree_learner_config,
+          num_trees=num_trees,
+          tree_examples_per_layer=tree_examples_per_layer,
+          config=config,
+          dnn_optimizer=dnn_optimizer,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          dnn_input_layer_partitioner=dnn_input_layer_partitioner,
+          dnn_input_layer_to_tree=dnn_input_layer_to_tree,
+          dnn_steps_to_train=dnn_steps_to_train,
+          predict_with_tree_only=predict_with_tree_only,
+          tree_feature_columns=tree_feature_columns,
+          tree_center_bias=tree_center_bias,
+          use_core_versions=use_core_versions)
 
     super(DNNBoostedTreeCombinedRegressor, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir,
-        config=config, feature_engineering_fn=feature_engineering_fn)
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_engineering_fn=feature_engineering_fn)
 
 
 class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
@@ -508,6 +550,7 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
                dnn_input_layer_partitioner=None,
                dnn_input_layer_to_tree=True,
                dnn_steps_to_train=10000,
+               predict_with_tree_only=False,
                tree_feature_columns=None,
                tree_center_bias=False,
                use_core_versions=False):
@@ -545,6 +588,8 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
       as a feature to the tree.
       dnn_steps_to_train: Number of steps to train dnn for before switching
         to gbdt.
+      predict_with_tree_only: Whether to use only the tree model output as the
+        final prediction.
       tree_feature_columns: An iterable containing all the feature columns
         used by the model's boosted trees. If dnn_input_layer_to_tree is
         set to True, these features are in addition to dnn_feature_columns.
@@ -553,15 +598,32 @@ class DNNBoostedTreeCombinedEstimator(estimator.Estimator):
       use_core_versions: Whether feature columns and loss are from the core (as
         opposed to contrib) version of tensorflow.
     """"""
+
     def _model_fn(features, labels, mode, config):
       return _dnn_tree_combined_model_fn(
-          features, labels, mode, head, dnn_hidden_units, dnn_feature_columns,
-          tree_learner_config, num_trees, tree_examples_per_layer, config,
-          dnn_optimizer, dnn_activation_fn, dnn_dropout,
-          dnn_input_layer_partitioner, dnn_input_layer_to_tree,
-          dnn_steps_to_train, tree_feature_columns, tree_center_bias,
-          use_core_versions)
+          features=features,
+          labels=labels,
+          mode=mode,
+          head=head,
+          dnn_hidden_units=dnn_hidden_units,
+          dnn_feature_columns=dnn_feature_columns,
+          tree_learner_config=tree_learner_config,
+          num_trees=num_trees,
+          tree_examples_per_layer=tree_examples_per_layer,
+          config=config,
+          dnn_optimizer=dnn_optimizer,
+          dnn_activation_fn=dnn_activation_fn,
+          dnn_dropout=dnn_dropout,
+          dnn_input_layer_partitioner=dnn_input_layer_partitioner,
+          dnn_input_layer_to_tree=dnn_input_layer_to_tree,
+          dnn_steps_to_train=dnn_steps_to_train,
+          predict_with_tree_only=predict_with_tree_only,
+          tree_feature_columns=tree_feature_columns,
+          tree_center_bias=tree_center_bias,
+          use_core_versions=use_core_versions)
 
     super(DNNBoostedTreeCombinedEstimator, self).__init__(
-        model_fn=_model_fn, model_dir=model_dir,
-        config=config, feature_engineering_fn=feature_engineering_fn)
+        model_fn=_model_fn,
+        model_dir=model_dir,
+        config=config,
+        feature_engineering_fn=feature_engineering_fn)
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as an RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is an RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_binary.cc,"@@ -191,6 +191,14 @@ void EvaluateBinaryOperatorOnConstantInputs(Model* model,
 bool ResolveConstantBinaryOperator::Run(Model* model, std::size_t op_index) {
   const auto binary_it = model->operators.begin() + op_index;
   const auto* binary_op = binary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, binary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for binary ops of types that we know how to resolve
   if (binary_op->type != OperatorType::kAdd &&
       binary_op->type != OperatorType::kMul &&
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_concatenation.cc,"@@ -144,6 +144,13 @@ bool ResolveConstantConcatenation::Run(Model* model, std::size_t op_index) {
   const auto* concat_op =
       static_cast<const ConcatenationOperator*>(concat_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, concat_op->outputs[0])) {
+    return false;
+  }
+
   for (const string& input_name : concat_op->inputs) {
     // We only expect constant unquantized arrays as input, otherwise we return.
     // We  also make sure the shapes of the input arrays are known and they are
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_fake_quant.cc,"@@ -69,6 +69,13 @@ bool ResolveConstantFakeQuant::Run(Model* model, std::size_t op_index) {
   const auto* fakequant_op =
       static_cast<const FakeQuantOperator*>(fakequant_base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, fakequant_op->outputs[0])) {
+    return false;
+  }
+
   // Yield until the fakequant MinMax has been resolved.
   if (!fakequant_op->minmax) {
     return false;
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_fill.cc,"@@ -52,6 +52,13 @@ bool ResolveConstantFill::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_gather.cc,"@@ -71,6 +71,14 @@ bool ResolveConstantGather::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_pack.cc,"@@ -59,6 +59,14 @@ bool ResolveConstantPack::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_random_uniform.cc,"@@ -70,6 +70,13 @@ bool ResolveConstantRandomUniform::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 1);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_range.cc,"@@ -28,6 +28,14 @@ bool ResolveConstantRange::Run(Model* model, std::size_t op_index) {
   auto* op = static_cast<RangeOperator*>(base_op);
 
   CHECK_EQ(op->inputs.size(), 3);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   const auto& start_array = model->GetArray(op->inputs[0]);
   if (!start_array.has_shape()) {
     // Yield until all input dims have been resolved.
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_reshape.cc,"@@ -33,6 +33,13 @@ bool ResolveConstantReshape::Run(Model* model, std::size_t op_index) {
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   // We require constant inputs.
   if (!IsConstantParameterArray(*model, op->inputs[0]) ||
       !IsConstantParameterArray(*model, op->inputs[1])) {
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_select.cc,"@@ -37,6 +37,14 @@ bool ResolveConstantSelect::Run(Model* model, std::size_t op_index) {
 
   CHECK_GE(op->inputs.size(), 3);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_shape_or_rank.cc,"@@ -27,6 +27,14 @@ bool ResolveConstantShapeOrRank::Run(Model* model, std::size_t op_index) {
   }
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been resolved
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_slice.cc,"@@ -96,6 +96,14 @@ bool ResolveConstantSlice::Run(Model* model, std::size_t op_index) {
   const SliceOperator* op = static_cast<const SliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_strided_slice.cc,"@@ -114,6 +114,14 @@ bool ResolveConstantStridedSlice::Run(Model* model, std::size_t op_index) {
       static_cast<const StridedSliceOperator*>(base_op);
 
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_tile.cc,"@@ -105,6 +105,13 @@ bool ResolveConstantTile::Run(Model* model, std::size_t op_index) {
   }
   const auto* op = static_cast<const TensorFlowTileOperator*>(base_op);
 
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   CHECK_GE(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
   auto& output_array = model->GetArray(op->outputs[0]);
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_transpose.cc,"@@ -111,6 +111,14 @@ bool ResolveConstantTranspose::Run(Model* model, std::size_t op_index) {
 
   CHECK_EQ(op->inputs.size(), 2);
   CHECK_EQ(op->outputs.size(), 1);
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, op->outputs[0])) {
+    return false;
+  }
+
   auto& output_array = model->GetArray(op->outputs[0]);
   if (output_array.data_type == ArrayDataType::kNone) {
     // Yield until the output type has been set by PropagateArrayDataTypes.
",0,train
8dc7bc7764150253c03a666eee84fc48f867d6a2,tensorflow/tensorflow,"In all constant-propagation transformations, check that the array we'd be turning into a constant is a
discardable array. If it's not discardable, it means that the user wants this array to keep existing
in a way that is observable to them, i.e. not as weights.

Typical example: a Fill op outputs an array that is passed as a RNN state array (non-discardable).
It seems that so far we have been relying on accidental ordering of graph transformations for such state
arrays not to be accidentally turned into constants. Instead, the desired graph transformation here is
RemoveUnusedOp noticing that such a Fill can be discarded since its output is a RNN state array.

So I don't have a test for this, but this seems to be tightening existing behavior, and should be good
to have as long as it does not regress anything.

PiperOrigin-RevId: 215500760",resolve_constant_unary.cc,"@@ -48,6 +48,14 @@ bool CopyMinMaxFromFirstInput(const Operator& op, Model* model) {
 bool ResolveConstantUnaryOperator::Run(Model* model, std::size_t op_index) {
   const auto unary_it = model->operators.begin() + op_index;
   const auto* unary_op = unary_it->get();
+
+  // If the output of this op is a non-discardable array such as an input_array
+  // or a state array of the model, then this is a job for RemoveUnusedOp, not
+  // for constants-propagation.
+  if (!IsDiscardableArray(*model, unary_op->outputs[0])) {
+    return false;
+  }
+
   // Test for unary ops of types that we know how to resolve.
   switch (unary_op->type) {
     case OperatorType::kCast:
",0,train
60f965adb6c0393fe6d2ce4b990af6ffa58c0852,tensorflow/tensorflow,"s/tf.contrib.eager.GradientTape/tf.GradientTape/

PiperOrigin-RevId: 201372249",gradients_impl.py,"@@ -548,9 +548,8 @@ def _GradientsHelper(ys,
                      src_graph=None):
   """"""Implementation of gradients().""""""
   if context.executing_eagerly():
-    raise RuntimeError(""tf.gradients not supported when eager execution ""
-                       ""is enabled. Use tf.contrib.eager.GradientTape ""
-                       ""instead."")
+    raise RuntimeError(""tf.gradients is not supported when eager execution ""
+                       ""is enabled. Use tf.GradientTape instead."")
   if src_graph is None:
     src_graph = ops.get_default_graph()
 
",0,train
832f57b14e5dfbad9946d039cd20a32a0314d9bb,tensorflow/tensorflow,"Fix tpu_strategy_tests on Cloud TPU.

PiperOrigin-RevId: 302738954
Change-Id: Ib3164d271186fe976d6154e1a0ae02bf0002f2fc",tpu_strategy_test.py,"@@ -310,10 +310,11 @@ class TPUStrategyTest(test.TestCase):
 
     bar(1)
 
-  # TODO(b/152251070): Re-enable once modified to work on Cloud TPU.
-  def disable_test_using_external_variable_inside_tf_function(self):
+  def test_using_external_variable_inside_tf_function(self):
     strategy = get_tpu_strategy()
-    dataset = dataset_ops.Dataset.range(10, output_type=dtypes.float32).batch(2)
+    dataset = dataset_ops.Dataset.range(
+        strategy.num_replicas_in_sync * 2,
+        output_type=dtypes.float32).batch(strategy.num_replicas_in_sync)
     input_iterator = iter(strategy.experimental_distribute_dataset(dataset))
 
     v = variables.Variable(2.0)
@@ -330,12 +331,12 @@ class TPUStrategyTest(test.TestCase):
         expected_result,
         strategy.experimental_local_results(train_step(next(input_iterator))))
 
-  # TODO(b/152251070): Re-enable once modified to work on Cloud TPU.
-  def disable_test_keras_metric_outside_strategy_scope_per_replica(self):
+  def test_keras_metric_outside_strategy_scope_per_replica(self):
     strategy = get_tpu_strategy()
     metric = keras.metrics.Mean(""test_metric"", dtype=dtypes.float32)
 
-    dataset = dataset_ops.Dataset.range(10).batch(2)
+    dataset = dataset_ops.Dataset.range(strategy.num_replicas_in_sync *
+                                        2).batch(2)
     dataset = strategy.experimental_distribute_dataset(dataset)
 
     @def_function.function
",0,test
71bbebbf4d04c1bcb6ed44e2156087c9fec06e9e,tensorflow/tensorflow,Moved final StatusGroup method calls,status_group_fuzz.cc,"@@ -55,9 +55,7 @@ extern ""C"" int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) {
   }
 
   sg.as_summary_status();
-
   sg.as_concatenated_status();
-
   sg.AttachLogMessages();
 
   return 0;
",0,test
074b66af3415cb3c60336b0a94f23aec04a715e3,tensorflow/tensorflow,"Change `dim` to `axis` for cosine_distance (#12801)

* Change `dim` to `axis` for cosine_distance

This fix changes  `dim` to `axis` for cosine_distance
so that the args are consistent with other methods in TensorFlow.

The backward-compatibility has been maintained in the fix.

This fix fixes 8205.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Change `dim` to `axis` for tf.losses.cosine_distance

so that args are consistent with other TensorFlow methods.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Update API goldens and address review feedback

This commit updates API goldens so that
`//tensorflow/tools/api/tests:api_compatibility_test`
could pass. Review feedback has also been addressed.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",loss_ops.py,"@@ -28,6 +28,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.util.deprecation import deprecated
+from tensorflow.python.util.deprecation import deprecated_args
 
 __all__ = [""absolute_difference"",
            ""add_loss"",
@@ -623,8 +624,9 @@ def mean_pairwise_squared_error(
 
 
 @deprecated(""2016-12-30"", ""Use tf.losses.cosine_distance instead."")
+@deprecated_args(None, ""dim is deprecated, use axis instead"", ""dim"")
 def cosine_distance(
-    predictions, labels=None, dim=None, weights=1.0, scope=None):
+    predictions, labels=None, axis=None, weights=1.0, scope=None, dim=None):
   """"""Adds a cosine-distance loss to the training procedure.
 
   Note that the function assumes that `predictions` and `labels` are already
@@ -633,10 +635,11 @@ def cosine_distance(
   Args:
     predictions: An arbitrary matrix.
     labels: A `Tensor` whose shape matches 'predictions'
-    dim: The dimension along which the cosine distance is computed.
+    axis: The dimension along which the cosine distance is computed.
     weights: Coefficients for the loss a scalar, a tensor of shape
       [batch_size] or a tensor whose shape matches `predictions`.
     scope: The scope for the operations performed in computing the loss.
+    dim: The old (deprecated) name for `axis`.
 
   Returns:
     A scalar `Tensor` representing the loss value.
@@ -645,8 +648,12 @@ def cosine_distance(
     ValueError: If `predictions` shape doesn't match `labels` shape, or
       `weights` is `None`.
   """"""
-  if dim is None:
-    raise ValueError(""`dim` cannot be None."")
+  if dim is not None:
+    if axis is not None:
+      raise ValueError(""Cannot specify both 'axis' and 'dim'"")
+    axis = dim
+  if axis is None and dim is None:
+    raise ValueError(""You must specify 'axis'."")
   with ops.name_scope(scope, ""cosine_distance_loss"",
                       [predictions, labels, weights]) as scope:
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
@@ -655,5 +662,5 @@ def cosine_distance(
     labels = math_ops.to_float(labels)
 
     radial_diffs = math_ops.multiply(predictions, labels)
-    losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[dim,])
+    losses = 1 - math_ops.reduce_sum(radial_diffs, reduction_indices=[axis,])
     return compute_weighted_loss(losses, weights, scope=scope)
",0,train
074b66af3415cb3c60336b0a94f23aec04a715e3,tensorflow/tensorflow,"Change `dim` to `axis` for cosine_distance (#12801)

* Change `dim` to `axis` for cosine_distance

This fix changes  `dim` to `axis` for cosine_distance
so that the args are consistent with other methods in TensorFlow.

The backward-compatibility has been maintained in the fix.

This fix fixes 8205.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Change `dim` to `axis` for tf.losses.cosine_distance

so that args are consistent with other TensorFlow methods.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>

* Update API goldens and address review feedback

This commit updates API goldens so that
`//tensorflow/tools/api/tests:api_compatibility_test`
could pass. Review feedback has also been addressed.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",losses_impl.py,"@@ -27,6 +27,7 @@ from tensorflow.python.ops import nn
 from tensorflow.python.ops import nn_ops
 from tensorflow.python.ops import weights_broadcast_ops
 from tensorflow.python.ops.losses import util
+from tensorflow.python.util.deprecation import deprecated_args
 
 
 class Reduction(object):
@@ -230,10 +231,12 @@ def absolute_difference(
         losses, weights, scope, loss_collection, reduction=reduction)
 
 
+@deprecated_args(None, ""dim is deprecated, use axis instead"", ""dim"")
 def cosine_distance(
-    labels, predictions, dim=None, weights=1.0, scope=None,
+    labels, predictions, axis=None, weights=1.0, scope=None,
     loss_collection=ops.GraphKeys.LOSSES,
-    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS):
+    reduction=Reduction.SUM_BY_NONZERO_WEIGHTS,
+    dim=None):
   """"""Adds a cosine-distance loss to the training procedure.
 
   Note that the function assumes that `predictions` and `labels` are already
@@ -242,13 +245,14 @@ def cosine_distance(
   Args:
     labels: `Tensor` whose shape matches 'predictions'
     predictions: An arbitrary matrix.
-    dim: The dimension along which the cosine distance is computed.
+    axis: The dimension along which the cosine distance is computed.
     weights: Optional `Tensor` whose rank is either 0, or the same rank as
       `labels`, and must be broadcastable to `labels` (i.e., all dimensions must
       be either `1`, or the same as the corresponding `losses` dimension).
     scope: The scope for the operations performed in computing the loss.
     loss_collection: collection to which this loss will be added.
     reduction: Type of reduction to apply to loss.
+    dim: The old (deprecated) name for `axis`.
 
   Returns:
     Weighted loss float `Tensor`. If `reduction` is `NONE`, this has the same
@@ -256,10 +260,14 @@ def cosine_distance(
 
   Raises:
     ValueError: If `predictions` shape doesn't match `labels` shape, or
-      `dim`, `labels`, `predictions` or `weights` is `None`.
+      `axis`, `labels`, `predictions` or `weights` is `None`.
   """"""
-  if dim is None:
-    raise ValueError(""`dim` cannot be None."")
+  if dim is not None:
+    if axis is not None:
+      raise ValueError(""Cannot specify both 'axis' and 'dim'"")
+    axis = dim
+  if axis is None and dim is None:
+    raise ValueError(""You must specify 'axis'."")
   if labels is None:
     raise ValueError(""labels must not be None."")
   if predictions is None:
@@ -271,7 +279,7 @@ def cosine_distance(
     predictions.get_shape().assert_is_compatible_with(labels.get_shape())
 
     radial_diffs = math_ops.multiply(predictions, labels)
-    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(dim,), keep_dims=True)
+    losses = 1 - math_ops.reduce_sum(radial_diffs, axis=(axis,), keep_dims=True)
     return compute_weighted_loss(
         losses, weights, scope, loss_collection, reduction=reduction)
 
",0,train
221a5146df1afcc72d0d2490af487868031ba037,tensorflow/tensorflow,"Enable tuple type sharding when using a single element InfeedDequeueTuple.
This will enable spatial partitioning for single input Infeeds.

PiperOrigin-RevId: 243594818",xla_sharding.py,"@@ -120,9 +120,14 @@ class Sharding(object):
             tile_assignment_dimensions=tile_assignment_dims,
             tile_assignment_devices=range(num_devices)))
 
-  def apply_to_tensor(self, tensor):
-    """"""Applies this Sharding attribute to `tensor`.""""""
-    if len(tensor.op.outputs) > 1:
+  def apply_to_tensor(self, tensor, assign_tuple_sharding=False):
+    """"""Applies this Sharding attribute to `tensor`.
+
+    Args:
+      tensor: A tf.Tensor to split.
+      assign_tuple_sharding: If the sharding type should be a tuple.
+    """"""
+    if len(tensor.op.outputs) > 1 or assign_tuple_sharding:
       proto = self._get_or_create_tuple_proto(tensor.op)
       # We can't mutate an element of old_proto.tuple_shardings, so create
       # a new proto.
@@ -166,21 +171,30 @@ class Sharding(object):
 #   tensor = xla_sharding.replicate(tensor)
 
 
-def replicate(tensor):
-  Sharding.replicate().apply_to_tensor(tensor)
+def replicate(tensor, assign_tuple_sharding=False):
+  Sharding.replicate().apply_to_tensor(
+      tensor,
+      assign_tuple_sharding=assign_tuple_sharding)
   return tensor
 
 
-def assign_device(tensor, device):
-  Sharding.assign_device(device).apply_to_tensor(tensor)
+def assign_device(tensor, device, assign_tuple_sharding=False):
+  Sharding.assign_device(device).apply_to_tensor(
+      tensor,
+      assign_tuple_sharding=assign_tuple_sharding)
   return tensor
 
 
-def tile(tensor, tile_assignment):
-  Sharding.tile(tile_assignment).apply_to_tensor(tensor)
+def tile(tensor, tile_assignment, assign_tuple_sharding=False):
+  Sharding.tile(tile_assignment).apply_to_tensor(
+      tensor,
+      assign_tuple_sharding=assign_tuple_sharding
+  )
   return tensor
 
 
-def split(tensor, split_dimension, num_devices):
-  Sharding.split(tensor, split_dimension, num_devices).apply_to_tensor(tensor)
+def split(tensor, split_dimension, num_devices, assign_tuple_sharding=False):
+  Sharding.split(tensor, split_dimension, num_devices).apply_to_tensor(
+      tensor,
+      assign_tuple_sharding=assign_tuple_sharding)
   return tensor
",0,train
221a5146df1afcc72d0d2490af487868031ba037,tensorflow/tensorflow,"Enable tuple type sharding when using a single element InfeedDequeueTuple.
This will enable spatial partitioning for single input Infeeds.

PiperOrigin-RevId: 243594818",tpu_feed.py,"@@ -86,6 +86,8 @@ def partition_or_replicate_on_host(tensor, dims):
 def _tag_sharding_attribute_for_dequeued_tensor(tensor, dims):
   """"""Tags appropriate XLA sharding attribute to the dequeued tensor.
 
+  The sharding attribute of the dequeued tensor will be a tuple.
+
   Args:
     tensor: The dequeued tensor on TPU.
     dims: A list of integer describes how the tensor is partitioned.
@@ -94,12 +96,15 @@ def _tag_sharding_attribute_for_dequeued_tensor(tensor, dims):
     The same tensor with the xla_sharding attribute.
   """"""
   if dims is None:
-    return xla_sharding.replicate(tensor)
+    return xla_sharding.replicate(tensor, assign_tuple_sharding=True)
   elif np.prod(dims) == 1:
-    return xla_sharding.assign_device(tensor, 0)
+    return xla_sharding.assign_device(tensor, 0, assign_tuple_sharding=True)
   else:
     tile_assignment = np.arange(np.prod(dims)).reshape(dims)
-    return xla_sharding.tile(tensor=tensor, tile_assignment=tile_assignment)
+    return xla_sharding.tile(
+        tensor=tensor,
+        tile_assignment=tile_assignment,
+        assign_tuple_sharding=True)
 
 
 def tag_sharding_attribute_for_dequeued_tensors(dequeues, dims):
",0,train
f3b389ca9369e81c60ffa0615f1f87b19c19df85,tensorflow/tensorflow,"Automated rollback of commit b9a6fea1f0a501b226394431d0377eef0b40c4b0

PiperOrigin-RevId: 257837218",meta_optimizer.cc,"@@ -32,8 +32,8 @@ limitations under the License.
 #include ""tensorflow/core/grappler/optimizers/debug_stripper.h""
 #include ""tensorflow/core/grappler/optimizers/dependency_optimizer.h""
 #include ""tensorflow/core/grappler/optimizers/function_optimizer.h""
-#include ""tensorflow/core/grappler/optimizers/generic_layout_optimizer.h""
 #include ""tensorflow/core/grappler/optimizers/implementation_selector.h""
+#include ""tensorflow/core/grappler/optimizers/layout_optimizer.h""
 #include ""tensorflow/core/grappler/optimizers/loop_optimizer.h""
 #include ""tensorflow/core/grappler/optimizers/memory_optimizer.h""
 #include ""tensorflow/core/grappler/optimizers/model_pruner.h""
@@ -121,7 +121,7 @@ std::unique_ptr<GraphOptimizer> MetaOptimizer::MakeNewOptimizer(
   MK_OPT(""constfold"", new ConstantFolding(cpu_device_));
   MK_OPT(""shape"", new ShapeOptimizer());
   MK_OPT(""remap"", new Remapper(cfg_.remapping()));
-  MK_OPT(""layout"", new GenericLayoutOptimizer());
+  MK_OPT(""layout"", new LayoutOptimizer());
   MK_OPT(""auto_mixed_precision"",
          new AutoMixedPrecision(cfg_.auto_mixed_precision()));
   MK_OPT(""memory"", new MemoryOptimizer(RewriterConfig::MANUAL));
@@ -193,7 +193,7 @@ Status MetaOptimizer::InitializeOptimizers(
         MakeUnique<DependencyOptimizer>(cfg_.dependency_optimization()));
   }
   if (cfg_.layout_optimizer() != RewriterConfig::OFF) {
-    optimizers->push_back(MakeUnique<GenericLayoutOptimizer>());
+    optimizers->push_back(MakeUnique<LayoutOptimizer>());
   }
   if (AutoMixedPrecisionEnabled(cfg_.auto_mixed_precision())) {
     optimizers->push_back(
@@ -267,7 +267,7 @@ Status MetaOptimizer::InitializeCustomGraphOptimizers(
       TF_RETURN_IF_ERROR(custom_optimizer->Init(&optimizer_config));
       optimizers->push_back(std::move(custom_optimizer));
     } else {
-      // If there are no custom optimizers with given name, try to initialize a
+      // If there are no custom optimizers with given name, try to initalize a
       // default optimizer. This way, custom configurable optimizers can be
       // mixed with default optimizers in any order.
       auto optimizer = MakeNewOptimizer(optimizer_config.name());
",0,train
f96dcc1584ada70a1e58513dab4af82fe54cb3fa,tensorflow/tensorflow,"Make tf.group() a tf.no_op()
Change: 118211924",control_flow_ops_py_test.py,"@@ -1468,6 +1468,11 @@ class ControlFlowTest(tf.test.TestCase):
     self.assertAllClose([0.0], v1_val)
     self.assertAllClose([1.0], v2_val)
 
+  def testGroupEmpty(self):
+    op = tf.group()
+    self.assertEqual(op.type, ""NoOp"")
+    self.assertEqual(op.control_inputs, [])
+
   def testMergeShapes(self):
     # All inputs unknown.
     p1 = tf.placeholder(tf.float32)
",0,test
f96dcc1584ada70a1e58513dab4af82fe54cb3fa,tensorflow/tensorflow,"Make tf.group() a tf.no_op()
Change: 118211924",control_flow_ops.py,"@@ -1685,7 +1685,7 @@ def group(*inputs, **kwargs):
   See also `tuple` and `with_dependencies`.
 
   Args:
-    *inputs: One or more tensors to group.
+    *inputs: Zero or more tensors to group.
     **kwargs: Optional parameters to pass when constructing the NodeDef.
     name: A name for this operation (optional).
 
@@ -1693,16 +1693,16 @@ def group(*inputs, **kwargs):
     An Operation that executes all its inputs.
 
   Raises:
-    ValueError: If an unknown keyword argument is provided, or if there are
-                no inputs.
+    ValueError: If an unknown keyword argument is provided.
   """"""
   name = kwargs.pop(""name"", None)
   if kwargs:
     raise ValueError(""Unknown keyword arguments: "" + "", "".join(kwargs.keys()))
-  if not inputs:
-    # TODO(touts): Would make sense to return a NoOp.
-    raise ValueError(""No inputs provided"")
   with ops.op_scope(inputs, name, ""group_deps"") as name:
+    # Grouping no inputs means do nothing
+    if not inputs:
+      return no_op(name=name)
+
     # Sorts *inputs according to their devices.
     ops_on_device = {}  # device -> operations specified on the device.
     for inp in inputs:
",0,test
97249979d9a76ae05d590f9cbe199c0b47712b4f,tensorflow/tensorflow,"bug fix: evaluate nodes before swap the original graph

PiperOrigin-RevId: 190291844",constant_folding_test.cc,"@@ -1922,6 +1922,8 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) {
   item.fetch = {""concat0"", ""concat1"", ""concat2"", ""concat3"", ""concat4"",
                 ""concat5"", ""concat6"", ""concat7"", ""concat8"", ""concat9""};
 
+  auto tensors_expected = EvaluateNodes(item.graph, {""concat0""});
+  EXPECT_EQ(1, tensors_expected.size());
   ConstantFolding optimizer(nullptr /* cpu_device */);
   GraphDef output;
   Status status = optimizer.Optimize(nullptr, item, &output);
@@ -1971,9 +1973,7 @@ TEST_F(ConstantFoldingTest, PartialFolding_Concat) {
     }
   }
 
-  auto tensors_expected = EvaluateNodes(item.graph, {""concat0""});
   auto tensors = EvaluateNodes(output, {""concat0""});
-  EXPECT_EQ(1, tensors_expected.size());
   EXPECT_EQ(1, tensors.size());
   test::ExpectTensorNear<float>(tensors_expected[0], tensors[0], 1e-6);
 }
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",cluster_resolver.py,"@@ -22,6 +22,8 @@ import abc
 
 import six
 
+from tensorflow.python.client import session
+from tensorflow.python.framework import ops
 from tensorflow.python.training.server_lib import ClusterSpec
 
 
@@ -32,6 +34,14 @@ def format_master_url(master, rpc_layer=None):
     return master
 
 
+def get_accelerator_devices(master, config_proto):
+  # TODO(frankchn): Add support for eager mode as well as graph mode.
+  with ops.Graph().as_default():
+    with session.Session(master, config=config_proto) as s:
+      devices = s.list_devices()
+  return devices
+
+
 @six.add_metaclass(abc.ABCMeta)
 class ClusterResolver(object):
   """"""Abstract class for all implementations of ClusterResolvers.
@@ -91,7 +101,6 @@ class ClusterResolver(object):
     """"""
     raise NotImplementedError()
 
-  @abc.abstractmethod
   def num_accelerators(self,
                        task_type=None,
                        task_index=None,
@@ -119,7 +128,9 @@ class ClusterResolver(object):
       config_proto: (Optional) Configuration for starting a new session to
         query how many accelerator cores it has.
     """"""
-    raise NotImplementedError()
+    master = self.master(task_type, task_index)
+    devices = get_accelerator_devices(master, config_proto)
+    return sum(1 for d in devices if d.device_type == accelerator_type)
 
   @abc.abstractproperty
   def environment(self):
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",cluster_resolver_test.py,"@@ -18,11 +18,64 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.client import session
+from tensorflow.python.distribute.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver import SimpleClusterResolver
 from tensorflow.python.distribute.cluster_resolver import UnionClusterResolver
 from tensorflow.python.platform import test
 from tensorflow.python.training import server_lib
 
+mock = test.mock
+
+
+class MockBaseClusterResolver(ClusterResolver):
+
+  def cluster_spec(self):
+    return None
+
+  def master(self, task_type=None, task_index=None, rpc_layer=None):
+    return """"
+
+  def environment(self):
+    return """"
+
+
+class BaseClusterResolverTest(test.TestCase):
+
+  @mock.patch.object(session.BaseSession, ""list_devices"")
+  def testNumAcceleratorsSuccess(self, mock_list_devices):
+    device_names = [
+        ""/job:worker/task:0/device:GPU:0"",
+        ""/job:worker/task:0/device:GPU:1"",
+        ""/job:worker/task:0/device:GPU:2"",
+        ""/job:worker/task:0/device:GPU:3"",
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, ""GPU"", 1024, 0) for name in device_names
+    ]
+    mock_list_devices.return_value = device_list
+
+    resolver = MockBaseClusterResolver()
+    self.assertEqual(resolver.num_accelerators(), 4)
+
+  @mock.patch.object(session.BaseSession, ""list_devices"")
+  def testNumAcceleratorsFilterSuccess(self, mock_list_devices):
+    device_names = [
+        ""/job:worker/task:0/device:TPU:0"",
+        ""/job:worker/task:0/device:TPU:1"",
+        ""/job:worker/task:0/device:TPU:2"",
+        ""/job:worker/task:0/device:TPU:3"",
+    ]
+    device_list = [
+        session._DeviceAttributes(
+            name, ""TPU"", 1024, 0) for name in device_names
+    ]
+    mock_list_devices.return_value = device_list
+
+    resolver = MockBaseClusterResolver()
+    self.assertEqual(resolver.num_accelerators(), 0)
+
 
 class UnionClusterResolverTest(test.TestCase):
   # TODO(frankchn): Transform to parameterized test after it is included in the
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",gce_cluster_resolver.py,"@@ -51,7 +51,6 @@ class GceClusterResolver(ClusterResolver):
                task_type='worker',
                task_index=0,
                rpc_layer='grpc',
-               num_accelerators=0,
                credentials='default',
                service=None):
     """"""Creates a new GceClusterResolver object.
@@ -73,8 +72,6 @@ class GceClusterResolver(ClusterResolver):
         can be distinguished from each other.
       rpc_layer: The RPC layer TensorFlow should use to communicate across
         instances.
-      num_accelerators: Number of accelerators (GPUs) present per
-        instance.
       credentials: GCE Credentials. If nothing is specified, this defaults to
         GoogleCredentials.get_application_default().
       service: The GCE API object returned by the googleapiclient.discovery
@@ -90,7 +87,6 @@ class GceClusterResolver(ClusterResolver):
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
-    self._num_accelerators = num_accelerators
     self._port = port
     self._credentials = credentials
 
@@ -201,12 +197,3 @@ class GceClusterResolver(ClusterResolver):
   @rpc_layer.setter
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
-
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # Unused
-    del task_type, task_index, accelerator_type, config_proto
-    return self._num_accelerators
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",kubernetes_cluster_resolver.py,"@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-from tensorflow.python.client import device_lib
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
 from tensorflow.python.training import server_lib
@@ -167,16 +166,3 @@ class KubernetesClusterResolver(ClusterResolver):
     on internal systems.
     """"""
     return ''
-
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # TODO(frankchn): Make querying non-local accelerators work
-    if task_type is not None or task_index is not None:
-      raise NotImplementedError('Querying non-local accelerators is not yet'
-                                'implemented.')
-
-    local_devices = device_lib.list_local_devices(config_proto)
-    return sum(d.device_type == accelerator_type for d in local_devices)
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",tfconfig_cluster_resolver.py,"@@ -54,8 +54,7 @@ class TFConfigClusterResolver(ClusterResolver):
                task_type=None,
                task_index=None,
                rpc_layer=None,
-               environment=None,
-               num_accelerators=0):
+               environment=None):
     """"""Creates a new TFConfigClusterResolver.
 
     Args:
@@ -66,17 +65,11 @@ class TFConfigClusterResolver(ClusterResolver):
       rpc_layer: (String, optional) Overrides the rpc layer TensorFlow uses.
       environment: (String, optional) Overrides the environment TensorFlow
         operates in.
-      num_accelerators: (Integer, optional) Specifies the number of
-        accelerators (e.g. GPUs, TPUs, others) that each node has.
     """"""
-    # TODO(frankchn): num_accelerators is a stop-gap and will be removed
-    # in favor of autodetection of devices soon.
-
     self._task_type = task_type
     self._task_index = task_index
     self._rpc_layer = rpc_layer
     self._environment = environment
-    self._num_accelerators = num_accelerators
 
   @property
   def task_type(self):
@@ -117,16 +110,6 @@ class TFConfigClusterResolver(ClusterResolver):
   def rpc_layer(self, rpc_layer):
     self._rpc_layer = rpc_layer
 
-  def num_accelerators(self,
-                       task_type=None,
-                       task_index=None,
-                       accelerator_type='GPU',
-                       config_proto=None):
-    # TODO(frankchn): Connect to server (w/ session_config) in the future.
-    # Unused, we do not connect to another server here right now.
-    del task_type, task_index, accelerator_type, config_proto
-    return self._num_accelerators
-
   def cluster_spec(self):
     """"""Returns a ClusterSpec based on the TF_CONFIG environment variable.
 
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",tfconfig_cluster_resolver_test.py,"@@ -168,13 +168,11 @@ class TFConfigClusterResolverTest(test.TestCase):
     }
     """"""
 
-    cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0,
-                                               num_accelerators=8)
+    cluster_resolver = TFConfigClusterResolver(task_type='ps', task_index=0)
 
     self.assertEqual('grpc://ps0:2222', cluster_resolver.master())
     self.assertEqual('ps', cluster_resolver.task_type)
     self.assertEqual(0, cluster_resolver.task_index)
-    self.assertEqual(8, cluster_resolver.num_accelerators())
 
     cluster_resolver.task_type = 'worker'
     cluster_resolver.task_index = 1
",0,train
ea02fb88d2abe11b1a7779abb0a7d50e07f9d7b8,tensorflow/tensorflow,"Unify num_accelerators for all Cluster Resolvers

PiperOrigin-RevId: 224843723",tpu_cluster_resolver.py,"@@ -25,11 +25,10 @@ import re
 from six.moves.urllib.request import Request
 from six.moves.urllib.request import urlopen
 
-from tensorflow.python.client import session
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import ClusterResolver
 from tensorflow.python.distribute.cluster_resolver.cluster_resolver import format_master_url
+from tensorflow.python.distribute.cluster_resolver.cluster_resolver import get_accelerator_devices
 from tensorflow.python.framework import errors
-from tensorflow.python.framework import ops
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
 from tensorflow.python.util import compat
@@ -451,17 +450,16 @@ class TPUClusterResolver(ClusterResolver):
         retrieve the system metadata.
 
     Raises:
-      RuntimeError: If this is used with a non-TPU accelerator_type.
+      RuntimeError: If we cannot talk to a TPU worker after retrying or if the
+        number of TPU devices per host is different.
     """"""
     retry_count = 1
     # TODO(b/120564445): Replace with standard library for retries.
     while True:
       try:
-        with ops.Graph().as_default():
-          with session.Session(self.master(), config=config_proto) as s:
-            devices = s.list_devices()
-            device_details = _get_device_dict_and_cores(devices)
-            break
+        device_details = _get_device_dict_and_cores(
+            get_accelerator_devices(self.master(), config_proto=config_proto))
+        break
       except errors.DeadlineExceededError:
         error_message = ('Failed to connect to master. The TPU might not be '
                          'ready (e.g. still scheduling) or the master '
",0,train
add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly.

This included fixing a bug where shape inference caught an incorrect shape,
but since eager mode doesn't run shape inference the core code caused a
segfault.

PiperOrigin-RevId: 237316781",decode_proto_op_test_base.py,"@@ -296,14 +296,13 @@ class DecodeProtoOpTestBase(test_base.ProtoOpTestBase, parameterized.TestCase):
     field_names = ['sizes']
     field_types = [dtypes.int32]
 
-    with self.cached_session() as sess:
-      ctensor, vtensor = self._decode_module.decode_proto(
-          batch,
-          message_type=msg_type,
-          field_names=field_names,
-          output_types=field_types,
-          sanitize=sanitize)
-      with self.assertRaisesRegexp(errors.DataLossError,
-                                   'Unable to parse binary protobuf'
-                                   '|Failed to consume entire buffer'):
-        _ = sess.run([ctensor] + vtensor)
+    with self.assertRaisesRegexp(
+        errors.DataLossError, 'Unable to parse binary protobuf'
+        '|Failed to consume entire buffer'):
+      self.evaluate(
+          self._decode_module.decode_proto(
+              batch,
+              message_type=msg_type,
+              field_names=field_names,
+              output_types=field_types,
+              sanitize=sanitize))
",0,train
add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly.

This included fixing a bug where shape inference caught an incorrect shape,
but since eager mode doesn't run shape inference the core code caused a
segfault.

PiperOrigin-RevId: 237316781",encode_proto_op_test_base.py,"@@ -30,7 +30,9 @@ from google.protobuf import text_format
 
 from tensorflow.contrib.proto.python.kernel_tests import proto_op_test_base as test_base
 from tensorflow.contrib.proto.python.kernel_tests import test_example_pb2
+from tensorflow.python.eager import context
 from tensorflow.python.framework import dtypes
+from tensorflow.python.framework import errors
 from tensorflow.python.ops import array_ops
 
 
@@ -50,56 +52,86 @@ class EncodeProtoOpTestBase(test_base.ProtoOpTestBase, parameterized.TestCase):
     self._decode_module = decode_module
     self._encode_module = encode_module
 
+  def testBadSizesShape(self):
+    if context.executing_eagerly():
+      expected_error = (errors.InvalidArgumentError,
+                        r'Invalid shape for field double_value.')
+    else:
+      expected_error = (ValueError,
+                        r'Shape must be at least rank 2 but is rank 0')
+    with self.assertRaisesRegexp(*expected_error):
+      self.evaluate(
+          self._encode_module.encode_proto(
+              sizes=1,
+              values=[np.double(1.0)],
+              message_type='tensorflow.contrib.proto.TestValue',
+              field_names=['double_value']))
+
   def testBadInputs(self):
     # Invalid field name
-    with self.cached_session():
-      with self.assertRaisesOpError('Unknown field: non_existent_field'):
-        self._encode_module.encode_proto(
-            sizes=[[1]],
-            values=[np.array([[0.0]], dtype=np.int32)],
-            message_type='tensorflow.contrib.proto.TestValue',
-            field_names=['non_existent_field']).eval()
+    with self.assertRaisesOpError('Unknown field: non_existent_field'):
+      self.evaluate(
+          self._encode_module.encode_proto(
+              sizes=[[1]],
+              values=[np.array([[0.0]], dtype=np.int32)],
+              message_type='tensorflow.contrib.proto.TestValue',
+              field_names=['non_existent_field']))
 
     # Incorrect types.
-    with self.cached_session():
-      with self.assertRaisesOpError(
-          'Incompatible type for field double_value.'):
-        self._encode_module.encode_proto(
-            sizes=[[1]],
-            values=[np.array([[0.0]], dtype=np.int32)],
-            message_type='tensorflow.contrib.proto.TestValue',
-            field_names=['double_value']).eval()
+    with self.assertRaisesOpError('Incompatible type for field double_value.'):
+      self.evaluate(
+          self._encode_module.encode_proto(
+              sizes=[[1]],
+              values=[np.array([[0.0]], dtype=np.int32)],
+              message_type='tensorflow.contrib.proto.TestValue',
+              field_names=['double_value']))
 
     # Incorrect shapes of sizes.
-    with self.cached_session():
+    for sizes_value in 1, np.array([[[0, 0]]]):
       with self.assertRaisesOpError(
           r'sizes should be batch_size \+ \[len\(field_names\)\]'):
-        sizes = array_ops.placeholder(dtypes.int32)
-        values = array_ops.placeholder(dtypes.float64)
-        self._encode_module.encode_proto(
-            sizes=sizes,
-            values=[values],
-            message_type='tensorflow.contrib.proto.TestValue',
-            field_names=['double_value']).eval(feed_dict={
-                sizes: [[[0, 0]]],
-                values: [[0.0]]
-            })
+        if context.executing_eagerly():
+          self.evaluate(
+              self._encode_module.encode_proto(
+                  sizes=sizes_value,
+                  values=[np.array([[0.0]])],
+                  message_type='tensorflow.contrib.proto.TestValue',
+                  field_names=['double_value']))
+        else:
+          with self.cached_session():
+            sizes = array_ops.placeholder(dtypes.int32)
+            values = array_ops.placeholder(dtypes.float64)
+            self._encode_module.encode_proto(
+                sizes=sizes,
+                values=[values],
+                message_type='tensorflow.contrib.proto.TestValue',
+                field_names=['double_value']).eval(feed_dict={
+                    sizes: sizes_value,
+                    values: [[0.0]]
+                })
 
     # Inconsistent shapes of values.
-    with self.cached_session():
-      with self.assertRaisesOpError(
-          'Values must match up to the last dimension'):
-        sizes = array_ops.placeholder(dtypes.int32)
-        values1 = array_ops.placeholder(dtypes.float64)
-        values2 = array_ops.placeholder(dtypes.int32)
-        (self._encode_module.encode_proto(
-            sizes=[[1, 1]],
-            values=[values1, values2],
-            message_type='tensorflow.contrib.proto.TestValue',
-            field_names=['double_value', 'int32_value']).eval(feed_dict={
-                values1: [[0.0]],
-                values2: [[0], [0]]
-            }))
+    with self.assertRaisesOpError('Values must match up to the last dimension'):
+      if context.executing_eagerly():
+        self.evaluate(
+            self._encode_module.encode_proto(
+                sizes=[[1, 1]],
+                values=[np.array([[0.0]]),
+                        np.array([[0], [0]])],
+                message_type='tensorflow.contrib.proto.TestValue',
+                field_names=['double_value', 'int32_value']))
+      else:
+        with self.cached_session():
+          values1 = array_ops.placeholder(dtypes.float64)
+          values2 = array_ops.placeholder(dtypes.int32)
+          (self._encode_module.encode_proto(
+              sizes=[[1, 1]],
+              values=[values1, values2],
+              message_type='tensorflow.contrib.proto.TestValue',
+              field_names=['double_value', 'int32_value']).eval(feed_dict={
+                  values1: [[0.0]],
+                  values2: [[0], [0]]
+              }))
 
   def _testRoundtrip(self, in_bufs, message_type, fields):
 
",0,train
add7a1a911b430ed14f8b6a1609dd3796587d131,tensorflow/tensorflow,"Make tf.contrib.proto.* TF2-friendly.

This included fixing a bug where shape inference caught an incorrect shape,
but since eager mode doesn't run shape inference, the incorrect shape reached
the core code and caused a segfault.

PiperOrigin-RevId: 237316781",encode_proto_op.cc,"@@ -525,11 +525,16 @@ class EncodeProtoOp : public OpKernel {
           ctx,
           proto_utils::IsCompatibleType(field_descs_[i]->type(), v.dtype()),
           errors::InvalidArgument(
-              ""Incompatible type for field "" + field_names_[i] +
-                  "".  Saw dtype: "",
-              DataTypeString(v.dtype()),
+              ""Incompatible type for field "", field_names_[i],
+              "".  Saw dtype: "", DataTypeString(v.dtype()),
               "" but field type is: "", field_descs_[i]->type_name()));
 
+      OP_REQUIRES(
+          ctx, TensorShapeUtils::IsMatrixOrHigher(v.shape()),
+          errors::InvalidArgument(""Invalid shape for field "", field_names_[i],
+                                  "".  Saw shape "", v.shape().DebugString(),
+                                  "" but it should be at least a matrix.""));
+
       // All value tensors must have the same shape prefix (i.e. batch size).
       TensorShape shape_prefix = v.shape();
       shape_prefix.RemoveDim(shape_prefix.dims() - 1);
",0,train
016e44afb875b8316b0d7239ebab1f92882aaf82,tensorflow/tensorflow,Typo correction in resize_bicubic_op.cc,resize_bicubic_op.cc,"@@ -130,7 +130,7 @@ class CachedInterpolation {
     }
     // We use 2 hands and walk through, copying from one to another where
     // we already have values.
-    // Invarient, new_indicies_hand <= cached_values_hand
+    // Invariant, new_indicies_hand <= cached_values_hand
     const std::array<int64, 4> new_x_indices{{x_0, x_1, x_2, x_3}};
     int cached_values_hand = 0;
     int new_indicies_hand = 0;
",0,test
410ef4f3b097d3ff47d2bc342bb3ac5bc9aedf72,tensorflow/tensorflow,Fix typo,mnist.py,"@@ -153,7 +153,7 @@ def evaluation(logits, labels):
   """"""
   # For a classifier model, we can use the in_top_k Op.
   # It returns a bool tensor with shape [batch_size] that is true for
-  # the examples where the label's is was in the top k (here k=1)
+  # the examples where the label is in the top k (here k=1)
   # of all logits for that example.
   correct = tf.nn.in_top_k(logits, labels, 1)
   # Return the number of true entries.
",0,train
86c8647f110220835c7783f96bf563fcc369378b,tensorflow/tensorflow,"Proxy decorator_target.__get__ in TFDecorator

Prior to this change TFDecorator.__get__ mimicked the behaviour of functions
via partial(self.__call__, instance). This is not needed as calling
__get__ on a function would have a ~similar effect, and is in fact incorrect
if target implements a custom __get__ method.

PiperOrigin-RevId: 234957239",tf_decorator.py,"@@ -59,7 +59,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import functools as _functools
 import traceback as _traceback
 
 
@@ -212,8 +211,8 @@ class TFDecorator(object):
     else:
       self.__doc__ = ''
 
-  def __get__(self, obj, objtype):
-    return _functools.partial(self.__call__, obj)
+  def __get__(self, instance, owner):
+    return self._decorated_target.__get__(instance, owner)
 
   def __call__(self, *args, **kwargs):
     return self._decorated_target(*args, **kwargs)
",0,train
86c8647f110220835c7783f96bf563fcc369378b,tensorflow/tensorflow,"Proxy decorator_target.__get__ in TFDecorator

Prior to this change TFDecorator.__get__ mimicked the behaviour of functions
via partial(self.__call__, instance). This is not needed as calling
__get__ on a function would have a ~similar effect, and is in fact incorrect
if target implements a custom __get__ method.

PiperOrigin-RevId: 234957239",tf_decorator_test.py,"@@ -170,6 +170,17 @@ class TfDecoratorTest(test.TestCase):
     self.assertEqual('Return parameters.',
                      TestDecoratedClass().return_params.__doc__)
 
+  def testTarget__get__IsProxied(self):
+    class Descr(object):
+
+      def __get__(self, instance, owner):
+        return self
+
+    class Foo(object):
+      foo = tf_decorator.TFDecorator('Descr', Descr())
+
+    self.assertIsInstance(Foo.foo, Descr)
+
 
 def test_wrapper(*args, **kwargs):
   return test_function(*args, **kwargs)
",0,train
39ef9c36e9ba11ddfc222eec57027d478f26b6f7,tensorflow/tensorflow,"In interactive_graphviz, more complete help command",interactive_graphviz.cc,"@@ -168,7 +168,9 @@ void DoHelpCommand() {
     <height> is specified, the new computation contains nodes up to <height>
     nodes above the root.
   help
-    Prints this usage information.)""
+    Prints this usage information.
+  quit
+    Exit the application.)""
             << std::endl;
 }
 
",0,train
b4a71efb3b1ebf0184bfc18d6b423e8aae952010,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled div kernels for i8, ui32, and ui64 on GPU

PiperOrigin-RevId: 404234804
Change-Id: I8f391254bdcea06429bd88b42b54ad2b18501b54",gpu_binary_ops_test.cc,"@@ -347,6 +347,26 @@ GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
     test::DefaultInputNonZero<uint16_t>(), baseline_div,
     test::OpsTestConfig().ExpectStrictlyEqual())
 
+// These kernels are JIT-compiled.
+#if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED) && \
+    defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
+    Div,
+    /*test_name=*/Int8, int8_t, int8_t, test::DefaultInput<int8_t>(),
+    test::DefaultInputNonZero<int8_t>(), baseline_div,
+    test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
+    Div,
+    /*test_name=*/Uint32, uint32_t, uint32_t, test::DefaultInput<uint32_t>(),
+    test::DefaultInputNonZero<uint32_t>(), baseline_div,
+    test::OpsTestConfig().ExpectStrictlyEqual())
+GENERATE_DEFAULT_TESTS_WITH_SPECIFIC_INPUT_VALUES(
+    Div,
+    /*test_name=*/Uint64, uint64_t, uint64_t, test::DefaultInput<uint64_t>(),
+    test::DefaultInputNonZero<uint64_t>(), baseline_div,
+    test::OpsTestConfig().ExpectStrictlyEqual())
+#endif
+
 // The following tests don't work with Eigen kernels if the Eigen kernels are
 // compiled with nvcc.
 #if defined(MLIR_GENERATED_GPU_KERNELS_ENABLED)
",0,train
b4a71efb3b1ebf0184bfc18d6b423e8aae952010,tensorflow/tensorflow,"[MLIR][KernelGen] Add experimental JIT-compiled div kernels for i8, ui32, and ui64 on GPU

PiperOrigin-RevId: 404234804
Change-Id: I8f391254bdcea06429bd88b42b54ad2b18501b54",gpu_op_div.cc,"@@ -28,6 +28,13 @@ GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_INT64);
 GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX64);
 GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_COMPLEX128);
 
+// These kernels are JIT-compiled.
+#if defined(MLIR_GENERATED_EXPERIMENTAL_KERNELS_ENABLED)
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_INT8);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_UINT32);
+GENERATE_AND_REGISTER_BINARY_GPU_KERNEL(Div, DT_UINT64);
+#endif
+
 REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_HALF, DT_HALF);
 REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_FLOAT, DT_FLOAT);
 REGISTER_ALIASED_GPU_KERNEL(RealDiv, Div, DT_DOUBLE, DT_DOUBLE);
",0,train
69f60d4c8cb5edb6fdc63b837b6db29562d28744,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2018-10-09

PiperOrigin-RevId: 216323343",compat.py,"@@ -26,7 +26,7 @@ import datetime
 from tensorflow.python.util import tf_contextlib
 from tensorflow.python.util.tf_export import tf_export
 
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 8)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2018, 10, 9)
 
 
 @tf_export(""compat.forward_compatible"")
",0,train
e18c81a0c5e20c170c4de5d1c484034eafd2aa55,tensorflow/tensorflow,"Fix sign errors in tf.contrib.distributions.logistic.{log_cdf, log_survival_function} and add accompanying tests. Fixes #10131

PiperOrigin-RevId: 157259406",logistic_test.py,"@@ -71,6 +71,52 @@ class LogisticTest(test.TestCase):
       self.assertEqual(cdf.get_shape(), (6,))
       self.assertAllClose(cdf.eval(), expected_cdf)
 
+  def testLogisticLogCDF(self):
+    with self.test_session():
+      batch_size = 6
+      np_loc = np.array([2.0] * batch_size, dtype=np.float32)
+      loc = constant_op.constant(np_loc)
+      scale = 1.5
+
+      dist = logistic.Logistic(loc, scale)
+      x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32)
+      logcdf = dist.log_cdf(x)
+      expected_logcdf = stats.logistic.logcdf(x, np_loc, scale)
+
+      self.assertEqual(logcdf.get_shape(), (6,))
+      self.assertAllClose(logcdf.eval(), expected_logcdf)
+
+  def testLogisticSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 6
+      np_loc = np.array([2.0] * batch_size, dtype=np.float32)
+      loc = constant_op.constant(np_loc)
+      scale = 1.5
+
+      dist = logistic.Logistic(loc, scale)
+      x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32)
+      survival_function = dist.survival_function(x)
+      expected_survival_function = stats.logistic.sf(x, np_loc, scale)
+
+      self.assertEqual(survival_function.get_shape(), (6,))
+      self.assertAllClose(survival_function.eval(), expected_survival_function)
+
+  def testLogisticLogSurvivalFunction(self):
+    with self.test_session():
+      batch_size = 6
+      np_loc = np.array([2.0] * batch_size, dtype=np.float32)
+      loc = constant_op.constant(np_loc)
+      scale = 1.5
+
+      dist = logistic.Logistic(loc, scale)
+      x = np.array([2.5, 2.5, 4.0, 0.1, 1.0, 2.0], dtype=np.float32)
+      logsurvival_function = dist.log_survival_function(x)
+      expected_logsurvival_function = stats.logistic.logsf(x, np_loc, scale)
+
+      self.assertEqual(logsurvival_function.get_shape(), (6,))
+      self.assertAllClose(logsurvival_function.eval(),
+                          expected_logsurvival_function)
+
   def testLogisticMean(self):
     with self.test_session():
       loc = [2.0, 1.5, 1.0]
",0,train
e18c81a0c5e20c170c4de5d1c484034eafd2aa55,tensorflow/tensorflow,"Fix sign errors in tf.contrib.distributions.logistic.{log_cdf, log_survival_function} and add accompanying tests. Fixes #10131

PiperOrigin-RevId: 157259406",logistic.py,"@@ -190,13 +190,13 @@ class Logistic(distribution.Distribution):
     return math_ops.exp(self._log_prob(x))
 
   def _log_cdf(self, x):
-    return nn_ops.softplus(-self._z(x))
+    return -nn_ops.softplus(-self._z(x))
 
   def _cdf(self, x):
     return math_ops.sigmoid(self._z(x))
 
   def _log_survival_function(self, x):
-    return nn_ops.softplus(self._z(x))
+    return -nn_ops.softplus(self._z(x))
 
   def _survival_function(self, x):
     return math_ops.sigmoid(-self._z(x))
",0,train
e3930fc11f042416a34ed5526bc506e1e0e32660,tensorflow/tensorflow,"Add user_ops.my_fact to the new TensorFlow API.

PiperOrigin-RevId: 189415577",user_ops.py,"@@ -23,8 +23,10 @@ from tensorflow.python.ops import gen_user_ops as _gen_user_ops
 
 # go/tf-wildcard-import
 from tensorflow.python.ops.gen_user_ops import *  # pylint: disable=wildcard-import
+from tensorflow.python.util.tf_export import tf_export
 
 
+@tf_export('user_ops.my_fact')
 def my_fact():
   """"""Example of overriding the generated code for an Op.""""""
   return _gen_user_ops.fact()
",0,train
e3930fc11f042416a34ed5526bc506e1e0e32660,tensorflow/tensorflow,"Add user_ops.my_fact to the new TensorFlow API.

PiperOrigin-RevId: 189415577",api_compatibility_test.py,"@@ -268,17 +268,6 @@ class ApiCompatibilityTest(test.TestCase):
         for filename in golden_file_list
     }
 
-    # user_ops is an empty module. It is currently available in TensorFlow API
-    # but we don't keep empty modules in the new API.
-    # We delete user_ops from golden_proto_dict to make sure assert passes
-    # when diffing new API against goldens.
-    # TODO(annarev): remove user_ops from goldens once we switch to new API.
-    tf_module = golden_proto_dict['tensorflow'].tf_module
-    for i in range(len(tf_module.member)):
-      if tf_module.member[i].name == 'user_ops':
-        del tf_module.member[i]
-        break
-
     # Diff them. Do not fail if called with update.
     # If the test is run to update goldens, only report diffs but do not fail.
     self._AssertProtoDictEquals(
",0,train
7fdcb7d20e3ced8e415cf18ad3cdd519ff34403c,tensorflow/tensorflow,"Convert `DCHECK` for L2 metric to an `OP_REQUIRES`.

PiperOrigin-RevId: 411121699
Change-Id: Ie0e981aabb7e0db5deb93db55269a34519fead0f",stats_ops.cc,"@@ -326,7 +326,8 @@ class BoostedTreesCalculateBestFeatureSplitOp : public OpKernel {
     OP_REQUIRES(context, l2_t->NumElements() == 1,
                 errors::InvalidArgument(""l2 argument must be a scalar""));
     const auto l2 = l2_t->scalar<float>()();
-    DCHECK_GE(l2, 0);
+    OP_REQUIRES(context, l2 >= 0,
+                errors::InvalidArgument(""l2 = "", l2, "" but it should be >= 0""));
 
     const Tensor* tree_complexity_t;
     OP_REQUIRES_OK(context,
",0,test
5890405631a44f53a2d4d5c0ce5b625e1ae340cb,tensorflow/tensorflow,"Move IsZeroVector and BatchQuantizeFloats calls to top.

Update Calibration and FP16 versions to be consistent with the Float version.

PiperOrigin-RevId: 319050730
Change-Id: I1f026189f0b71570f230e794a690dca5be30d597",lstm_eval.cc,"@@ -511,6 +511,12 @@ inline void LstmStepFloat(
   float* cell_gate_scratch = scratch2;
   float* output_gate_scratch = scratch3;
 
+  const bool is_input_all_zeros =
+      tensor_utils::IsZeroVector(input_ptr, n_batch * n_input);
+  const bool is_aux_input_all_zeros =
+      (aux_input_ptr == nullptr ||
+       tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+
   // Initialize scratch buffers with bias for regular lstm or initialize with
   // zero for layer norm lstm.
   if (use_layer_norm) {
@@ -535,7 +541,7 @@ inline void LstmStepFloat(
 
   // For each batch and cell: compute input_weight * input.
   // Skip if input is all zeros.
-  if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) {
+  if (!is_input_all_zeros) {
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
           input_to_input_weights_ptr, n_cell, n_input, input_ptr, n_batch,
@@ -555,8 +561,7 @@ inline void LstmStepFloat(
 
   // For each batch and cell: compute aux_input_weight * aux_input.
   // Skip if auxiliary input is not available or all zeros.
-  if (aux_input_ptr != nullptr &&
-      !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) {
+  if (!is_aux_input_all_zeros) {
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
           aux_input_to_input_weights_ptr, n_cell, n_aux_input, aux_input_ptr,
@@ -807,28 +812,6 @@ inline void LstmStepHybrid(
   float* cell_gate_scratch = scratch2;
   float* output_gate_scratch = scratch3;
 
-  // Initialize scratch buffers with bias for regular lstm or initialize with
-  // zero for layer norm lstm.
-  if (use_layer_norm) {
-    if (!use_cifg) {
-      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
-    }
-    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
-    std::fill_n(cell_gate_scratch, n_cell * n_batch, 0.0f);
-    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
-  } else {
-    if (!use_cifg) {
-      tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
-                                            n_batch, input_gate_scratch);
-    }
-    tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
-                                          forget_gate_scratch);
-    tensor_utils::VectorBatchVectorAssign(cell_gate_bias_ptr, n_cell, n_batch,
-                                          cell_gate_scratch);
-    tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
-                                          output_gate_scratch);
-  }
-
   int32_t* input_to_input_row_sums = nullptr;
   int32_t* input_to_forget_row_sums = nullptr;
   int32_t* input_to_cell_row_sums = nullptr;
@@ -896,10 +879,53 @@ inline void LstmStepHybrid(
     }
   }
 
-  if (!tensor_utils::IsZeroVector(input_ptr, n_batch * n_input)) {
+  const bool is_input_all_zeros =
+      tensor_utils::IsZeroVector(input_ptr, n_batch * n_input);
+  const bool is_aux_input_all_zeros =
+      (aux_input_ptr == nullptr ||
+       tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input));
+  const bool is_output_state_all_zeros =
+      tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output);
+
+  if (!is_input_all_zeros) {
     tensor_utils::BatchQuantizeFloats(input_ptr, n_batch, n_input,
                                       quantized_input_ptr, input_sf, input_zp,
                                       asymmetric_quantize_inputs);
+  }
+  if (!is_aux_input_all_zeros) {
+    tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input,
+                                      quantized_aux_input_ptr, aux_input_sf,
+                                      aux_input_zp, asymmetric_quantize_inputs);
+  }
+  if (!is_output_state_all_zeros) {
+    tensor_utils::BatchQuantizeFloats(
+        output_state_ptr, n_batch, n_output, quantized_output_state_ptr,
+        output_state_sf, output_state_zp, asymmetric_quantize_inputs);
+  }
+
+  // Initialize scratch buffers with bias for regular lstm or initialize with
+  // zero for layer norm lstm.
+  if (use_layer_norm) {
+    if (!use_cifg) {
+      std::fill_n(input_gate_scratch, n_cell * n_batch, 0.0f);
+    }
+    std::fill_n(forget_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(cell_gate_scratch, n_cell * n_batch, 0.0f);
+    std::fill_n(output_gate_scratch, n_cell * n_batch, 0.0f);
+  } else {
+    if (!use_cifg) {
+      tensor_utils::VectorBatchVectorAssign(input_gate_bias_ptr, n_cell,
+                                            n_batch, input_gate_scratch);
+    }
+    tensor_utils::VectorBatchVectorAssign(forget_gate_bias_ptr, n_cell, n_batch,
+                                          forget_gate_scratch);
+    tensor_utils::VectorBatchVectorAssign(cell_gate_bias_ptr, n_cell, n_batch,
+                                          cell_gate_scratch);
+    tensor_utils::VectorBatchVectorAssign(output_gate_bias_ptr, n_cell, n_batch,
+                                          output_gate_scratch);
+  }
+
+  if (!is_input_all_zeros) {
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
           input_to_input_weights_ptr, n_cell, n_input, quantized_input_ptr,
@@ -933,12 +959,7 @@ inline void LstmStepHybrid(
 
   // For each batch and cell: compute aux_input_weight * aux_input.
   // Skip if auxiliary input is not available or all zeros.
-  if (aux_input_ptr != nullptr &&
-      !tensor_utils::IsZeroVector(aux_input_ptr, n_batch * n_aux_input)) {
-    tensor_utils::BatchQuantizeFloats(aux_input_ptr, n_batch, n_aux_input,
-                                      quantized_aux_input_ptr, aux_input_sf,
-                                      aux_input_zp, asymmetric_quantize_inputs);
-
+  if (!is_aux_input_all_zeros) {
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
           aux_input_to_input_weights_ptr, n_cell, n_aux_input,
@@ -973,11 +994,7 @@ inline void LstmStepHybrid(
         context);
   }
 
-  if (!tensor_utils::IsZeroVector(output_state_ptr, n_batch * n_output)) {
-    // Save quantization and matmul computation for all zero input.
-    tensor_utils::BatchQuantizeFloats(
-        output_state_ptr, n_batch, n_output, quantized_output_state_ptr,
-        output_state_sf, output_state_zp, asymmetric_quantize_inputs);
+  if (!is_output_state_all_zeros) {
     // For each batch and cell: compute recurrent_weight * output_state.
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
",0,train
5890405631a44f53a2d4d5c0ce5b625e1ae340cb,tensorflow/tensorflow,"Move IsZeroVector and BatchQuantizeFloats calls to top.

Update Calibration and FP16 versions to be consistent with the Float version.

PiperOrigin-RevId: 319050730
Change-Id: I1f026189f0b71570f230e794a690dca5be30d597",lstm.cc,"@@ -62,7 +62,7 @@ void UpdateLstmCellFloat(int n_batch, int n_cell, float* cell_state,
   }
 }
 
-void CalculateLstmOutputFloat(
+void CalculateLstmOutputCalibration(
     int n_batch, int n_cell, int n_output, const float* cell_state,
     const float* output_gate, TfLiteFusedActivation activation,
     const float* projection_weights, const float* projection_bias,
@@ -97,7 +97,7 @@ void CalculateLstmOutputFloat(
   }
 }
 
-inline void LstmStepWithAuxInput(
+inline void LstmStepCalibration(
     const float* input_ptr, const float* input_to_input_weights_ptr,
     const float* input_to_forget_weights_ptr,
     const float* input_to_cell_weights_ptr,
@@ -126,18 +126,19 @@ inline void LstmStepWithAuxInput(
     float* scratch1, float* scratch2, float* scratch3, float* output_ptr,
     Logger* logger, const std::vector<int>& intermediate_tensor_indexes,
     ErrorReporter* error_reporter) {
-  // Make named scratch buffers for the different gates.
-  float* input_gate_scratch = scratch0;
-  float* forget_gate_scratch = scratch1;
-  float* cell_gate_scratch = scratch2;
-  float* output_gate_scratch = scratch3;
-
+  ruy::profiler::ScopeLabel label(""LstmStepCalibration"");
   // Since we have already checked that weights are all there or none, we can
   // check the existence of only one to the get the condition.
   const bool use_cifg = (input_to_input_weights_ptr == nullptr);
   const bool use_peephole = (cell_to_output_weights_ptr != nullptr);
   const bool use_layer_norm = (forget_layer_norm_coefficients_ptr != nullptr);
 
+  // Make named scratch buffers for the different gates.
+  float* input_gate_scratch = scratch0;
+  float* forget_gate_scratch = scratch1;
+  float* cell_gate_scratch = scratch2;
+  float* output_gate_scratch = scratch3;
+
   // Initialize scratch buffers with bias for regular lstm or initialize with
   // zero for layer norm lstm.
   if (use_layer_norm) {
@@ -177,7 +178,8 @@ inline void LstmStepWithAuxInput(
       input_to_output_weights_ptr, n_cell, n_input, input_ptr, n_batch,
       output_gate_scratch);
 
-  // If auxiliary input is available then compute aux_input_weight * aux_input
+  // For each batch and cell: compute aux_input_weight * aux_input.
+  // Skip if auxiliary input is not available.
   if (aux_input_ptr != nullptr) {
     if (!use_cifg) {
       tensor_utils::MatrixBatchVectorMultiplyAccumulate(
@@ -293,11 +295,11 @@ inline void LstmStepWithAuxInput(
   tensor_utils::ApplySigmoidToVector(output_gate_scratch, n_batch * n_cell,
                                      output_gate_scratch);
 
-  CalculateLstmOutputFloat(n_batch, n_cell, n_output, cell_state_ptr,
-                           output_gate_scratch, params->activation,
-                           projection_weights_ptr, projection_bias_ptr,
-                           params->proj_clip, output_state_ptr, scratch2,
-                           logger, intermediate_tensor_indexes, error_reporter);
+  CalculateLstmOutputCalibration(
+      n_batch, n_cell, n_output, cell_state_ptr, output_gate_scratch,
+      params->activation, projection_weights_ptr, projection_bias_ptr,
+      params->proj_clip, output_state_ptr, scratch2, logger,
+      intermediate_tensor_indexes, error_reporter);
 
   // Copy output_state to the output. Note that the output batch rows may not be
   // contiguous (output_batch_leading_dim != n_output).
@@ -307,7 +309,7 @@ inline void LstmStepWithAuxInput(
   }
 }
 
-TfLiteStatus EvalFloat(
+TfLiteStatus EvalCalibration(
     const TfLiteTensor* input, const TfLiteTensor* input_to_input_weights,
     const TfLiteTensor* input_to_forget_weights,
     const TfLiteTensor* input_to_cell_weights,
@@ -392,7 +394,7 @@ TfLiteStatus EvalFloat(
       float* output_ptr_time =
           GetTensorData<float>(output) + t_rel * output_step + output_offset;
 
-      LstmStepWithAuxInput(
+      LstmStepCalibration(
           input_ptr, GetTensorData<float>(input_to_input_weights),
           GetTensorData<float>(input_to_forget_weights),
           GetTensorData<float>(input_to_cell_weights),
@@ -454,7 +456,7 @@ TfLiteStatus EvalFloat(
         float* cell_gate_scratch_ptr = cell_gate_scratch + b * n_cell;
         float* output_gate_scratch_ptr = output_gate_scratch + b * n_cell;
 
-        LstmStepWithAuxInput(
+        LstmStepCalibration(
             input_ptr, GetTensorData<float>(input_to_input_weights),
             GetTensorData<float>(input_to_forget_weights),
             GetTensorData<float>(input_to_cell_weights),
@@ -587,7 +589,7 @@ TfLiteStatus lstm_eval(TfLiteContext* context, TfLiteNode* node, Logger* logger,
 
   switch (input_to_output_weights->type) {
     case kTfLiteFloat32: {
-      return EvalFloat(
+      return EvalCalibration(
           input, input_to_input_weights, input_to_forget_weights,
           input_to_cell_weights, input_to_output_weights,
           recurrent_to_input_weights, recurrent_to_forget_weights,
",0,train
246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray, zeros_like, and empty_like.

tf only has a var-length string dtype, thus all fixed-length numpy dtypes 'U8', 'S8' lose their length during a tf.string conversion. We can only guarantee no 'loss' of data during the conversion.

ones_like is updated too, to be consistent with other unary array creation functions. However, ones_like throws an error rather than using string ""1""s as numpy does. I don't intend to make ones_like behave like numpy in this CL.

PiperOrigin-RevId: 391407698
Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_array_ops.py,"@@ -66,13 +66,8 @@ def zeros(shape, dtype=float):  # pylint: disable=redefined-outer-name
 
 @np_utils.np_doc('zeros_like')
 def zeros_like(a, dtype=None):  # pylint: disable=missing-docstring
-  if dtype is None:
-    # We need to let np_utils.result_type decide the dtype, not tf.zeros_like
-    dtype = np_utils.result_type(a)
-  else:
-    # TF and numpy has different interpretations of Python types such as
-    # `float`, so we let `np_utils.result_type` decide.
-    dtype = np_utils.result_type(dtype)
+  dtype = np_utils.result_type_unary(a, dtype)
+
   dtype = dtypes.as_dtype(dtype)  # Work around b/149877262
   return array_ops.zeros_like(a, dtype)
 
@@ -86,10 +81,7 @@ def ones(shape, dtype=float):  # pylint: disable=redefined-outer-name
 
 @np_utils.np_doc('ones_like')
 def ones_like(a, dtype=None):
-  if dtype is None:
-    dtype = np_utils.result_type(a)
-  else:
-    dtype = np_utils.result_type(dtype)
+  dtype = np_utils.result_type_unary(a, dtype)
   return array_ops.ones_like(a, dtype)
 
 
@@ -161,8 +153,7 @@ def _array_internal(val, dtype=None, copy=True, ndmin=0):  # pylint: disable=red
   result_t = val
 
   if not isinstance(result_t, ops.Tensor):
-    if not dtype:
-      dtype = np_utils.result_type(result_t)
+    dtype = np_utils.result_type_unary(result_t, dtype)
     # We can't call `convert_to_tensor(result_t, dtype=dtype)` here because
     # convert_to_tensor doesn't allow incompatible arguments such as (5.5, int)
     # while np.array allows them. We need to convert-then-cast.
",0,train
246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray, zeros_like, and empty_like.

tf only has a var-length string dtype, thus all fixed-length numpy dtypes 'U8', 'S8' lose their length during a tf.string conversion. We can only guarantee no 'loss' of data during the conversion.

ones_like is updated too, to be consistent with other unary array creation functions. However, ones_like throws an error rather than using string ""1""s as numpy does. I don't intend to make ones_like behave like numpy in this CL.

PiperOrigin-RevId: 391407698
Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_array_ops_test.py,"@@ -86,7 +86,7 @@ class ArrayCreationTest(test.TestCase):
 
     self.all_types = [
         int, float, np.int16, np.int32, np.int64, np.float16, np.float32,
-        np.float64
+        np.float64, np.complex64, np.complex128
     ]
 
     source_array_data = [
@@ -1262,6 +1262,51 @@ class ArrayMathTest(test.TestCase, parameterized.TestCase):
     else:
       self.assertAllEqual(result, expected_result)
 
+
+class StringArrayTest(test.TestCase, parameterized.TestCase):
+
+  StringParameters = parameterized.named_parameters(  # pylint: disable=invalid-name
+      # Tensorflow always encodes python string into bytes, regardless of
+      # requested dtype.
+      ('str_u8', 'abcde\U0001f005', 'U8', b'abcde\xf0\x9f\x80\x85'),
+      ('str_s8', 'abcde\U0001f005', 'S8', b'abcde\xf0\x9f\x80\x85'),
+      ('str_none', 'abcde\U0001f005', None, b'abcde\xf0\x9f\x80\x85'),
+      ('zstr_u8', '\0abcde\U0001f005', 'U8', b'\0abcde\xf0\x9f\x80\x85'),
+      ('zstr_s8', '\0abcde\U0001f005', 'S8', b'\0abcde\xf0\x9f\x80\x85'),
+      ('zstr_none', '\0abcde\U0001f005', None, b'\0abcde\xf0\x9f\x80\x85'),
+      ('bytes_u8', b'abcdef', 'U8', b'abcdef'),
+      ('bytes_s8', b'abcdef', 'S8', b'abcdef'),
+      ('bytes_none', b'abcdef', None, b'abcdef'),
+      ('zbytes_u8', b'\0abcdef', 'U8', b'\0abcdef'),
+      ('zbytes_s8', b'\0abcdef', 'S8', b'\0abcdef'),
+      ('zbytes_none', b'\0abcdef', None, b'\0abcdef'),
+  )
+
+  @StringParameters
+  def testArray(self, a, dtype, a_as_bytes):
+    b = np_array_ops.array(a, dtype=dtype)
+    self.assertIsInstance(b.numpy(), bytes)
+    self.assertEqual(b.numpy(), a_as_bytes)
+
+  @StringParameters
+  def testAsArray(self, a, dtype, a_as_bytes):
+    b = np_array_ops.asarray(a, dtype=dtype)
+    self.assertIsInstance(b.numpy(), bytes)
+    self.assertEqual(b.numpy(), a_as_bytes)
+
+  @StringParameters
+  def testZerosLike(self, a, dtype, unused_a_as_bytes):
+    b = np_array_ops.zeros_like(a, dtype=dtype)
+    self.assertIsInstance(b.numpy(), bytes)
+    self.assertEqual(b.numpy(), b'')
+
+  @StringParameters
+  def testEmptyLike(self, a, dtype, unused_a_as_bytes):
+    b = np_array_ops.empty_like(a, dtype=dtype)
+    self.assertIsInstance(b.numpy(), bytes)
+    self.assertEqual(b.numpy(), b'')
+
+
 if __name__ == '__main__':
   ops.enable_eager_execution()
   ops.enable_numpy_style_type_promotion()
",0,train
246c1fab4668b85ab059d5c734be15cb1db4f1d1,tensorflow/tensorflow,"Fix string type inference for array, asarray, zeros_like, and empty_like.

tf only has a var-length string dtype, thus all fixed-length numpy dtypes 'U8', 'S8' lose their length during a tf.string conversion. We can only guarantee no 'loss' of data during the conversion.

ones_like is updated too, to be consistent with other unary array creation functions. However, ones_like throws an error rather than using string ""1""s as numpy does. I don't intend to make ones_like behave like numpy in this CL.

PiperOrigin-RevId: 391407698
Change-Id: I1fbde2c08df210ebf3841b27d9e55c3c75c72282",np_utils.py,"@@ -513,6 +513,24 @@ def result_type(*arrays_and_dtypes):  # pylint: disable=missing-function-docstri
   return np_dtypes._result_type(*arrays_and_dtypes)  # pylint: disable=protected-access
 
 
+def result_type_unary(a, dtype):  # pylint: disable=missing-function-docstring
+  """"""Find the result type from a single input and a dtype.""""""
+  if dtype:
+    # We need to let np_utils.result_type decide the dtype, not tf.zeros_like
+    return result_type(dtype)
+
+  # np_utils.result_type treats string inputs as dtype strings, not as strings.
+  # but for unary we want to treat it as a string input.
+  if isinstance(a, str):
+    return np.unicode_
+  elif isinstance(a, bytes):
+    return np.bytes_
+
+  # TF and numpy has different interpretations of Python types such as
+  # `float`, so we let `np_utils.result_type` decide.
+  return result_type(a)
+
+
 def _result_type_binary(t1, t2):  # pylint: disable=missing-function-docstring
   """"""A specialization of result_type for 2 arguments for performance reasons.""""""
   try:
",0,train
db82f8d7a38bef9a5603eecc8911c005d669794c,tensorflow/tensorflow,"Add all valid fusibles of the original ops to fusibles of a newly created fusion op in multi-output fusion.

PiperOrigin-RevId: 290461690
Change-Id: I80312f9cdeeb0432291c7016b81ae91ce27c1ab0",multi_output_fusion.cc,"@@ -158,8 +158,6 @@ HloInstruction* MultiOutputFusion::CreateFusion(HloInstruction* base,
           base->shape(), HloInstruction::FusionKind::kLoop, base));
 
   // Update candidate_ and all_fusion_candidates_.
-  std::vector<std::pair<HloInstruction*, int64>> new_fusibles =
-      GetNewFusibles(base, to_fuse);
   int64 index;
   if (candidates_index_.contains(input_fusion)) {
     index = candidates_index_[input_fusion];
@@ -170,13 +168,6 @@ HloInstruction* MultiOutputFusion::CreateFusion(HloInstruction* base,
     all_fusion_candidates_.push_back(input_fusion);
   }
 
-  // Update the worklist_.
-  FusionCandidate& candidate_node = candidates_[index];
-  for (auto it : new_fusibles) {
-    candidate_node.fusibles.emplace_back(it.first, it.second);
-    worklist_.emplace(input_fusion, it.first, it.second);
-  }
-
   reachability_->Replace(base, input_fusion);
   TF_CHECK_OK(computation()->ReplaceInstruction(base, input_fusion));
   return input_fusion;
@@ -199,13 +190,19 @@ bool MultiOutputFusion::IsProfitableOperand(HloInstruction* instr) {
 }
 
 std::vector<std::pair<HloInstruction*, int64>>
-MultiOutputFusion::GetNewFusibles(HloInstruction* fusion,
-                                  HloInstruction* fused) {
+MultiOutputFusion::GetNewFusibles(HloInstruction* instr1,
+                                  HloInstruction* instr2) {
+  HloInstruction* fusion = instr1;
+  HloInstruction* fused = instr2;
+  if (is_fused(instr1)) {
+    fusion = instr2;
+    fused = instr1;
+  }
+
   FusionCandidate& fusion_node = candidates_[get_candidate_id(fusion)];
   FusionCandidate& fused_node = candidates_[get_candidate_id(fused)];
 
-  // Update the fusible list for fusion. Variable new_fusibles keeps
-  // track of the new or changed entries.
+  // The second entry of the pair is an old profit value.
   std::vector<std::pair<HloInstruction*, int64>> new_fusibles;
   absl::flat_hash_set<HloInstruction*> in_list;
   auto it = fusion_node.fusibles.begin();
@@ -216,11 +213,7 @@ MultiOutputFusion::GetNewFusibles(HloInstruction* fusion,
       continue;
     }
     in_list.insert(instr);
-    int64 profit = GetProfit(instr, fusion);
-    if (profit > it->second) {
-      it->second = profit;
-      new_fusibles.emplace_back(instr, profit);
-    }
+    new_fusibles.emplace_back(instr, it->second);
     ++it;
   }
 
@@ -235,16 +228,17 @@ MultiOutputFusion::GetNewFusibles(HloInstruction* fusion,
     if (in_list.contains(instr)) {
       continue;
     }
-    int64 profit = GetProfit(instr, fusion);
-    fusion_node.fusibles.emplace_back(instr, profit);
-    new_fusibles.emplace_back(instr, profit);
+    // Set old profit to zero because instr is not originally fusible to
+    // fusion_node.
+    new_fusibles.emplace_back(instr, 0);
   }
   fused_node.fusibles.clear();
 
   return new_fusibles;
 }
 
-void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
+void MultiOutputFusion::UpdateBeforeFuse(HloInstruction* instr1,
+                                         HloInstruction* instr2) {
   HloInstruction* fusion = instr1;
   HloInstruction* fused = instr2;
   if (is_fused(instr1)) {
@@ -264,13 +258,34 @@ void MultiOutputFusion::Update(HloInstruction* instr1, HloInstruction* instr2) {
   // Update the reachability graph.
   UpdateReachability(fusion, fused, all_fusion_candidates_,
                      [this](HloInstruction* instr) { return is_fused(instr); });
+}
 
-  std::vector<std::pair<HloInstruction*, int64>> new_fusibles =
-      GetNewFusibles(fusion, fused);
-
-  // Update the worklist_.
+void MultiOutputFusion::UpdateAfterFuse(
+    HloInstruction* fusion,
+    const std::vector<std::pair<HloInstruction*, int64>>& new_fusibles,
+    bool new_fusion_node) {
+  FusionCandidate& candidate_node = candidates_[candidates_index_[fusion]];
   for (auto it : new_fusibles) {
-    worklist_.emplace(fusion, it.first, it.second);
+    int64 profit = GetProfit(it.first, fusion);
+    if (new_fusion_node) {
+      // If `fusion' is a new fusion node, then add all fusibles.
+      if (profit > 0) {
+        candidate_node.fusibles.emplace_back(it.first, profit);
+        worklist_.emplace(fusion, it.first, profit);
+      }
+    } else {
+      if (profit > it.second) {
+        // If the new profit is higher than the old profit, add the fusible
+        // into worklist.
+        worklist_.emplace(fusion, it.first, profit);
+      }
+      if (it.second == 0) {
+        // If the old profit is zero, that means `it.first' is not
+        // originally fusible to the base op of `fusion', so we must add it
+        // to candidate_node.fusibles.
+        candidate_node.fusibles.emplace_back(it.first, profit);
+      }
+    }
   }
 }
 
@@ -388,17 +403,23 @@ bool MultiOutputFusion::Perform() {
                 << instr2->fused_instructions_computation()->ToString(
                        HloPrintOptions().set_indent_amount(1));
       }
-      Update(instr1, instr2);
-      HloInstruction* ret = Fuse(instr1, instr2);
-      if (ret != instr1) {
+      UpdateBeforeFuse(instr1, instr2);
+      std::vector<std::pair<HloInstruction*, int64>> new_fusibles =
+          GetNewFusibles(instr1, instr2);
+      HloInstruction* fusion = Fuse(instr1, instr2);
+      if (fusion != instr1) {
         set_is_fused(instr1);
       }
-      if (ret != instr2) {
+      if (fusion != instr2) {
         set_is_fused(instr2);
       }
+      UpdateAfterFuse(
+          fusion, new_fusibles,
+          /*new_fusion_node=*/(fusion != instr1) && (fusion != instr2));
+
       changed = true;
-      VLOG(2) << ""After fusion, \t this: "" << ret->name() << ""\n""
-              << ret->fused_instructions_computation()->ToString(
+      VLOG(2) << ""After fusion, \t this: "" << fusion->name() << ""\n""
+              << fusion->fused_instructions_computation()->ToString(
                      HloPrintOptions().set_indent_amount(1));
     }
   }
",0,train
db82f8d7a38bef9a5603eecc8911c005d669794c,tensorflow/tensorflow,"Add all valid fusibles of the original ops to fusibles of a newly created fusion op in multi-output fusion.

PiperOrigin-RevId: 290461690
Change-Id: I80312f9cdeeb0432291c7016b81ae91ce27c1ab0",multi_output_fusion.h,"@@ -110,11 +110,12 @@ class MultiOutputFusion : public HloModulePass {
   // InstructionFusion instead.
   virtual bool DoProducerConsumerMultiOutputFusion();
 
-  // Return a list of new fusible instructions that can be fused into `fusion'
-  // fused with `fused'. The second entry in the vector is a profit value from
-  // fusing the corresponding instruction.
+  // Return a list of fusible instructions that can be fused into the fusion of
+  // instr1 and instr2. The second entry in the vector is an old profit value
+  // from fusing the corresponding instruction and the base op of the new
+  // fusion.
   std::vector<std::pair<HloInstruction*, int64>> GetNewFusibles(
-      HloInstruction* fusion, HloInstruction* fused);
+      HloInstruction* instr1, HloInstruction* instr2);
 
   // Create a new fusion instruction and add `base' into it.
   // Prepare for fusing `to_fuse' into the created fusion by updating
@@ -140,9 +141,16 @@ class MultiOutputFusion : public HloModulePass {
     bool operator<(const ToBeFused& rhs) const { return score < rhs.score; }
   };
 
-  // Update the internal data structures after instr1 and instr2 are fused into
+  // Update the internal data structures before instr1 and instr2 are fused into
   // one fusion instruction.
-  void Update(HloInstruction* instr1, HloInstruction* instr2);
+  void UpdateBeforeFuse(HloInstruction* instr1, HloInstruction* instr2);
+
+  // Update the internal data structures after instructions are fused into
+  // one fusion instruction.
+  void UpdateAfterFuse(
+      HloInstruction* fusion,
+      const std::vector<std::pair<HloInstruction*, int64>>& new_fusibles,
+      bool new_fusion_node);
 
   int64 get_candidate_id(HloInstruction* instr) {
     return FindOrDie(candidates_index_, instr);
",0,train
33bcb53486aa286ad16b0d1d7a2715febf696364,tensorflow/tensorflow,"Allow clients to pass a filename to the constructor of Saver.

PiperOrigin-RevId: 156741424",saver.py,"@@ -656,7 +656,7 @@ class BaseSaverBuilder(object):
       restore_sequentially: A Bool, which if true, causes restore of different
         variables to happen sequentially within each device.
       filename: If known at graph construction time, filename used for variable
-        loading/saving.
+        loading/saving. If None, then the default name ""model"" will be used.
 
     Returns:
       A SaverDef proto.
@@ -674,7 +674,7 @@ class BaseSaverBuilder(object):
     with ops.name_scope(name, ""save"",
                         [saveable.op for saveable in saveables]) as name:
       # Add the Constant string tensor for the filename.
-      filename_tensor = constant_op.constant(filename)
+      filename_tensor = constant_op.constant(filename or ""model"")
 
       # Add the save ops.
       if sharded:
@@ -1033,7 +1033,8 @@ class Saver(object):
                allow_empty=False,
                write_version=saver_pb2.SaverDef.V2,
                pad_step_number=False,
-               save_relative_paths=False):
+               save_relative_paths=False,
+               filename=None):
     """"""Creates a `Saver`.
 
     The constructor adds ops to save and restore variables.
@@ -1109,6 +1110,8 @@ class Saver(object):
       save_relative_paths: If `True`, will write relative paths to the
         checkpoint state file. This is needed if the user wants to copy the
         checkpoint directory and reload from the copied directory.
+      filename: If known at graph construction time, filename used for variable
+        loading/saving.
 
     Raises:
       TypeError: If `var_list` is invalid.
@@ -1132,6 +1135,7 @@ class Saver(object):
     self._is_empty = None
     self._write_version = write_version
     self._pad_step_number = pad_step_number
+    self._filename = filename
     if not defer_build:
       self.build()
     if self.saver_def:
@@ -1164,7 +1168,8 @@ class Saver(object):
           max_to_keep=self._max_to_keep,
           keep_checkpoint_every_n_hours=self._keep_checkpoint_every_n_hours,
           name=self._name,
-          restore_sequentially=self._restore_sequentially)
+          restore_sequentially=self._restore_sequentially,
+          filename=self._filename)
     elif self.saver_def and self._name:
       # Since self._name is used as a name_scope by builder(), we are
       # overloading the use of this field to represent the ""import_scope"" as
",0,train
33bcb53486aa286ad16b0d1d7a2715febf696364,tensorflow/tensorflow,"Allow clients to pass a filename to the constructor of Saver.

PiperOrigin-RevId: 156741424",saver_test.py,"@@ -236,6 +236,15 @@ class SaverTest(test.TestCase):
       self.assertEqual(b""k1"", v2.keys().eval())
       self.assertEqual(30.0, v2.values().eval())
 
+  def testFilenameTensor(self):
+    v0 = variables.Variable(0, name=""v0"")
+    filename = b""somerandomfilename""
+    save = saver_module.Saver({""v0"": v0}, filename=filename)
+    with self.test_session() as sess:
+      tensor = sess.graph.get_tensor_by_name(
+          save.saver_def.filename_tensor_name)
+      self.assertEqual(sess.run(tensor), filename)
+
   def testInvalidPath(self):
     v0 = variables.Variable(0, name=""v0"")
     for ver in (saver_pb2.SaverDef.V1, saver_pb2.SaverDef.V2):
",0,train
0bfdc5429e1c23fdcc94168ef734a152fb981fff,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2019-10-24

PiperOrigin-RevId: 276443625
Change-Id: I20e8893026dac243b663496e15ab17a936d7c2d8",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 23)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2019, 10, 24)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
87a80d8bd2ac0235b2f6632dbc024b0509520897,tensorflow/tensorflow,Addressing review comments,mkl_conv_ops.cc,"@@ -66,6 +66,7 @@ struct MklConvFwdParams {
   memory::dims padding_left;
   memory::dims padding_right;
   MKL_TENSOR_FORMAT tf_fmt;
+  bool native_format;
   string dtypes = string("""");
   struct PostOpParam {
     string name;
@@ -79,7 +80,7 @@ struct MklConvFwdParams {
                    memory::dims bias_dims, memory::dims dst_dims,
                    memory::dims strides, memory::dims dilations,
                    memory::dims padding_left, memory::dims padding_right,
-                   MKL_TENSOR_FORMAT tf_fmt)
+                   MKL_TENSOR_FORMAT tf_fmt, bool native_format)
       : src_dims(src_dims),
         filter_dims(filter_dims),
         bias_dims(bias_dims),
@@ -88,13 +89,13 @@ struct MklConvFwdParams {
         dilations(dilations),
         padding_left(padding_left),
         padding_right(padding_right),
-        tf_fmt(tf_fmt) {}
+        tf_fmt(tf_fmt),
+        native_format(native_format) {}
 };
 
 // With quantization, input, filter, and output can have different types
 // so we use different template parameter for each type
-template <typename Tinput, typename Tfilter, typename Tbias, typename Toutput,
-          bool native_format>
+template <typename Tinput, typename Tfilter, typename Tbias, typename Toutput>
 class MklConvFwdPrimitive : public MklPrimitive {
  public:
   explicit MklConvFwdPrimitive(const MklConvFwdParams& convFwdDims)
@@ -233,7 +234,7 @@ class MklConvFwdPrimitive : public MklPrimitive {
 
   void Setup(const MklConvFwdParams& convFwdDims) {
     MEMORY_FORMAT user_data_fmt;
-    if (native_format) {
+    if (convFwdDims.native_format) {
       user_data_fmt = MklTensorFormatToMklDnnDataFormat(convFwdDims.tf_fmt);
     } else {
       // Create memory descriptors for convolution data w/ no specified format
@@ -370,31 +371,29 @@ class MklConvFwdPrimitive : public MklPrimitive {
 // TODO(nhasabni): We should not require passing a type to MklPrimitiveFactory.
 // But removing the need for type in MklPrimitiveFactory is going to require
 // change to every MKL op. So not doing it now. Instead passing float.
-template <typename Tinput, typename Tfilter, typename Tbias, typename Toutput,
-          bool native_format>
+template <typename Tinput, typename Tfilter, typename Tbias, typename Toutput>
 class MklConvFwdPrimitiveFactory : public MklPrimitiveFactory<float> {
  public:
-  static MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput, native_format>*
-  Get(const MklConvFwdParams& convFwdDims, bool do_not_cache) {
-    MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput, native_format>*
-        conv_fwd = nullptr;
+  static MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput>* Get(
+      const MklConvFwdParams& convFwdDims, bool do_not_cache) {
+    MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput>* conv_fwd = nullptr;
 
     if (do_not_cache) {
       // Always create a new primitive
-      conv_fwd = new MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput,
-                                         native_format>(convFwdDims);
+      conv_fwd =
+          new MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput>(convFwdDims);
     } else {
       // Try to find a suitable one in pool
-      conv_fwd = dynamic_cast<
-          MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput, native_format>*>(
-          MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias, Toutput,
-                                     native_format>::GetInstance()
-              .GetConvFwd(convFwdDims));
+      conv_fwd =
+          dynamic_cast<MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput>*>(
+              MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias,
+                                         Toutput>::GetInstance()
+                  .GetConvFwd(convFwdDims));
       if (conv_fwd == nullptr) {
-        conv_fwd = new MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput,
-                                           native_format>(convFwdDims);
-        MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias, Toutput,
-                                   native_format>::GetInstance()
+        conv_fwd = new MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Toutput>(
+            convFwdDims);
+        MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias,
+                                   Toutput>::GetInstance()
             .SetConvFwd(convFwdDims, conv_fwd);
       }
     }
@@ -426,7 +425,7 @@ class MklConvFwdPrimitiveFactory : public MklPrimitiveFactory<float> {
     key_creator.AddAsKey(convFwdDims.padding_left);
     key_creator.AddAsKey(convFwdDims.padding_right);
     key_creator.AddAsKey(convFwdDims.dtypes);
-    if (native_format) {
+    if (convFwdDims.native_format) {
       key_creator.AddAsKey(convFwdDims.tf_fmt);
     }
 
@@ -689,23 +688,22 @@ class MklConvOp : public OpKernel {
            IsConv1x1StrideNot1(filter_dims, strides));
 
       // Get a conv2d fwd from primitive pool
-      MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Ttemp_output, native_format>*
-          conv_fwd = nullptr;
+      MklConvFwdPrimitive<Tinput, Tfilter, Tbias, Ttemp_output>* conv_fwd =
+          nullptr;
       memory::dims bias_dims = {};
       if (fuse_biasadd_) {
         conv_utl.GetBiasSizeInMklOrder(kInputIndex_Bias, &bias_dims);
       }
-      MklConvFwdParams convFwdDims(src_dims, filter_dims,
-                                   fuse_biasadd_ ? bias_dims : NONE_DIMS,
-                                   dst_dims_mkl_order, strides, dilations,
-                                   padding_left, padding_right, tf_fmt);
+      MklConvFwdParams convFwdDims(
+          src_dims, filter_dims, fuse_biasadd_ ? bias_dims : NONE_DIMS,
+          dst_dims_mkl_order, strides, dilations, padding_left, padding_right,
+          tf_fmt, native_format);
 
       // TODO(mdfaijul): Extend the basic parameters for data types and fusions
       this->ExtendConvFwdParams(context, convFwdDims);
       conv_fwd =
-          MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias, Ttemp_output,
-                                     native_format>::Get(convFwdDims,
-                                                         do_not_cache);
+          MklConvFwdPrimitiveFactory<Tinput, Tfilter, Tbias, Ttemp_output>::Get(
+              convFwdDims, do_not_cache);
       // Allocate output tensors `dst_tensor` and `filter_out_tensor`
       MklDnnShape output_mkl_shape;
       std::shared_ptr<ConvFwdPd> conv_fwd_pd = conv_fwd->GetPrimitiveDesc();
",0,train
8077ae1d1e8bfe6a5cc55df07ad82ae91f431d2e,tensorflow/tensorflow,Minor changes in comments,mkl_util.h,"@@ -33,7 +33,7 @@ limitations under the License.
 #endif
 
 #ifdef INTEL_MKL_ML_ONLY
-// Using pragma as #warning doesn't work with all compilers
+// Using pragma message since #warning doesn't work with all compilers
 #pragma message(""Compiling for INTEL MKL ML only will be deprecated soon."")
 #pragma message(""Please use MKL DNN (the default option for --config=mkl)"")
 #endif
",0,train
4900e8a17367aa8c158e5e783d4776ffc206a77b,tensorflow/tensorflow,"Automated rollback of commit 1f856fb5d978177123ddd5ac5a3e4bb669288d65

PiperOrigin-RevId: 232750954",backend.py,"@@ -4143,8 +4143,8 @@ def conv1d(x,
   x = nn.convolution(
       input=x,
       filter=kernel,
-      dilation_rate=dilation_rate,
-      strides=strides,
+      dilation_rate=(dilation_rate,),
+      strides=(strides,),
       padding=padding,
       data_format=tf_data_format)
   if data_format == 'channels_first' and tf_data_format == 'NWC':
",0,train
4900e8a17367aa8c158e5e783d4776ffc206a77b,tensorflow/tensorflow,"Automated rollback of commit 1f856fb5d978177123ddd5ac5a3e4bb669288d65

PiperOrigin-RevId: 232750954",nn_ops.py,"@@ -65,9 +65,7 @@ def _get_sequence(value, n, channel_index, name):
     return value
   elif current_n == 1:
     value = list((value[0],) * n)
-  elif current_n == n:
-    value = list(value)
-  else:
+  elif current_n != n:
     raise ValueError(""{} should be of length 1, {} or {} but was {}"".format(
         name, n, n + 2, current_n))
 
@@ -883,14 +881,21 @@ def convolution(
   filter = deprecated_argument_lookup(""filters"", filters, ""filter"", filter)
   dilation_rate = deprecated_argument_lookup(
       ""dilations"", dilations, ""dilation_rate"", dilation_rate)
-  return convolution_internal(
-      input,
-      filter,
-      strides=strides,
-      padding=padding,
-      data_format=data_format,
-      dilations=dilation_rate,
-      name=name)
+  # pylint: enable=line-too-long
+  with ops.name_scope(name, ""convolution"", [input, filter]) as name:
+    input = ops.convert_to_tensor(input, name=""input"")  # pylint: disable=redefined-builtin
+    input_shape = input.get_shape()
+    filter = ops.convert_to_tensor(filter, name=""filter"")  # pylint: disable=redefined-builtin
+    filter_shape = filter.get_shape()
+    op = Convolution(
+        input_shape,
+        filter_shape,
+        padding,
+        strides=strides,
+        dilation_rate=dilation_rate,
+        name=name,
+        data_format=data_format)
+    return op(input, filter)
 
 
 @tf_export(""nn.convolution"", v1=[])
@@ -902,15 +907,14 @@ def convolution_v2(
     data_format=None,
     dilations=None,
     name=None):
-  return convolution_internal(
+  return convolution(
       input,  # pylint: disable=redefined-builtin
       filters,
-      strides=strides,
       padding=padding,
-      data_format=data_format,
-      dilations=dilations,
-      name=name)
-
+      strides=strides,
+      dilation_rate=dilations,
+      name=name,
+      data_format=data_format)
 
 convolution_v2.__doc__ = deprecation.rewrite_argument_docstring(
     deprecation.rewrite_argument_docstring(
@@ -918,67 +922,6 @@ convolution_v2.__doc__ = deprecation.rewrite_argument_docstring(
     ""filter"", ""filters"")
 
 
-def convolution_internal(
-    input,  # pylint: disable=redefined-builtin
-    filters,
-    strides=None,
-    padding=""VALID"",
-    data_format=None,
-    dilations=None,
-    name=None):
-  """"""Internal function which performs rank agnostic convolution.""""""
-  with ops.name_scope(name, ""convolution"", [input, filter]) as name:
-    if input.shape is not None:
-      n = len(input.shape) - 2
-    elif filters.shape is not None:
-      n = len(filters.shape) - 2
-    else:
-      raise ValueError(""rank of input or filter must be known"")
-
-    if n < 1 or n > 3:
-      raise ValueError(
-          ""Input tensor must be of rank 3, 4 or 5 but was {}."".format(n + 2))
-
-    if data_format is None:
-      channel_index = n + 1
-    else:
-      channel_index = 1 if data_format.startswith(""NC"") else n + 1
-
-    strides = _get_sequence(strides, n, channel_index, ""strides"")
-    dilations = _get_sequence(dilations, n, channel_index, ""dilations"")
-
-    conv_ops = {1: conv1d, 2: gen_nn_ops.conv2d, 3: gen_nn_ops.conv3d}
-
-    if all(i == 1 for i in dilations):
-      # fast path if no dilation as gradient only supported on GPU for dilations
-      op = conv_ops.get(n)
-      return op(
-          input,
-          filters,
-          strides,
-          padding=padding,
-          data_format=data_format,
-          dilations=dilations,
-          name=name)
-    else:
-      if channel_index == 1:
-        strides = strides[2:]
-        dilations = dilations[2:]
-      else:
-        strides = strides[1:-1]
-        dilations = dilations[1:-1]
-
-      op = Convolution(
-          tensor_shape.as_shape(input.shape),
-          tensor_shape.as_shape(filters.shape),
-          padding,
-          strides=strides,
-          dilation_rate=dilations,
-          name=name,
-          data_format=data_format)
-      return op(input, filters)
-
-
 class Convolution(object):
   """"""Helper class for convolution.
 
@@ -4097,9 +4040,10 @@ def conv1d(
       entries by which the filter is moved right at each step.
     padding: 'SAME' or 'VALID'
     use_cudnn_on_gpu: An optional `bool`.  Defaults to `True`.
-    data_format: An optional `string` from `""NWC"", ""NCW""`.  Defaults to `""NWC""`,
-      the data is stored in the order of [batch, in_width, in_channels].  The
-      `""NCW""` format stores data as [batch, in_channels, in_width].
+    data_format: An optional `string` from `""NWC"", ""NCW""`.  Defaults
+      to `""NWC""`, the data is stored in the order of
+      [batch, in_width, in_channels].  The `""NCW""` format stores
+      data as [batch, in_channels, in_width].
     name: A name for the operation (optional).
     input: Alias for value.
     dilations: An int or list of `ints` that has length `1` or `3` which
@@ -4182,9 +4126,10 @@ def conv1d_v2(
     stride: An int or list of `ints` that has length `1` or `3`.  The number of
       entries by which the filter is moved right at each step.
     padding: 'SAME' or 'VALID'
-    data_format: An optional `string` from `""NWC"", ""NCW""`.  Defaults to `""NWC""`,
-      the data is stored in the order of [batch, in_width, in_channels].  The
-      `""NCW""` format stores data as [batch, in_channels, in_width].
+    data_format: An optional `string` from `""NWC"", ""NCW""`.  Defaults
+      to `""NWC""`, the data is stored in the order of
+      [batch, in_width, in_channels].  The `""NCW""` format stores
+      data as [batch, in_channels, in_width].
     dilations: An int or list of `ints` that has length `1` or `3` which
       defaults to 1. The dilation factor for each dimension of input. If set to
       k > 1, there will be k-1 skipped cells between each filter element on that
",0,train
75ba615684492a49e67fd2c2a59af4ee0e56838b,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-02-04

PiperOrigin-RevId: 293094987
Change-Id: I3359d88507c68e1a9d96de43890f53f14c816f07",compat.py,"@@ -31,7 +31,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 3)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 2, 4)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
07e0f88dd1ea60039267f4aeb57d6e24128e8c3b,tensorflow/tensorflow,"Whitelist InTopKV2, NextAfter and XlaKeyValueSort ops for the fallback path

Enabled relevant tests.

PiperOrigin-RevId: 335374607
Change-Id: I109c39459944648317c3a5274be4b5fe6c6e9586",legalize_tf_with_tf2xla.cc,"@@ -151,6 +151,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) {
     TypeID::get<TF::IgammaOp>(),
     TypeID::get<TF::IgammacOp>(),
     TypeID::get<TF::IgammaGradAOp>(),
+    TypeID::get<TF::InTopKV2Op>(),
     TypeID::get<TF::InvertOp>(),
     TypeID::get<TF::InvOp>(),
     TypeID::get<TF::LRNOp>(),
@@ -177,6 +178,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) {
     TypeID::get<TF::MulOp>(),
     TypeID::get<TF::MultinomialOp>(),
     TypeID::get<TF::NegOp>(),
+    TypeID::get<TF::NextAfterOp>(),
     TypeID::get<TF::NonMaxSuppressionV4Op>(),
     TypeID::get<TF::NotEqualOp>(),
     TypeID::get<TF::PadOp>(),
@@ -241,6 +243,7 @@ bool IsOpAllowedTf2XlaFallback(Operation* op) {
     TypeID::get<TF::XlaDynamicSliceOp>(),
     TypeID::get<TF::XlaDynamicUpdateSliceOp>(),
     TypeID::get<TF::XlaEinsumOp>(),
+    TypeID::get<TF::XlaKeyValueSortOp>(),
     TypeID::get<TF::XlaPadOp>(),
     TypeID::get<TF::Xlog1pyOp>(),
     TypeID::get<TF::XlogyOp>()
",0,train
07e0f88dd1ea60039267f4aeb57d6e24128e8c3b,tensorflow/tensorflow,"Whitelist InTopKV2, NextAfter and XlaKeyValueSort ops for the fallback path

Enabled relevant tests.

PiperOrigin-RevId: 335374607
Change-Id: I109c39459944648317c3a5274be4b5fe6c6e9586",binary_ops_test.py,"@@ -474,7 +474,6 @@ class BinaryOpsTest(xla_test.XLATestCase):
           expected=np.array([1 << 32, 1 << 36, 1 << 32, 1 << 36],
                             dtype=np.int64))
 
-  @test_util.disable_mlir_bridge(""Enable tf.NextAfter Compilation"")
   def testNextAfter(self):
     for dtype in self.numeric_types:
       if dtype in [np.float32, np.float64]:
",0,train
f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file.
This way we won't need to import all the dependencies.

This CL simply copies the existing code into the new file.

PiperOrigin-RevId: 307134277
Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",common.h,"@@ -29,6 +29,8 @@ limitations under the License.
 
 namespace tflite {
 
+constexpr int kReverseShift = -1;
+
 inline void GetActivationMinMax(FusedActivationFunctionType ac,
                                 float* output_activation_min,
                                 float* output_activation_max) {
",0,train
f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file.
This way we won't need to import all the dependencies.

This CL simply copies the existing code into the new file.

PiperOrigin-RevId: 307134277
Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",fully_connected.h,"@@ -23,8 +23,6 @@ limitations under the License.
 namespace tflite {
 namespace reference_ops {
 
-const int kReverseShift = -1;
-
 inline void FullyConnected(
     const FullyConnectedParams& params, const RuntimeShape& input_shape,
     const float* input_data, const RuntimeShape& weights_shape,
",0,train
f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file.
This way we won't need to import all the dependencies.

This CL simply copies the existing code into the new file.

PiperOrigin-RevId: 307134277
Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",l2normalization.h,"@@ -41,8 +41,8 @@ inline void L2Normalization(int32_t input_zero_point, int32_t outer_size,
     }
     int32_t inv_l2norm_multiplier;
     int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(acc, /*reverse_shift*/ -1,
-                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
+    GetInvSqrtQuantizedMultiplierExp(acc, kReverseShift, &inv_l2norm_multiplier,
+                                     &inv_l2norm_shift);
 
     for (int inner_index = 0; inner_index < depth; ++inner_index) {
       int32_t input =
",0,train
f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file.
This way we won't need to import all the dependencies.

This CL simply copies the existing code into the new file.

PiperOrigin-RevId: 307134277
Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",l2normalization.h,"@@ -0,0 +1,88 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#ifndef TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
+#define TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
+
+#include <cmath>
+
+#include ""tensorflow/lite/c/common.h""
+#include ""tensorflow/lite/kernels/internal/common.h""
+#include ""tensorflow/lite/kernels/internal/types.h""
+
+namespace tflite {
+
+namespace reference_ops {
+
+inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
+                            const RuntimeShape& input_shape,
+                            const float* input_data,
+                            const RuntimeShape& output_shape,
+                            float* output_data, float epsilon = 1e-6) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  for (int i = 0; i < outer_size; ++i) {
+    float squared_l2_norm = 0;
+    for (int c = 0; c < depth; ++c) {
+      const float val = input_data[depth * i + c];
+      squared_l2_norm += val * val;
+    }
+    float l2_norm = std::sqrt(squared_l2_norm);
+    l2_norm = std::max(l2_norm, epsilon);
+    for (int c = 0; c < depth; ++c) {
+      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
+    }
+  }
+}
+
+inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
+                            const RuntimeShape& input_shape,
+                            const uint8* input_data,
+                            const RuntimeShape& output_shape,
+                            uint8* output_data) {
+  const int trailing_dim = input_shape.DimensionsCount() - 1;
+  const int depth =
+      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
+  const int outer_size =
+      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
+  const int32 input_zero_point = op_params.input_zero_point;
+
+  for (int i = 0; i < outer_size; ++i) {
+    int32 square_l2_norm = 0;
+    for (int c = 0; c < depth; c++) {
+      int32 diff = input_data[depth * i + c] - input_zero_point;
+      square_l2_norm += diff * diff;
+    }
+    int32 inv_l2norm_multiplier;
+    int inv_l2norm_shift;
+    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
+                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
+    for (int c = 0; c < depth; c++) {
+      int32 diff = input_data[depth * i + c] - input_zero_point;
+      int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
+          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
+      int32 unclamped_output_val = 128 + rescaled_diff;
+      int32 output_val = std::min(255, std::max(0, unclamped_output_val));
+      output_data[depth * i + c] = static_cast<uint8>(output_val);
+    }
+  }
+}
+
+
+}  // namespace reference_ops
+}  // namespace tflite
+#endif  // TENSORFLOW_LITE_KERNELS_INTERNAL_REFERENCE_L2NORMALIZATION_H_
",0,train
f0b228a36dacaef00f7215df0d8ab3d3a84749a2,tensorflow/tensorflow,"TFL MCU: Move reference L2Normalization implementation into its own file.
This way we won't need to import all the dependencies.

This CL simply copies the existing code into the new file.

PiperOrigin-RevId: 307134277
Change-Id: Idf7f9dffe6d6505337caefc736dce372aa014b14",reference_ops.h,"@@ -42,6 +42,7 @@ limitations under the License.
 #include ""tensorflow/lite/kernels/internal/reference/dequantize.h""
 #include ""tensorflow/lite/kernels/internal/reference/floor.h""
 #include ""tensorflow/lite/kernels/internal/reference/fully_connected.h""
+#include ""tensorflow/lite/kernels/internal/reference/l2normalization.h""
 #include ""tensorflow/lite/kernels/internal/reference/logistic.h""
 #include ""tensorflow/lite/kernels/internal/reference/maximum_minimum.h""
 #include ""tensorflow/lite/kernels/internal/reference/mul.h""
@@ -294,62 +295,6 @@ inline void QuantizeLeakyRelu(const LeakyReluParams& params,
   }
 }
 
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const float* input_data,
-                            const RuntimeShape& output_shape,
-                            float* output_data, float epsilon = 1e-6) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  for (int i = 0; i < outer_size; ++i) {
-    float squared_l2_norm = 0;
-    for (int c = 0; c < depth; ++c) {
-      const float val = input_data[depth * i + c];
-      squared_l2_norm += val * val;
-    }
-    float l2_norm = std::sqrt(squared_l2_norm);
-    l2_norm = std::max(l2_norm, epsilon);
-    for (int c = 0; c < depth; ++c) {
-      output_data[depth * i + c] = input_data[depth * i + c] / l2_norm;
-    }
-  }
-}
-
-inline void L2Normalization(const tflite::L2NormalizationParams& op_params,
-                            const RuntimeShape& input_shape,
-                            const uint8* input_data,
-                            const RuntimeShape& output_shape,
-                            uint8* output_data) {
-  const int trailing_dim = input_shape.DimensionsCount() - 1;
-  const int depth =
-      MatchingDim(input_shape, trailing_dim, output_shape, trailing_dim);
-  const int outer_size =
-      MatchingFlatSizeSkipDim(input_shape, trailing_dim, output_shape);
-  const int32 input_zero_point = op_params.input_zero_point;
-  for (int i = 0; i < outer_size; ++i) {
-    int32 square_l2_norm = 0;
-    for (int c = 0; c < depth; c++) {
-      int32 diff = input_data[depth * i + c] - input_zero_point;
-      square_l2_norm += diff * diff;
-    }
-    int32 inv_l2norm_multiplier;
-    int inv_l2norm_shift;
-    GetInvSqrtQuantizedMultiplierExp(square_l2_norm, kReverseShift,
-                                     &inv_l2norm_multiplier, &inv_l2norm_shift);
-    for (int c = 0; c < depth; c++) {
-      int32 diff = input_data[depth * i + c] - input_zero_point;
-      int32 rescaled_diff = MultiplyByQuantizedMultiplierSmallerThanOneExp(
-          128 * diff, inv_l2norm_multiplier, inv_l2norm_shift);
-      int32 unclamped_output_val = 128 + rescaled_diff;
-      int32 output_val = std::min(255, std::max(0, unclamped_output_val));
-      output_data[depth * i + c] = static_cast<uint8>(output_val);
-    }
-  }
-}
-
 // T is expected to be either float or int.
 template <typename T>
 inline void AddN(const RuntimeShape& input_shape, const size_t num_inputs,
",0,train
1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels.

This improves build times by allowing the double, float, and half implementations to build in parallel.

PiperOrigin-RevId: 235576953",conv_ops_fused_double.cc,"@@ -0,0 +1,39 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/framework/register_types.h""
+#include ""tensorflow/core/kernels/conv_ops_fused_impl.h""
+
+namespace tensorflow {
+
+// If we're using the alternative GEMM-based implementation of Conv2D for the
+// CPU implementation, don't register this EigenTensor-based version.
+// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for
+// contractions with non-default contraction output kernels.
+#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM)
+TF_CALL_double(REGISTER_FUSED_CPU_CONV2D);
+#endif  // !USE_GEMM_FOR_CONV
+
+#if GOOGLE_CUDA
+
+namespace functor {
+DECLARE_FUNCTOR_GPU_SPEC(double);
+}  // namespace functor
+
+TF_CALL_double(REGISTER_FUSED_GPU_CONV2D);
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
",0,train
1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels.

This improves build times by allowing the double, float, and half implementations to build in parallel.

PiperOrigin-RevId: 235576953",conv_ops_fused_float.cc,"@@ -0,0 +1,39 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/framework/register_types.h""
+#include ""tensorflow/core/kernels/conv_ops_fused_impl.h""
+
+namespace tensorflow {
+
+// If we're using the alternative GEMM-based implementation of Conv2D for the
+// CPU implementation, don't register this EigenTensor-based version.
+// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for
+// contractions with non-default contraction output kernels.
+#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM)
+TF_CALL_float(REGISTER_FUSED_CPU_CONV2D);
+#endif  // !USE_GEMM_FOR_CONV
+
+#if GOOGLE_CUDA
+
+namespace functor {
+DECLARE_FUNCTOR_GPU_SPEC(float);
+}  // namespace functor
+
+TF_CALL_float(REGISTER_FUSED_GPU_CONV2D);
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
",0,train
1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels.

This improves build times by allowing the double, float, and half implementations to build in parallel.

PiperOrigin-RevId: 235576953",conv_ops_fused_half.cc,"@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/core/framework/register_types.h""
+#include ""tensorflow/core/kernels/conv_ops_fused_impl.h""
+
+namespace tensorflow {
+
+#if GOOGLE_CUDA
+
+namespace functor {
+DECLARE_FUNCTOR_GPU_SPEC(Eigen::half);
+}  // namespace functor
+
+#endif  // GOOGLE_CUDA
+
+}  // namespace tensorflow
",0,train
1c6f10152fe850463422108f03b6b022b8f24ccc,tensorflow/tensorflow,"Split up conv_ops_fused kernels.

This improves build times by allowing the double, float, and half implementations to build in parallel.

PiperOrigin-RevId: 235576953",conv_ops_fused_impl.h,"@@ -28,6 +28,9 @@ limitations under the License.
 //
 // NOTE: GPU only supports fusion of Conv2D + BiasAdd + <optional Relu>.
 
+#ifndef TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_
+#define TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_
+
 #define USE_EIGEN_TENSOR
 #define EIGEN_USE_THREADS
 
@@ -63,7 +66,6 @@ namespace tensorflow {
 typedef Eigen::ThreadPoolDevice CPUDevice;
 typedef Eigen::GpuDevice GPUDevice;
 
-namespace {
 // Supported Conv2D fusions. Not all of them supported on all type of devices.
 enum class FusedComputationType {
   // NOTE(ezhulenev): CuDNN `cudnnConvolutionBiasActivationForward` supports
@@ -463,12 +465,12 @@ class FusedConvParameters : public ConvParameters {
   se::dnn::ActivationMode activation_mode_;
 };
 
-bool operator==(const FusedConvParameters& lhs,
+inline bool operator==(const FusedConvParameters& lhs,
                 const FusedConvParameters& rhs) {
   return lhs.get_data_as_tuple() == rhs.get_data_as_tuple();
 }
 
-bool operator!=(const FusedConvParameters& lhs,
+inline bool operator!=(const FusedConvParameters& lhs,
                 const FusedConvParameters& rhs) {
   return !(lhs == rhs);
 }
@@ -482,7 +484,7 @@ using AutoTuneFusedConv =
     AutoTuneSingleton<FusedConvAutoTuneGroup, FusedConvParameters,
                       se::dnn::AlgorithmConfig>;
 
-int64 ConvolveScratchSize() {
+inline int64 ConvolveScratchSize() {
   static int64 convolve_scratch_size = GetDnnWorkspaceLimit(
       // default value is in bytes despite the name of the environment variable
       ""TF_CUDNN_WORKSPACE_LIMIT_IN_MB"", 1LL << 32  // 4GB
@@ -822,8 +824,6 @@ struct LaunchFusedConv2DOp<GPUDevice, T> {
 
 #endif  // GOOGLE_CUDA
 
-}  // namespace
-
 template <typename Device, typename T>
 class FusedConv2DOp : public OpKernel {
  public:
@@ -962,22 +962,9 @@ class FusedConv2DOp : public OpKernel {
       Name(""_FusedConv2D"").Device(DEVICE_CPU).TypeConstraint<T>(""T""), \
       FusedConv2DOp<CPUDevice, T>);
 
-// If we're using the alternative GEMM-based implementation of Conv2D for the
-// CPU implementation, don't register this EigenTensor-based version.
-// TODO(b/119765980): Upgrade upstream Eigen to set `m_can_use_xsmm=false` for
-// contractions with non-default contraction output kernels.
-#if !defined(USE_GEMM_FOR_CONV) && !defined(EIGEN_USE_LIBXSMM)
-TF_CALL_float(REGISTER_FUSED_CPU_CONV2D);
-TF_CALL_double(REGISTER_FUSED_CPU_CONV2D);
-#endif  // !USE_GEMM_FOR_CONV
-
-#undef REGISTER_FUSED_CPU_CONV2D
-
 #if GOOGLE_CUDA
 
-// Forward declarations of the functor specializations for GPU.
-namespace functor {
-#define DECLARE_GPU_SPEC(T)                                              \
+#define DECLARE_FUNCTOR_GPU_SPEC(T)                                      \
   template <>                                                            \
   void TransformFilter<GPUDevice, T, int, 4>::operator()(                \
       const GPUDevice& d, FilterTensorFormat dst_filter_format,          \
@@ -992,23 +979,14 @@ namespace functor {
       typename TTypes<T, 4, int>::Tensor out, TensorFormat data_format); \
   extern template struct PadInput<GPUDevice, T, int, 4>
 
-DECLARE_GPU_SPEC(float);
-DECLARE_GPU_SPEC(Eigen::half);
-DECLARE_GPU_SPEC(double);
-#undef DECLARE_GPU_SPEC
-}  // namespace functor
-
 // Registration of the GPU implementations.
 #define REGISTER_FUSED_GPU_CONV2D(T)                                  \
   REGISTER_KERNEL_BUILDER(                                            \
       Name(""_FusedConv2D"").Device(DEVICE_GPU).TypeConstraint<T>(""T""), \
       FusedConv2DOp<GPUDevice, T>);
 
-TF_CALL_float(REGISTER_FUSED_GPU_CONV2D);
-TF_CALL_double(REGISTER_FUSED_GPU_CONV2D);
-
-#undef REGISTER_FUSED_GPU_CONV2D
-
 #endif  // GOOGLE_CUDA
 
 }  // namespace tensorflow
+
+#endif  // TENSORFLOW_CORE_KERNELS_CONV_OPS_FUSED_IMPL_H_
",0,train
d72d45b6701279e12a0dd8325f143162b9060f33,tensorflow/tensorflow,"Add default_batch_size to ExportEstimator.
Change: 127753127",monitors.py,"@@ -837,11 +837,15 @@ class GraphDump(BaseMonitor):
 class ExportMonitor(EveryN):
   """"""Monitor that exports Estimator every N steps.""""""
 
+  # TODO(philstahlfeld): Investigate switching export.export_estimator
+  # configuration values to **kwargs so that updates to the export_estimator
+  # function don't have to be reflected here.
   def __init__(self,
                every_n_steps,
                export_dir,
                exports_to_keep=5,
-               signature_fn=None):
+               signature_fn=None,
+               default_batch_size=1):
     """"""Initializes ExportMonitor.
 
     Args:
@@ -851,11 +855,13 @@ class ExportMonitor(EveryN):
       signature_fn: Function that given `Tensor` of `Example` strings,
         `dict` of `Tensor`s for features and `dict` of `Tensor`s for predictions
         and returns default and named exporting signatures.
+      default_batch_size: Default batch size of the `Example` placeholder.
     """"""
     super(ExportMonitor, self).__init__(every_n_steps=every_n_steps)
     self.export_dir = export_dir
     self.exports_to_keep = exports_to_keep
     self.signature_fn = signature_fn
+    self._default_batch_size = default_batch_size
 
   def every_n_step_end(self, step, outputs):
     super(ExportMonitor, self).every_n_step_end(step, outputs)
@@ -863,7 +869,8 @@ class ExportMonitor(EveryN):
       export.export_estimator(self._estimator,
                               self.export_dir,
                               exports_to_keep=self.exports_to_keep,
-                              signature_fn=self.signature_fn)
+                              signature_fn=self.signature_fn,
+                              default_batch_size=self._default_batch_size)
     except (RuntimeError, TypeError):
       # Currently we are not syncronized with saving checkpoints, which leads to
       # runtime errors when we are calling export on the same global step.
@@ -875,7 +882,8 @@ class ExportMonitor(EveryN):
     export.export_estimator(self._estimator,
                             self.export_dir,
                             exports_to_keep=self.exports_to_keep,
-                            signature_fn=self.signature_fn)
+                            signature_fn=self.signature_fn,
+                            default_batch_size=self._default_batch_size)
 
 
 class CheckpointSaver(EveryN):
",0,train
ec36e9c3efbf0cf84cee0ef43164d914b2e626c5,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-qualified-auto in fusion_utils.cc (NFC)

PiperOrigin-RevId: 418588406
Change-Id: I0fc78fb3fac74d194710108cbcba3c371c560643",fusion_utils.cc,"@@ -351,8 +351,8 @@ void FusionPattern::calculateOperandsAndResults() {
 
 // Supports using EquivalenceClasses for Value
 bool operator<(const ValueWrapper& lhs, const ValueWrapper& rhs) {
-  auto lhs_value = lhs.getValue().getAsOpaquePointer();
-  auto rhs_value = rhs.getValue().getAsOpaquePointer();
+  auto* lhs_value = lhs.getValue().getAsOpaquePointer();
+  auto* rhs_value = rhs.getValue().getAsOpaquePointer();
   return lhs_value < rhs_value;
 }
 
",0,train
4b0687d70c4bcab5ec2837345bd0115a0b356946,tensorflow/tensorflow,"[XLA] Add --xla_hlo_profile_last_run flag to replay_computation.

When using replay_computation for profiling, you usually only want to
do one or two warmup runs and then profile the last run of your model.
This flag makes that possible.

PiperOrigin-RevId: 189208924",replay_computation.cc,"@@ -40,6 +40,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/client/global_data.h""
 #include ""tensorflow/compiler/xla/client/lib/testing.h""
 #include ""tensorflow/compiler/xla/client/local_client.h""
+#include ""tensorflow/compiler/xla/execution_options_util.h""
 #include ""tensorflow/compiler/xla/literal_util.h""
 #include ""tensorflow/compiler/xla/service/session.pb.h""
 #include ""tensorflow/compiler/xla/shape_util.h""
@@ -66,6 +67,7 @@ struct Options {
   bool use_fake_data = false;
   bool print_result = true;
   int num_runs = 1;
+  bool xla_hlo_profile_last_run = false;
 };
 
 // Invokes the given computation passing arbitrary data for every (unbound)
@@ -122,16 +124,21 @@ StatusOr<std::unique_ptr<Literal>> ReplayComputation(
   std::unique_ptr<Literal> result;
   for (int i = 0; i < opts.num_runs; ++i) {
     ExecutionProfile profile;
+    ExecutionOptions execution_options = CreateDefaultExecutionOptions();
+    if (opts.xla_hlo_profile_last_run && i == opts.num_runs - 1) {
+      execution_options.mutable_debug_options()->set_xla_hlo_profile(true);
+    }
+
     if (opts.print_result) {
-      TF_ASSIGN_OR_RETURN(result, client->ExecuteAndTransfer(
-                                      computation, execute_arguments,
-                                      /*execution_options=*/nullptr, &profile));
+      TF_ASSIGN_OR_RETURN(
+          result, client->ExecuteAndTransfer(computation, execute_arguments,
+                                             &execution_options, &profile));
     } else {
       // If we're not printing the result, execute the computation but don't
       // bother retrieving the result.  This can be a significant speedup.
       TF_RETURN_IF_ERROR(client
                              ->Execute(computation, execute_arguments,
-                                       /*execution_options=*/nullptr, &profile)
+                                       &execution_options, &profile)
                              .status());
     }
     LOG(INFO) << ""Execution took ""
@@ -191,6 +198,9 @@ int main(int argc, char** argv) {
                        ""Number of times to run each computation""),
       tensorflow::Flag(""fake_infeed_shape"", &opts.fake_infeed_shape,
                        ""Shape of fake data to construct for (infinite) infeed""),
+      tensorflow::Flag(
+          ""xla_hlo_profile_last_run"", &opts.xla_hlo_profile_last_run,
+          ""Pass --xla_hlo_profile the last time we run the computation.""),
   };
   xla::string usage = tensorflow::Flags::Usage(argv[0], flag_list);
   bool parse_ok = tensorflow::Flags::Parse(&argc, argv, flag_list);
",0,train
f292f31b57480d0b33f5c0feb5fb128e43c865dc,tensorflow/tensorflow,"Disabling benchmarkScanDefun for TFRT due to lack of MLIR lowering support.

PiperOrigin-RevId: 324694962
Change-Id: I2398161dff9403ac115a031c5942f753daff7871",benchmarks_test.py,"@@ -1260,6 +1260,8 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase):
 
     self._run(scan, 100)
 
+  @test_util.disable_tfrt(
+      ""tf.While not supported in TF to CoreRT lowing. b/162685874"")
   def benchmarkScanDefun(self):
     elems = math_ops.range(1600)
 
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",ir_emitter.cc,"@@ -302,7 +302,7 @@ Status IrEmitter::HandleGetTupleElement(HloInstruction* get_tuple_element) {
   const Shape& shape = get_tuple_element->shape();
   emitted_value_[get_tuple_element] = llvm_ir::EmitGetTupleElement(
       shape, get_tuple_element->tuple_index(), MinimumAlignmentForShape(shape),
-      GetEmittedValueFor(operand), &b_, module_);
+      GetEmittedValueFor(operand), &b_);
   return Status::OK();
 }
 
@@ -322,7 +322,7 @@ Status IrEmitter::HandleTupleSelect(HloInstruction* tuple_select) {
   TF_RETURN_IF_ERROR(EmitTargetAddressForOp(tuple_select));
   llvm_ir::EmitTupleSelect(GetIrArrayFor(tuple_select), GetIrArrayFor(pred),
                            GetEmittedValueFor(on_true),
-                           GetEmittedValueFor(on_false), &b_, module_);
+                           GetEmittedValueFor(on_false), &b_);
   return Status::OK();
 }
 
@@ -345,8 +345,7 @@ Status IrEmitter::HandleInfeed(HloInstruction* instruction) {
                       assignment_.GetUniqueSlice(infeed, {1}));
   llvm::Value* token_address = EmitBufferPointer(
       token_slice, ShapeUtil::GetTupleElementShape(infeed->shape(), 1));
-  llvm_ir::EmitTuple(GetIrArrayFor(infeed), {data_address, token_address}, &b_,
-                     module_);
+  llvm_ir::EmitTuple(GetIrArrayFor(infeed), {data_address, token_address}, &b_);
 
   if (data_shape.IsTuple()) {
     TF_RET_CHECK(!ShapeUtil::IsNestedTuple(data_shape));
@@ -377,7 +376,7 @@ Status IrEmitter::HandleInfeed(HloInstruction* instruction) {
     }
 
     llvm_ir::EmitTuple(llvm_ir::IrArray(data_address, data_shape),
-                       tuple_element_addresses, &b_, module_);
+                       tuple_element_addresses, &b_);
   } else {
     TF_RETURN_IF_ERROR(
         EmitXfeedTransfer(XfeedKind::kInfeed, data_shape, data_address));
@@ -498,7 +497,7 @@ Status IrEmitter::HandleOutfeed(HloInstruction* outfeed) {
         ShapeUtil::GetTupleElementShape(operand_shape, i);
     llvm::Value* tuple_element = llvm_ir::EmitGetTupleElement(
         tuple_element_shape, i, MinimumAlignmentForShape(tuple_element_shape),
-        value, &b_, module_);
+        value, &b_);
     TF_RETURN_IF_ERROR(EmitXfeedTransfer(XfeedKind::kOutfeed,
                                          tuple_element_shape, tuple_element));
   }
@@ -621,8 +620,7 @@ Status IrEmitter::HandleSort(HloInstruction* hlo) {
         GetProfileCountersArgument(), less_than_function});
 
   if (sort->values_count() > 0) {
-    llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_,
-                       module_);
+    llvm_ir::EmitTuple(GetIrArrayFor(sort), destination_addresses, &b_);
   }
   return Status::OK();
 }
@@ -633,7 +631,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) {
   for (auto operand : tuple->operands()) {
     base_ptrs.push_back(GetEmittedValueFor(operand));
   }
-  llvm_ir::EmitTuple(GetIrArrayFor(tuple), base_ptrs, &b_, module_);
+  llvm_ir::EmitTuple(GetIrArrayFor(tuple), base_ptrs, &b_);
   return Status::OK();
 }
 
@@ -1349,7 +1347,7 @@ Status IrEmitter::HandleAllReduce(HloInstruction* crs) {
     MemCpy(operand_ptrs.back(), /*DstAlign=*/1, in_ptr,
            /*SrcAlign=*/1, ShapeUtil::ByteSizeOf(operand_shape));
   }
-  llvm_ir::EmitTuple(GetIrArrayFor(crs), operand_ptrs, &b_, module_);
+  llvm_ir::EmitTuple(GetIrArrayFor(crs), operand_ptrs, &b_);
   return Status::OK();
 }
 
@@ -2289,7 +2287,7 @@ Status IrEmitter::HandleCustomCall(HloInstruction* custom_call) {
       llvm::Value* addr = EmitBufferPointer(slice, elem_shape);
       base_ptrs.push_back(addr);
     }
-    llvm_ir::EmitTuple(GetIrArrayFor(custom_call), base_ptrs, &b_, module_);
+    llvm_ir::EmitTuple(GetIrArrayFor(custom_call), base_ptrs, &b_);
   }
   auto* output_address_arg =
       PointerCast(GetEmittedValueFor(custom_call), i8_ptr_type);
@@ -2980,7 +2978,7 @@ Status IrEmitter::EmitTargetElementLoop(
     for (int64 i = 0; i < output_arrays.size(); ++i) {
       tuple_operand_ptrs.push_back(output_arrays[i].GetBasePointer());
     }
-    llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &b_, module_);
+    llvm_ir::EmitTuple(target_array, tuple_operand_ptrs, &b_);
 
   } else {
     if (ShouldEmitParallelLoopFor(*target_op)) {
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",hlo_to_ir_bindings.cc,"@@ -135,11 +135,11 @@ llvm::Value* HloToIrBindings::EmitGetTupleElement(const HloInstruction* gte,
   if (gte->operand(0)->opcode() != HloOpcode::kGetTupleElement) {
     return llvm_ir::EmitGetTupleElement(
         gte->shape(), gte->tuple_index(), /*alignment=*/1,
-        GetTypedIrValue(*gte->operand(0), {}, base_ptr), b_, module_);
+        GetTypedIrValue(*gte->operand(0), {}, base_ptr), b_);
   }
   return llvm_ir::EmitGetTupleElement(
       gte->shape(), gte->tuple_index(), /*alignment=*/1,
-      EmitGetTupleElement(gte->operand(0), base_ptr), b_, module_);
+      EmitGetTupleElement(gte->operand(0), base_ptr), b_);
 }
 
 // Returns true if `value` has a name that should not be changed.
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",ir_emitter.cc,"@@ -115,7 +115,7 @@ Status IrEmitter::HandleGetTupleElement(HloInstruction* get_tuple_element) {
           get_tuple_element->shape(), get_tuple_element->tuple_index(),
           // TODO(b/26344050): tighten the alignment here
           // based on the real element type.
-          /*alignment=*/1, GetBasePointer(*operand), &b_, module_));
+          /*alignment=*/1, GetBasePointer(*operand), &b_));
   return Status::OK();
 }
 
@@ -144,7 +144,7 @@ Status IrEmitter::HandleTuple(HloInstruction* tuple) {
   for (const HloInstruction* operand : tuple->operands()) {
     base_ptrs.push_back(GetBasePointer(*operand));
   }
-  llvm_ir::EmitTuple(GetIrArray(*tuple, *tuple), base_ptrs, &b_, module_);
+  llvm_ir::EmitTuple(GetIrArray(*tuple, *tuple), base_ptrs, &b_);
   return Status::OK();
 }
 
@@ -434,7 +434,7 @@ Status IrEmitter::HandleTupleSelect(HloInstruction* tuple_select) {
   llvm_ir::EmitTupleSelect(GetIrArray(*tuple_select, *tuple_select),
                            GetIrArray(*pred, *tuple_select),
                            GetBasePointer(*on_true), GetBasePointer(*on_false),
-                           &b_, module_);
+                           &b_);
   return Status::OK();
 }
 
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",ir_emitter_nested.cc,"@@ -123,7 +123,7 @@ Status IrEmitterNested::EmitTargetElementLoop(
         ConstructIrArrayForOutputs(hlo);
     TF_RETURN_IF_ERROR(
         llvm_ir::LoopEmitter(element_generator, target_arrays, &b_).EmitLoop());
-    llvm_ir::EmitTuple(GetIrArray(hlo, hlo), target_arrays, &b_, module_);
+    llvm_ir::EmitTuple(GetIrArray(hlo, hlo), target_arrays, &b_);
     return Status::OK();
   }
   return llvm_ir::LoopEmitter(element_generator, GetIrArray(hlo, hlo), &b_)
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",ir_emitter_unnested.cc,"@@ -2201,7 +2201,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk(
   // kernel *anyway*.
   std::vector<IrArray> output_arrays = ConstructIrArrayForOutputs(hlo);
   KernelSupportLibrary{&b_}.If(""emit_mof_tuple"", IsBlock0Thread0(&b_), [&] {
-    llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_, module_);
+    llvm_ir::EmitTuple(GetIrArray(hlo, hlo), output_arrays, &b_);
   });
 
   // For multioutput fusion, we need to emit each operand and the root.
@@ -3103,8 +3103,7 @@ LaunchDimensions IrEmitterUnnested::EmitKernel(
   if (!reduction_info && unnested_hlo->IsMultiOutputFusion()) {
     KernelSupportLibrary{&b_}.If(""emit_mof_tuple"", IsBlock0Thread0(&b_), [&] {
       llvm_ir::EmitTuple(GetIrArray(*unnested_hlo, *unnested_hlo),
-                         ConstructIrArrayForOutputs(*unnested_hlo), &b_,
-                         module_);
+                         ConstructIrArrayForOutputs(*unnested_hlo), &b_);
     });
   }
 
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",fused_ir_emitter.cc,"@@ -121,9 +121,9 @@ Status FusedIrEmitter::HandleGetTupleElement(
     }
 
     // Lookup tuple element pointer.
-    return llvm_ir::EmitGetTupleElement(
-        get_tuple_element->shape(), get_tuple_element->tuple_index(),
-        /*alignment=*/1, tuple_ptr, b_, module_);
+    return llvm_ir::EmitGetTupleElement(get_tuple_element->shape(),
+                                        get_tuple_element->tuple_index(),
+                                        /*alignment=*/1, tuple_ptr, b_);
   };
 
   if (!get_tuple_element->shape().IsTuple()) {
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",tuple_ops.cc,"@@ -29,9 +29,14 @@ limitations under the License.
 namespace xla {
 namespace llvm_ir {
 
+static llvm::Module* getModuleFromBuilder(llvm::IRBuilder<>* b) {
+  return b->GetInsertBlock()->getModule();
+}
+
 void EmitTupleSelect(const IrArray& select, const IrArray& pred,
                      llvm::Value* on_true, llvm::Value* on_false,
-                     llvm::IRBuilder<>* b, llvm::Module* module) {
+                     llvm::IRBuilder<>* b) {
+  llvm::Module* module = getModuleFromBuilder(b);
   CHECK(ShapeUtil::IsScalar(pred.GetShape()));
 
   llvm::LoadInst* pred_value =
@@ -65,7 +70,8 @@ void EmitTupleSelect(const IrArray& select, const IrArray& pred,
 }
 
 void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands,
-               llvm::IRBuilder<>* b, llvm::Module* module) {
+               llvm::IRBuilder<>* b) {
+  llvm::Module* module = getModuleFromBuilder(b);
   for (size_t i = 0; i < operands.size(); ++i) {
     auto* store = b->CreateStore(
         b->CreatePointerCast(operands[i], PrimitiveTypeToIrType(TUPLE, module)),
@@ -76,18 +82,19 @@ void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands,
 }
 
 void EmitTuple(const IrArray& tuple, absl::Span<const IrArray> buffers,
-               llvm::IRBuilder<>* b, llvm::Module* module) {
+               llvm::IRBuilder<>* b) {
   std::vector<llvm::Value*> buffer_ptrs;
   buffer_ptrs.reserve(buffers.size());
   absl::c_transform(
       buffers, std::back_inserter(buffer_ptrs),
       [](const llvm_ir::IrArray& buffer) { return buffer.GetBasePointer(); });
-  llvm_ir::EmitTuple(tuple, buffer_ptrs, b, module);
+  llvm_ir::EmitTuple(tuple, buffer_ptrs, b);
 }
 
 llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index,
                                  int alignment, llvm::Value* operand,
-                                 llvm::IRBuilder<>* b, llvm::Module* module) {
+                                 llvm::IRBuilder<>* b) {
+  llvm::Module* module = getModuleFromBuilder(b);
   llvm::Value* element_ptr =
       b->CreateInBoundsGEP(operand, {b->getInt64(0), b->getInt64(index)});
   llvm::LoadInst* src_buffer = b->CreateLoad(element_ptr);
",0,train
bb087feb3b1fd8b050b9e9cd3b256f9bc4f1de1c,tensorflow/tensorflow,"NFC: Change all helper functions in `tuple_ops` not to require `llvm::Module` argument, as it can be derived from the already provided `llvm::IRBuilder`.
This makes functions easier to code and the calls less verbose, and additionally enables making calls from helper classes where `llvm::Module*` might not be available.
PiperOrigin-RevId: 238523747",tuple_ops.h,"@@ -61,17 +61,17 @@ namespace llvm_ir {
 //   output[i] = pred ? tuple_on_true[i] : tuple_on_false[i]
 void EmitTupleSelect(const IrArray& select, const IrArray& pred,
                      llvm::Value* on_true, llvm::Value* on_false,
-                     llvm::IRBuilder<>* b, llvm::Module* module);
+                     llvm::IRBuilder<>* b);
 
 // A tuple is an array of pointers, one for each operand. Each pointer points to
 // the output buffer of its corresponding operand.
 void EmitTuple(const IrArray& tuple, absl::Span<llvm::Value* const> operands,
-               llvm::IRBuilder<>* b, llvm::Module* module);
+               llvm::IRBuilder<>* b);
 
 // Similar to EmitTuple above, except that the output buffers are provided in
 // the form of IrArray.
 void EmitTuple(const IrArray& tuple, absl::Span<const IrArray> buffers,
-               llvm::IRBuilder<>* b, llvm::Module* module);
+               llvm::IRBuilder<>* b);
 
 // A tuple is an array of pointers, one for each operand. Each pointer points to
 // the output buffer of its corresponding operand. A GetTupleElement instruction
@@ -79,7 +79,7 @@ void EmitTuple(const IrArray& tuple, absl::Span<const IrArray> buffers,
 // Returns an llvm value representing a pointer to the tuple element buffer.
 llvm::Value* EmitGetTupleElement(const Shape& target_shape, int64 index,
                                  int alignment, llvm::Value* operand,
-                                 llvm::IRBuilder<>* b, llvm::Module* module);
+                                 llvm::IRBuilder<>* b);
 }  // namespace llvm_ir
 }  // namespace xla
 
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it may cause problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",counter.h,"@@ -24,7 +24,10 @@ limitations under the License.
 // We replace this implementation with a null implementation for mobile
 // platforms.
 #ifdef IS_MOBILE_PLATFORM
+#define TENSORFLOW_INCLUDED_FROM_COUNTER_H  // prevent accidental use of
+                                            // mobile_counter.h
 #include ""tensorflow/core/lib/monitoring/mobile_counter.h""
+#undef TENSORFLOW_INCLUDED_FROM_COUNTER_H
 #else
 
 #include <array>
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it may cause problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",gauge.h,"@@ -24,7 +24,10 @@ limitations under the License.
 // We replace this implementation with a null implementation for mobile
 // platforms.
 #ifdef IS_MOBILE_PLATFORM
+#define TENSORFLOW_INCLUDED_FROM_GAUGE_H  // prevent accidental use of
+                                          // mobile_gauge.h
 #include ""tensorflow/core/lib/monitoring/mobile_gauge.h""
+#undef TENSORFLOW_INCLUDED_FROM_GAUGE_H
 #else
 
 #include <array>
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it may cause problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_counter.h,"@@ -18,6 +18,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_
 #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_COUNTER_H_
 
+#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H)
+// If this header file were included directly, and something else included its
+// non-mobile counterpart, there could be an unchecked ODR violation on the
+// classes below.
+#error do not include mobile_counter.h directly; use counter.h instead
+#endif  // !defined(IS_MOBILE_PLATFORM) ||
+        // !defined(TENSORFLOW_INCLUDED_FROM_COUNTER_H)
+
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/types.h""
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it could have caused problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_gauge.h,"@@ -18,6 +18,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_
 #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_GAUGE_H_
 
+#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H)
+// If this header file were included directly, and something else included its
+// non-mobile counterpart, there could be an unchecked ODR violation on the
+// classes below.
+#error do not include mobile_gauge.h directly; use gauge.h instead
+#endif  // !defined(IS_MOBILE_PLATFORM) ||
+        // !defined(TENSORFLOW_INCLUDED_FROM_GAUGE_H)
+
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/types.h""
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it could have caused problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_percentile_sampler.h,"@@ -13,9 +13,20 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+// Null implementation of the PercentileSampler metric for mobile platforms.
+
 #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_
 #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_PERCENTILE_SAMPLER_H_
 
+#if !defined(IS_MOBILE_PLATFORM) || \
+    !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H)
+// If this header file were included directly, and something else included its
+// non-mobile counterpart, there could be an unchecked ODR violation on the
+// classes below.
+#error do not include mobile_percentile_sampler.h directly; use percentile_sampler.h instead
+#endif  // !defined(IS_MOBILE_PLATFORM) ||
+        // !defined(TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H)
+
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/lib/monitoring/collection_registry.h""
 #include ""tensorflow/core/lib/monitoring/metric_def.h""
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it could have caused problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",mobile_sampler.h,"@@ -18,6 +18,14 @@ limitations under the License.
 #ifndef TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_
 #define TENSORFLOW_CORE_LIB_MONITORING_MOBILE_SAMPLER_H_
 
+#if !defined(IS_MOBILE_PLATFORM) || !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H)
+// If this header file were included directly, and something else included its
+// non-mobile counterpart, there could be an unchecked ODR violation on the
+// classes below.
+#error do not include mobile_sampler.h directly; use sampler.h to include it instead
+#endif  // !defined(IS_MOBILE_PLATFORM) ||
+        // !defined(TENSORFLOW_INCLUDED_FROM_SAMPLER_H)
+
 #include <memory>
 
 #include ""tensorflow/core/framework/summary.pb.h""
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it could have caused problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",percentile_sampler.h,"@@ -24,7 +24,11 @@ limitations under the License.
 // We replace this implementation with a null implementation for mobile
 // platforms.
 #ifdef IS_MOBILE_PLATFORM
+#define TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H  // prevent accidental use
+                                                       // of
+// mobile_percentile_sampler.h
 #include ""tensorflow/core/lib/monitoring/mobile_percentile_sampler.h""
+#undef TENSORFLOW_INCLUDED_FROM_PERCENTILE_SAMPLER_H
 #else
 
 #include <cmath>
",0,train
bccd445da9e75a9f196e52a8a9d1b388598851a4,tensorflow/tensorflow,"Avoid risk of including conflicting implementations of monitoring classes.

The classes in
  tensorflow/core/lib/monitoring/{counter,gauge,percentile_sampler,sampler}.h
have two implementations.  This is achieved by using a #if directive to provide
the implementation directly in each header file, or alternatively, a no-op
implementation on mobile platforms from one of the header files
mobile_{counter,gauge,percentile_sampler,sampler}.h.

I believe the intent is that the mobile*.h be used only via the main header
files, and never be included directly, but nothing was preventing this.  If
someone had included one of the mobile*.h files directly, and the resulting
object file were linked with one that used the primary header files on a
non-mobile platform, it could have caused problems.  There would have been no error at
compile or link time, yet the classes would be defined in two different ways,
leading to an unchecked ODR violation and undefined results.  For example, the
linker potentially could pick an arbitrary version of each routine in the
class.

This change tries to avoid the potential problem in two ways:
- by restricting the visibility of the mobile_*.h variants (for bazel builds,
  at least); and
- by causing the mobile_*.h files to use #error if they appear not to have been
  used on a mobile platform, or not included from their respective primary
  header files.

Also, include {mobile_,}percentile_sampler.h in the mobile_srcs_only_runtime
build rule.

PiperOrigin-RevId: 341680724
Change-Id: I1de71dd209f2769e162c0c5522cf0cf5006ef5ff",sampler.h,"@@ -24,7 +24,10 @@ limitations under the License.
 // We replace this implementation with a null implementation for mobile
 // platforms.
 #ifdef IS_MOBILE_PLATFORM
+#define TENSORFLOW_INCLUDED_FROM_SAMPLER_H  // prevent accidental use of
+                                            // mobile_sampler.h
 #include ""tensorflow/core/lib/monitoring/mobile_sampler.h""
+#undef TENSORFLOW_INCLUDED_FROM_SAMPLER_H
 #else
 
 #include <float.h>
",0,train
4ec3fcdc87687d33c1597aff9296041a6bb00434,tensorflow/tensorflow,"Adds support for explicitly assigning the replica to the VariableDeviceChooser. This is necessary when a device with a replica is set in a surrounding arg_scope.

PiperOrigin-RevId: 200567897",variables.py,"@@ -712,7 +712,8 @@ class VariableDeviceChooser(object):
                num_tasks=0,
                job_name='ps',
                device_type='CPU',
-               device_index=0):
+               device_index=0,
+               replica=None):
     """"""Initialize VariableDeviceChooser.
 
     Usage:
@@ -733,12 +734,15 @@ class VariableDeviceChooser(object):
     self._job_name = job_name
     self._device_type = device_type
     self._device_index = device_index
+    self._replica = replica
     self._num_tasks = num_tasks
     self._next_task_id = 0
 
   def __call__(self, op):
-    device_spec = tf_device.DeviceSpec(device_type=self._device_type,
-                                       device_index=self._device_index)
+    device_spec = tf_device.DeviceSpec(
+        replica=self._replica,
+        device_type=self._device_type,
+        device_index=self._device_index)
     if self._num_tasks > 0:
       task_id = self._next_task_id
       self._next_task_id = (self._next_task_id + 1) % self._num_tasks
",0,train
4ec3fcdc87687d33c1597aff9296041a6bb00434,tensorflow/tensorflow,"Adds support for explicitly assigning the replica to the VariableDeviceChooser. This is necessary when a device with a replica is set in a surrounding arg_scope.

PiperOrigin-RevId: 200567897",variables_test.py,"@@ -506,6 +506,35 @@ class VariablesTest(test.TestCase):
       self.assertDeviceEqual(e.device, '/job:ps/task:1/cpu:0')
       self.assertDeviceEqual(e.initial_value.device, '/cpu:99')
 
+  def testVariableWithVariableDeviceChooserWithReplica(self):
+
+    with ops.Graph().as_default():
+      device_fn = variables_lib2.VariableDeviceChooser(replica=3, num_tasks=2)
+      with arg_scope([variables_lib2.variable], device=device_fn):
+        a = variables_lib2.variable('a', [])
+        b = variables_lib2.variable('b', [])
+        c = variables_lib2.variable('c', [], device='cpu:12')
+        d = variables_lib2.variable('d', [])
+        with ops.device('cpu:99'):
+          e_init = constant_op.constant(12)
+        e = variables_lib2.variable('e', initializer=e_init)
+      # The values below highlight how the VariableDeviceChooser puts initial
+      # values on the same device as the variable job.
+      self.assertDeviceEqual(a.device, '/job:ps/replica:3/task:0/cpu:0')
+      self.assertEqual(a.initial_value.op.colocation_groups(),
+                       a.op.colocation_groups())
+      self.assertDeviceEqual(b.device, '/job:ps/replica:3/task:1/cpu:0')
+      self.assertEqual(b.initial_value.op.colocation_groups(),
+                       b.op.colocation_groups())
+      self.assertDeviceEqual(c.device, '/cpu:12')
+      self.assertEqual(c.initial_value.op.colocation_groups(),
+                       c.op.colocation_groups())
+      self.assertDeviceEqual(d.device, '/job:ps/replica:3/task:0/cpu:0')
+      self.assertEqual(d.initial_value.op.colocation_groups(),
+                       d.op.colocation_groups())
+      self.assertDeviceEqual(e.device, '/job:ps/replica:3/task:1/cpu:0')
+      self.assertDeviceEqual(e.initial_value.device, '/cpu:99')
+
   def testVariableGPUPlacement(self):
 
     with ops.Graph().as_default():
@@ -930,8 +959,8 @@ class AssignFromCheckpointTest(test.TestCase):
       return saver.save(sess, checkpoint_dir, global_step=global_step)
 
   def testLoadExistingVariables(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(),
-                                                     'load_existing_variables'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'load_existing_variables'))
 
     init_value0 = 10.0
     init_value1 = 20.0
@@ -944,8 +973,8 @@ class AssignFromCheckpointTest(test.TestCase):
       var1 = variables_lib2.variable('my_var1', shape=[])
 
       vars_to_restore = {'v0': var0, 'v1': var1}
-      op, feed_dict = variables_lib2.assign_from_checkpoint(model_path,
-                                                            vars_to_restore)
+      op, feed_dict = variables_lib2.assign_from_checkpoint(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -960,8 +989,8 @@ class AssignFromCheckpointTest(test.TestCase):
   # Tests restoring PartitionedVariables and tests using a dictionary
   # of lists as the assign_from_checkpoint() var_list param.
   def testLoadPartitionedVariables(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(
-        self.get_temp_dir(), 'load_partitioned_variables'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'load_partitioned_variables'))
 
     init_value0 = np.array([[10.0, 11.0], [12.0, 13.0]])
     init_value1 = np.array([20.0])  # Partitioned into 1 part, edge case.
@@ -974,15 +1003,14 @@ class AssignFromCheckpointTest(test.TestCase):
       partitioner = partitioned_variables.variable_axis_size_partitioner(2)
       var0 = variables_lib2.variable(
           'var0', shape=init_value0.shape, partitioner=partitioner)
-      var0full = variables_lib2.variable(
-          'var0full', shape=init_value0.shape)
+      var0full = variables_lib2.variable('var0full', shape=init_value0.shape)
       var1 = variables_lib2.variable(
           'var1', shape=init_value1.shape, partitioner=partitioner)
 
       # Convert var0 and var1 into a list of underlying variables.
       vars_to_restore = {'var0': list(var0) + [var0full], 'var1': list(var1)}
-      op, feed_dict = variables_lib2.assign_from_checkpoint(model_path,
-                                                            vars_to_restore)
+      op, feed_dict = variables_lib2.assign_from_checkpoint(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -992,16 +1020,18 @@ class AssignFromCheckpointTest(test.TestCase):
 
       # Request and test the variable values. PartitionedVariables can't
       # be evaled so we wrap them in an identity.
-      self.assertTrue(np.array_equal(
-          init_value0, array_ops.identity(var0).eval()))
-      self.assertTrue(np.array_equal(
-          init_value0, var0full.eval()))
-      self.assertTrue(np.array_equal(
-          init_value1, array_ops.identity(var1).eval()))
+      self.assertTrue(
+          np.array_equal(init_value0,
+                         array_ops.identity(var0).eval()))
+      self.assertTrue(np.array_equal(init_value0, var0full.eval()))
+      self.assertTrue(
+          np.array_equal(init_value1,
+                         array_ops.identity(var1).eval()))
 
   def testRaisesValueErrorIfAVariableIsntFound(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(
-        self.get_temp_dir(), 'raises_value_error_if_var_isnt_found'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(),
+                            'raises_value_error_if_var_isnt_found'))
 
     init_value0 = 10.0
     init_value1 = 20.0
@@ -1019,8 +1049,9 @@ class AssignFromCheckpointTest(test.TestCase):
         variables_lib2.assign_from_checkpoint(model_path, vars_to_restore)
 
   def testInitFromCheckpointWithScopes(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(
-        self.get_temp_dir(), 'init_from_checkpoint_with_scopes'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(),
+                            'init_from_checkpoint_with_scopes'))
 
     init_value0 = np.asarray(
         [1.0, 3.0, 9.0], dtype=np.float32).reshape((1, 3, 1))
@@ -1038,8 +1069,8 @@ class AssignFromCheckpointTest(test.TestCase):
         var1 = variables_lib2.variable('my_var1', shape=init_value1.shape)
 
       vars_to_restore = {'layer0/v0': var0, 'layer1/v1': var1}
-      op, feed_dict = variables_lib2.assign_from_checkpoint(model_path,
-                                                            vars_to_restore)
+      op, feed_dict = variables_lib2.assign_from_checkpoint(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -1081,8 +1112,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       return saver.save(sess, checkpoint_dir, global_step=global_step)
 
   def testLoadExistingVariables(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(),
-                                                     'load_existing_variables'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'load_existing_variables'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1097,8 +1128,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       var1 = variables_lib2.variable('my_var1', shape=[])
 
       vars_to_restore = {'v0': var0, 'v1': var1}
-      init_fn = variables_lib2.assign_from_checkpoint_fn(model_path,
-                                                         vars_to_restore)
+      init_fn = variables_lib2.assign_from_checkpoint_fn(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -1111,8 +1142,9 @@ class AssignFromCheckpointFnTest(test.TestCase):
       self.assertEqual(init_value1, var1.eval())
 
   def testLoadExistingVariablesDifferentShapeDefaultDoesNotAllowReshape(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(
-        self.get_temp_dir(), 'load_existing_vars_no_reshape'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(),
+                            'load_existing_vars_no_reshape'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1127,8 +1159,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       var1 = variables_lib2.variable('my_var1', shape=[])
 
       vars_to_restore = {'v0': var0, 'v1': var1}
-      init_fn = variables_lib2.assign_from_checkpoint_fn(model_path,
-                                                         vars_to_restore)
+      init_fn = variables_lib2.assign_from_checkpoint_fn(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -1138,9 +1170,10 @@ class AssignFromCheckpointFnTest(test.TestCase):
         init_fn(sess)
 
   def testLoadExistingVariablesDifferentShapeAllowReshape(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(
-        self.get_temp_dir(),
-        'load_existing_variables_different_shape_allow_reshape'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(
+            self.get_temp_dir(),
+            'load_existing_variables_different_shape_allow_reshape'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1169,8 +1202,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       self.assertEqual(init_value1, var1.eval())
 
   def testNotFoundError(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(),
-                                                     'not_found_error'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'not_found_error'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1186,8 +1219,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       var2 = variables_lib2.variable('my_var2', shape=[])
 
       vars_to_restore = {'v0': var0, 'v1': var1, 'v2': var2}
-      init_fn = variables_lib2.assign_from_checkpoint_fn(model_path,
-                                                         vars_to_restore)
+      init_fn = variables_lib2.assign_from_checkpoint_fn(
+          model_path, vars_to_restore)
 
       # Initialize the variables.
       sess.run(variables_lib.global_variables_initializer())
@@ -1197,8 +1230,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
         init_fn(sess)
 
   def testMissingVariablesList(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(),
-                                                     'missing_variables_list'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'missing_variables_list'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1228,8 +1261,8 @@ class AssignFromCheckpointFnTest(test.TestCase):
       self.assertEqual(init_value1, var1.eval())
 
   def testMissingVariablesDict(self):
-    model_dir = tempfile.mkdtemp(prefix=os.path.join(self.get_temp_dir(),
-                                                     'missing_variables_dict'))
+    model_dir = tempfile.mkdtemp(
+        prefix=os.path.join(self.get_temp_dir(), 'missing_variables_dict'))
     if gfile.Exists(model_dir):
       gfile.DeleteRecursively(model_dir)
 
@@ -1279,9 +1312,8 @@ class ZeroInitializerOpTest(test.TestCase):
   def testZeroInitializer(self):
     for dtype in (dtypes.int32, dtypes.int64, dtypes.float32, dtypes.float64):
       for use_init in (False, True):
-        self._testZeroInitializer(
-            [10, 20], array_ops.ones(
-                [10, 20], dtype=dtype), use_init)
+        self._testZeroInitializer([10, 20], array_ops.ones(
+            [10, 20], dtype=dtype), use_init)
 
 
 class ZeroVarInitializerOpTest(test.TestCase):
",0,train
5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from
QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable.
Change: 137769682",queue_runner.cc,"@@ -48,6 +48,7 @@ Status QueueRunner::Init(const QueueRunnerDef& queue_runner_def) {
   thread_pool_.reset(new thread::ThreadPool(
       Env::Default(), SanitizeThreadSuffix(queue_name_), runs_));
   should_stop_ = false;
+
   return Status::OK();
 }
 
@@ -57,11 +58,29 @@ QueueRunner::~QueueRunner() {
   Join();
 }
 
-Status QueueRunner::Start(Session* sess) {
+Status QueueRunner::Start(Session* sess) { return Start(sess, 0); }
+
+Status QueueRunner::Start(Session* sess, int wait_for) {
+  counter_.reset(new BlockingCounter(runs_));
   for (const string& enqueue_op : enqueue_op_names_) {
     thread_pool_->Schedule(
         std::bind(&QueueRunner::Run, this, sess, enqueue_op));
   }
+  // Wait for up to 'wait_for' milliseconds.
+  if (wait_for > 0) {
+    if (!counter_->WaitFor(std::chrono::milliseconds(wait_for))) {
+      return Status(error::DEADLINE_EXCEEDED,
+                    ""Queues not fed before the timeout"");
+    }
+    // Check the status of the queue runner as well as the result of the enqueue
+    // operations.
+    mutex_lock l(mu_);
+    if (!enqueue_status_.ok()) {
+      return enqueue_status_;
+    } else {
+      return status_;
+    }
+  }
   return Status::OK();
 }
 
@@ -76,13 +95,23 @@ Status QueueRunner::Stop(Session* sess) {
 
 Status QueueRunner::Join() {
   thread_pool_.reset();
+  mutex_lock l(mu_);
   return status_;
 }
 
 void QueueRunner::Run(Session* sess, const string& enqueue_op) {
   bool decremented = false;
+  bool first_iteration = true;
   while (!should_stop_.load()) {
     auto status = sess->Run({}, {}, {enqueue_op}, nullptr);
+    if (first_iteration) {
+      if (!status.ok()) {
+        mutex_lock l(mu_);
+        enqueue_status_ = status;
+      }
+      counter_->DecrementCount();
+      first_iteration = false;
+    }
     if (status.ok()) {
       continue;
     } else if (queue_closed_exception_types_.count(
@@ -114,6 +143,7 @@ void QueueRunner::Run(Session* sess, const string& enqueue_op) {
       // subsequent queues.
       Stop(sess);
     }
+    first_iteration = false;
   }
 
   if (!decremented) {
",0,train
5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from
QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable.
Change: 137769682",queue_runner.h,"@@ -21,6 +21,7 @@ limitations under the License.
 #include <unordered_set>
 #include <vector>
 
+#include ""tensorflow/core/lib/core/blocking_counter.h""
 #include ""tensorflow/core/lib/core/error_codes.pb.h""
 #include ""tensorflow/core/lib/core/status.h""
 #include ""tensorflow/core/lib/core/threadpool.h""
@@ -46,6 +47,10 @@ class QueueRunner {
   // Starts the queue runner with the given session.
   Status Start(Session* sess);
 
+  // Starts the queue runner with the given session, and waits for up to the
+  // specified time (in milliseconds) for the queues to start to fill up.
+  Status Start(Session* sess, int wait_for);
+
   // Requests to stop and runs the cancel op.
   Status Stop(Session* sess);
 
@@ -78,7 +83,9 @@ class QueueRunner {
   mutex mu_;
   // TODO(yuefengz): implement c++ coordinator.
   int runs_ = 0;
-  Status status_;
+  Status status_ GUARDED_BY(mu_);
+  Status enqueue_status_ GUARDED_BY(mu_);
+  std::unique_ptr<BlockingCounter> counter_;
 };
 
 }  // namespace tensorflow
",0,train
5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from
QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable.
Change: 137769682",queue_runner_test.cc,"@@ -317,5 +317,22 @@ TEST(QueueRunnerTest, EmptyEnqueueOps) {
             Code::INVALID_ARGUMENT);
 }
 
+TEST(QueueRunnerTest, StartTimeout) {
+  GraphDef graph_def = BuildDoubleQueueGraph();
+  SessionOptions options;
+  std::unique_ptr<Session> session(NewSession(options));
+  TF_CHECK_OK(session->Create(graph_def));
+
+  QueueRunnerDef queue_runner_def = BuildQueueRunnerDef(
+      kQueueName1, {kEnqueueOp1}, kCloseOp1, kCancelOp1, {});
+
+  std::unique_ptr<QueueRunner> qr;
+  TF_EXPECT_OK(QueueRunner::New(queue_runner_def, &qr));
+  // This will time out since queue0 is not fed and queue1 is fetching data from
+  // queue0.
+  EXPECT_EQ(qr->Start(session.get(), 1).code(), Code::DEADLINE_EXCEEDED);
+  session->Close();
+}
+
 }  // namespace
 }  // namespace tensorflow
",0,train
5de9d3c392d5531eb3bbcefd007fcc25db7448cd,tensorflow/tensorflow,"Added the ability to wait for queues to start running before returning from
QueueRunner::Start(). This provides a reliable way to check the value of the status_ variable.
Change: 137769682",blocking_counter.h,"@@ -31,7 +31,7 @@ class BlockingCounter {
     DCHECK_EQ((initial_count << 1) >> 1, initial_count);
   }
 
-  ~BlockingCounter() { DCHECK_EQ(state_ >> 1, 0); }
+  ~BlockingCounter() {}
 
   inline void DecrementCount() {
     unsigned int v = state_.fetch_sub(2, std::memory_order_acq_rel) - 2;
@@ -53,6 +53,20 @@ class BlockingCounter {
       cond_var_.wait(l);
     }
   }
+  // Waits for the specified time; returns false iff the count has not dropped
+  // to zero before the timeout expires.
+  inline bool WaitFor(std::chrono::milliseconds ms) {
+    unsigned int v = state_.fetch_or(1, std::memory_order_acq_rel);
+    if ((v >> 1) == 0) return true;
+    mutex_lock l(mu_);
+    while (!notified_) {
+      const std::cv_status status = cond_var_.wait_for(l, ms);
+      if (status == std::cv_status::timeout) {
+        return false;
+      }
+    }
+    return true;
+  }
 
  private:
   mutex mu_;
",0,train
9cd3b856a732c62e803ad60d2464e5043a9be7c1,tensorflow/tensorflow,Just exporting linalg.normalize,nn_impl.py,"@@ -436,7 +436,7 @@ def swish(features):
   return features * math_ops.sigmoid(features)
 
 
-@tf_export(""math.normalize"", ""linalg.normalize"", ""nn.normalize"")
+@tf_export(""linalg.normalize"")
 def normalize(tensor,
               ord='euclidean',
               axis=None,
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),evaluator.py,"@@ -178,7 +178,7 @@ class Evaluator(object):
       call_op: An op that updates evaluation state on a mini-batch of examples.
         Must generate an tf.errors.OutOfRangeError when done.
       results_op: A dictionary of tensors that compute the final evaluation
-        results from the evaulation state.
+        results from the evaluation state.
       sess: The Session to run the evaluation in. Defaults to the default
         Session.
 
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),resnet50.py,"@@ -97,7 +97,7 @@ class _ConvBlock(tfe.Network):
 
   Args:
       kernel_size: the kernel size of middle conv layer at main path
-      filters: list of integers, the filterss of 3 conv layer at main path
+      filters: list of integers, the filters of 3 conv layer at main path
       stage: integer, current stage label, used for generating layer names
       block: 'a','b'..., current block label, used for generating layer names
       data_format: data_format for the input ('channels_first' or
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),rnn_ptb.py,"@@ -88,7 +88,7 @@ class Embedding(tf.layers.Layer):
 
 
 class PTBModel(tfe.Network):
-  """"""LSTM for word language modelling.
+  """"""LSTM for word language modeling.
 
   Model described in:
   (Zaremba, et. al.) Recurrent Neural Network Regularization
@@ -340,7 +340,7 @@ if __name__ == ""__main__"":
   parser.add_argument(
       ""--logdir"", type=str, default="""", help=""Directory for checkpoint."")
   parser.add_argument(
-      ""--epoch"", type=int, default=20, help=""Number of epoches."")
+      ""--epoch"", type=int, default=20, help=""Number of epochs."")
   parser.add_argument(""--batch-size"", type=int, default=20, help=""Batch size."")
   parser.add_argument(
       ""--seq-len"", type=int, default=35, help=""Sequence length."")
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),data.py,"@@ -51,11 +51,11 @@ def get_non_parenthesis_words(items):
   """"""Get the non-parenthesis items from a SNLI parsed sentence.
 
   Args:
-    items: Data items from a parsed SNLI setence, with parentheses. E.g.,
+    items: Data items from a parsed SNLI sentence, with parentheses. E.g.,
       [""("", ""Man"", ""("", ""("", ""("", ""("", ""("", ""wearing"", ""pass"", "")"", ...
 
   Returns:
-    A list of non-parenthis word items, all converted to lower case. E.g.,
+    A list of non-parentheses word items, all converted to lower case. E.g.,
       [""man"", ""wearing"", ""pass"", ...
   """"""
   return [x.lower() for x in items if x not in PARENTHESES and x]
@@ -201,7 +201,7 @@ def load_word_vectors(data_root, vocab):
 
 
 def calculate_bins(length2count, min_bin_size):
-  """"""Cacluate bin boundaries given a histogram of lengths and mininum bin size.
+  """"""Calculate bin boundaries given a histogram of lengths and minimum bin size.
 
   Args:
     length2count: A `dict` mapping length to sentence count.
@@ -335,9 +335,9 @@ class SnliData(object):
         # The sorting above and the batching here makes sure that sentences of
         # similar max lengths are batched together, minimizing the inefficiency
         # due to uneven max lengths. The sentences are batched differently in
-        # each call to get_generator() due to the shuffling before sotring
+        # each call to get_generator() due to the shuffling before sorting
         # above. The pad_and_reverse_word_ids() and pad_transitions() functions
-        # take care of any remaning unevenness of the max sentence lengths.
+        # take care of any remaining unevenness of the max sentence lengths.
         end = min(begin + batch_size, len(labels))
         # Transpose, because the SPINN model requires time-major, instead of
         # batch-major.
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),network_test.py,"@@ -688,7 +688,7 @@ class NetworkTest(test.TestCase):
     net2(one)
     # Layer names typically are globally unique rather than being unique within
     # the scope of their first use. However, within a Network they must be named
-    # locally so that previous Layer consutrciton does not interfere with
+    # locally so that previous Layer construction does not interfere with
     # variable naming (e.g. add a Layer construction before the Network,
     # suddenly your previously saved checkpoint is incompatible).
     self.assertEqual(""dense"", net1.l1.name)
",0,test
a68bf328ff6d4261203f2aa723d77174a771a0ec,tensorflow/tensorflow,minor spelling tweaks for eager execution docs (#16355),saver.py,"@@ -82,7 +82,7 @@ def restore_variables_on_create(save_path, map_func=None):
       map_func_wrapper = lambda self, x: x
     else:
       if not callable(map_func):
-        raise ValueError(""map_func must be callaled."")
+        raise ValueError(""map_func must be callable."")
       map_func_wrapper = lambda self, x: map_func(x)
 
     ckpt_var_cache = dict()
",0,test
a844366fa89373a29590f38f0a0a15e9aff1694b,tensorflow/tensorflow,"Added a fix for Mali G710 being incorrectly matched as G71 in gpu_info.

PiperOrigin-RevId: 414020574
Change-Id: Ib1ff48a2639462ccfbc27801daf3b3bf1fe2c5e9",gpu_info.cc,"@@ -105,20 +105,21 @@ AdrenoGpu GetAdrenoGpuVersion(const std::string& gpu_description) {
 }
 
 MaliGpu GetMaliGpuVersion(const std::string& gpu_description) {
-  const std::map<std::string, MaliGpu> kMapping = {
+  // Order must be preserved: longer names must be matched before their prefixes.
+  const std::vector<std::pair<std::string, MaliGpu>> kMapping = {
       {""t604"", MaliGpu::kT604}, {""t622"", MaliGpu::kT622},
       {""t624"", MaliGpu::kT624}, {""t628"", MaliGpu::kT628},
       {""t658"", MaliGpu::kT658}, {""t678"", MaliGpu::kT678},
       {""t720"", MaliGpu::kT720}, {""t760"", MaliGpu::kT760},
       {""t820"", MaliGpu::kT820}, {""t830"", MaliGpu::kT830},
       {""t860"", MaliGpu::kT860}, {""t880"", MaliGpu::kT880},
-      {""g31"", MaliGpu::kG31},   {""g51"", MaliGpu::kG51},
-      {""g71"", MaliGpu::kG71},   {""g52"", MaliGpu::kG52},
+      {""g310"", MaliGpu::kG310}, {""g31"", MaliGpu::kG31},
+      {""g510"", MaliGpu::kG510}, {""g51"", MaliGpu::kG51},
+      {""g52"", MaliGpu::kG52},   {""g57"", MaliGpu::kG57},
+      {""g610"", MaliGpu::kG610}, {""g68"", MaliGpu::kG68},
+      {""g710"", MaliGpu::kG710}, {""g71"", MaliGpu::kG71},
       {""g72"", MaliGpu::kG72},   {""g76"", MaliGpu::kG76},
-      {""g57"", MaliGpu::kG57},   {""g77"", MaliGpu::kG77},
-      {""g68"", MaliGpu::kG68},   {""g78"", MaliGpu::kG78},
-      {""g310"", MaliGpu::kG310}, {""g510"", MaliGpu::kG510},
-      {""g610"", MaliGpu::kG610}, {""g710"", MaliGpu::kG710},
+      {""g77"", MaliGpu::kG77},   {""g78"", MaliGpu::kG78},
   };
   for (const auto& v : kMapping) {
     if (gpu_description.find(v.first) != std::string::npos) {
",0,test
419ebe51e023e871590b19eb4df1c1fdbe9da51e,tensorflow/tensorflow,"[XLA:Python] Allow multiple partitions to correspond to a single executable.

XLA SPMD partitioning only produces a single executable, rather than one per partition.

PiperOrigin-RevId: 292646667
Change-Id: I810c80578bbd9fe7e785aa4798a849ecaba8db30",local_client.cc,"@@ -693,9 +693,12 @@ PyLocalExecutable::PyLocalExecutable(
   const int num_replicas = device_assignment_->replica_count();
   const int num_partitions = device_assignment_->computation_count();
 
-  CHECK_EQ(num_partitions, executables_.size())
-      << ""Number of executables "" << executables_.size()
-      << "" did not match number of partitions "" << num_partitions;
+  // SPMD sharding produces a single executable for multiple partitions.
+  if (executables_.size() > 1) {
+    CHECK_EQ(num_partitions, executables_.size())
+        << ""Number of executables "" << executables_.size()
+        << "" did not match number of partitions "" << num_partitions;
+  }
 
   for (int replica = 0; replica < num_replicas; ++replica) {
     for (int partition = 0; partition < num_partitions; ++partition) {
@@ -789,8 +792,11 @@ StatusOr<std::unique_ptr<PyLocalBuffer>> PyLocalExecutable::ExecuteHelper(
   auto compute_reservation = std::make_shared<Semaphore::ScopedReservation>(
       device_state->compute_semaphore().ScopedAcquire(1));
 
+  // SPMD sharding produces a single executable for multiple partitions.
+  int executable_idx = executables_.size() > 1 ? partition : 0;
+
   StatusOr<ScopedShapedBuffer> result_buffer_or_status =
-      executables_[partition]->RunAsync(argument_buffer_ptrs, options);
+      executables_[executable_idx]->RunAsync(argument_buffer_ptrs, options);
 
   VLOG(1) << ""Replica "" << replica << "" partition "" << partition
           << "" completed; ok="" << result_buffer_or_status.ok();
@@ -820,7 +826,7 @@ StatusOr<std::unique_ptr<PyLocalBuffer>> PyLocalExecutable::ExecuteHelper(
 
   device_state->ThenRelease(
       device_state->compute_stream(),
-      std::make_tuple(executables_[partition], compute_reservation,
+      std::make_tuple(executables_[executable_idx], compute_reservation,
                       device_assignment_));
   return absl::make_unique<PyLocalBuffer>(
       result_buffer.on_host_shape(), result_buffer.on_device_shape(),
",0,train
775ee0b3f7e96a295920b8723cfb42d7e9d8cacb,tensorflow/tensorflow,"Extend `lmhlo.fusion` op rewrite pattern to `lmhlo.scatter` and `lmhlo.sort`.

PiperOrigin-RevId: 404871130
Change-Id: I024d97c22643d9c313b015d0e1cd7ba6d1e778c3",kernel_ops_pattern.cc,"@@ -12,7 +12,8 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.
 
-// Pattern to lower lmhlo.fusion ops to gpu dialect.
+// Pattern to lower lmhlo ops with help of the ir emitter to gpu device code
+// and gpu dialect ops (gpu.launch_func and gpu.memcpy).
 
 #include <iterator>
 #include <numeric>
@@ -54,6 +55,7 @@
 namespace tensorflow {
 
 using mlir::ArrayRef;
+using mlir::Operation;
 using mlir::SmallVector;
 using mlir::Value;
 using mlir::memref::GetGlobalOp;
@@ -67,8 +69,8 @@ using ConstantInfo = xla::gpu::GpuExecutable::ConstantInfo;
 
 namespace {
 
-// Replaces all lmhlo.fusion ops within a module with tfrt_gpu.launch ops.
-struct FusionRewritePattern : mlir::OpRewritePattern<mlir::ModuleOp> {
+// Replaces lmhlo ops within a module with gpu.launch_func and gpu.memcpy ops.
+struct KernelOpsPattern : mlir::OpRewritePattern<mlir::ModuleOp> {
   using OpRewritePattern<mlir::ModuleOp>::OpRewritePattern;
 
   mlir::LogicalResult matchAndRewrite(
@@ -76,8 +78,8 @@ struct FusionRewritePattern : mlir::OpRewritePattern<mlir::ModuleOp> {
 };
 
 struct RewriteData {
-  mlir::lmhlo::FusionOp fusion_op;
-  mlir::SetVector<Value> captures;
+  Operation* op;
+  mlir::SmallVector<Value, 4> arguments;
   std::vector<xla::BufferAllocation> allocations;
   std::unique_ptr<ThunkSequence> thunks;
   std::vector<ConstantInfo> constants;
@@ -93,13 +95,13 @@ static llvm::Error MakeError(xla::Status status) {
   return MakeError(status.error_message());
 }
 
-// Clones `fusion_op` into a function within a module with `captures` arguments.
-// The `get_global_ops` are the def ops of `captures`, or null otherwise.
+// Clones `op` into a function within a module with `arguments`.
+// The `get_global_ops` are the def ops of `arguments`, or null otherwise.
 static std::tuple<mlir::OwningModuleRef, mlir::FuncOp> CloneToModule(
-    mlir::lmhlo::FusionOp fusion_op, mlir::ValueRange captures,
+    Operation* op, mlir::ValueRange arguments,
     mlir::MutableArrayRef<GetGlobalOp> get_global_ops) {
-  auto loc = fusion_op->getLoc();
-  auto* context = fusion_op->getContext();
+  auto loc = op->getLoc();
+  auto* context = op->getContext();
   mlir::OpBuilder builder(context);
 
   mlir::OwningModuleRef module_op = builder.create<mlir::ModuleOp>(loc);
@@ -115,8 +117,8 @@ static std::tuple<mlir::OwningModuleRef, mlir::FuncOp> CloneToModule(
   }
 
   auto func_type = builder.getType<mlir::FunctionType>(
-      mlir::TypeRange(captures), mlir::TypeRange());
-  auto func_name = fusion_op->getParentOfType<mlir::FuncOp>().getName();
+      mlir::TypeRange(arguments), mlir::TypeRange());
+  auto func_name = op->getParentOfType<mlir::FuncOp>().getName();
   auto func_op = builder.create<mlir::FuncOp>(loc, func_name, func_type);
   // Annotate the function arguments if they refer to a memref.global op.
   for (auto pair : llvm::enumerate(get_global_ops)) {
@@ -128,14 +130,14 @@ static std::tuple<mlir::OwningModuleRef, mlir::FuncOp> CloneToModule(
 
   builder.setInsertionPointToEnd(func_op.addEntryBlock());
   mlir::BlockAndValueMapping mapping;
-  for (const auto& pair : llvm::zip_first(captures, func_op.getArguments()))
+  for (const auto& pair : llvm::zip_first(arguments, func_op.getArguments()))
     mapping.map(std::get<0>(pair), std::get<1>(pair));
   // Clone the memref.get_global ops.
   for (auto get_global_op : get_global_ops) {
     if (!get_global_op) continue;
     mapping.map(get_global_op, builder.clone(*get_global_op)->getResult(0));
   }
-  auto* clone = builder.clone(*fusion_op, mapping);
+  auto* clone = builder.clone(*op, mapping);
   auto name_loc = mlir::NameLoc::get(builder.getIdentifier(func_name));
   clone->setLoc(mlir::FusedLoc::get(context, {loc, name_loc}));
   builder.create<mlir::lmhlo::TerminatorOp>(loc);
@@ -145,11 +147,10 @@ static std::tuple<mlir::OwningModuleRef, mlir::FuncOp> CloneToModule(
 
 // Converts the argument's shaped types into buffer allocations.
 static llvm::Expected<std::vector<xla::BufferAllocation>> GetAllocations(
-    const mlir::SetVector<Value>& captures,
-    ArrayRef<GetGlobalOp> get_global_ops) {
+    ArrayRef<Value> arguments, ArrayRef<GetGlobalOp> get_global_ops) {
   std::vector<xla::BufferAllocation> allocations;
-  allocations.reserve(captures.size());
-  for (Value argument : captures) {
+  allocations.reserve(arguments.size());
+  for (Value argument : arguments) {
     mlir::ShapedType type = argument.getType().dyn_cast<mlir::ShapedType>();
     if (!type || !type.hasStaticShape())
       return MakeError(""Expected static shapes"");
@@ -208,23 +209,25 @@ Emit(mlir::FuncOp func_op, absl::Span<const xla::BufferAllocation> allocations,
                          std::move(ir_emitter_context.constants()));
 }
 
-// Returns the data to rewrite fusion_op without changing the IR.
-static llvm::Expected<RewriteData> Match(mlir::lmhlo::FusionOp fusion_op) {
+// Returns the data to rewrite op without changing the IR.
+static llvm::Expected<RewriteData> Match(Operation* op) {
+  mlir::SmallVector<Value, 4> arguments = op->getOperands();
   mlir::SetVector<Value> captures;
-  getUsedValuesDefinedAbove(fusion_op->getRegions(), captures);
+  getUsedValuesDefinedAbove(op->getRegions(), captures);
+  llvm::copy(captures, std::back_inserter(arguments));
 
-  // Collect captures that are defined by a memref.get_global op. The created
-  // module's annotations make the ir emitter recognize them as constants.
+  // Collect arguments that are defined by a memref.get_global op. The
+  // created module's annotations make the ir emitter recognize them as
+  // constants.
   SmallVector<GetGlobalOp, 4> get_global_ops;
-  get_global_ops.reserve(captures.size());
+  get_global_ops.reserve(arguments.size());
   llvm::transform(
-      captures, std::back_inserter(get_global_ops),
+      arguments, std::back_inserter(get_global_ops),
       [](Value argument) { return argument.getDefiningOp<GetGlobalOp>(); });
 
-  auto allocations = GetAllocations(captures, get_global_ops);
+  auto allocations = GetAllocations(arguments, get_global_ops);
   if (!allocations) return allocations.takeError();
-  auto module_op =
-      CloneToModule(fusion_op, captures.getArrayRef(), get_global_ops);
+  auto module_op = CloneToModule(op, arguments, get_global_ops);
 
   xla::HloModuleConfig hlo_module_config;
   xla::DebugOptions options = xla::GetDebugOptionsFromFlags();
@@ -265,21 +268,23 @@ static llvm::Expected<RewriteData> Match(mlir::lmhlo::FusionOp fusion_op) {
                                     hlo_module_config, libdevice_dir);
   if (!ptx.ok()) return MakeError(ptx.status());
 
-  return RewriteData{
-      fusion_op,         std::move(captures),  std::move(*allocations),
-      std::move(thunks), std::move(constants), std::move(*ptx)};
+  return RewriteData{op,
+                     std::move(arguments),
+                     std::move(*allocations),
+                     std::move(thunks),
+                     std::move(constants),
+                     std::move(*ptx)};
 }
 
-// Replaces fusion_op with gpu.launch_func.
-static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
-                    mlir::PatternRewriter& rewriter,
-                    mlir::SymbolTable& symbol_table, ArrayRef<Value> captures,
+// Replaces op with gpu.launch_func and gpu.memcpy ops.
+static void Rewrite(Operation* op, mlir::PatternRewriter& rewriter,
+                    mlir::SymbolTable& symbol_table, ArrayRef<Value> arguments,
                     ThunkSequence* thunks, ArrayRef<ConstantInfo> constants,
                     mlir::StringRef gpu_module_data) {
   mlir::OpBuilder::InsertionGuard guard(rewriter);
-  auto loc = fusion_op->getLoc();
+  auto loc = op->getLoc();
 
-  rewriter.setInsertionPoint(fusion_op->getParentOfType<mlir::FuncOp>());
+  rewriter.setInsertionPoint(op->getParentOfType<mlir::FuncOp>());
   auto gpu_module = rewriter.create<mlir::gpu::GPUModuleOp>(loc, ""gpu_module"");
   symbol_table.insert(gpu_module);
   gpu_module->setAttr(tfrt::gpu::getGpuBinaryAttrName(),
@@ -302,7 +307,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
           static_cast<const DeviceToDeviceCopyThunk*>(thunk.get());
       auto get_argument = [&](const xla::BufferAllocation::Slice& slice) {
         assert(slice.offset() == 0 && slice.size() == copy_thunk->size_bytes());
-        Value result = captures[slice.index()];
+        Value result = arguments[slice.index()];
         // Annotate defining memref.get_global with the gpu_module symbol.
         // Unlike kernel thunks below, which use the global in the kernel only.
         if (auto op = result.getDefiningOp<GetGlobalOp>()) {
@@ -311,7 +316,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
         }
         return result;
       };
-      rewriter.setInsertionPoint(fusion_op);
+      rewriter.setInsertionPoint(op);
       rewriter.create<mlir::gpu::MemcpyOp>(
           loc, mlir::TypeRange(), mlir::ValueRange(),
           get_argument(copy_thunk->destination()),
@@ -321,11 +326,11 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
 
     const auto* kernel_thunk = static_cast<const KernelThunk*>(thunk.get());
     rewriter.setInsertionPointToStart(gpu_module.getBody());
-    SmallVector<Value, 4> arguments;
-    for (auto argument : kernel_thunk->arguments())
-      arguments.push_back(captures[argument->index()]);
+    SmallVector<Value, 4> kernel_args;
+    for (auto kernel_arg : kernel_thunk->arguments())
+      kernel_args.push_back(arguments[kernel_arg->index()]);
     auto func_type = rewriter.getType<mlir::FunctionType>(
-        mlir::TypeRange(mlir::ValueRange(arguments)), mlir::TypeRange());
+        mlir::TypeRange(mlir::ValueRange(kernel_args)), mlir::TypeRange());
     mlir::gpu::GPUFuncOp kernel_func = rewriter.create<mlir::gpu::GPUFuncOp>(
         loc, kernel_thunk->kernel_name(), func_type);
     kernel_func->setAttr(mlir::gpu::GPUDialect::getKernelFuncAttrName(),
@@ -333,7 +338,7 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
     rewriter.setInsertionPointToEnd(&kernel_func.getBody().back());
     rewriter.create<mlir::gpu::ReturnOp>(loc);
 
-    rewriter.setInsertionPoint(fusion_op);
+    rewriter.setInsertionPoint(op);
     auto make_const_idx = [&](int64_t value) {
       auto attr = rewriter.getIndexAttr(value);
       return rewriter.create<mlir::arith::ConstantOp>(loc, attr).getResult();
@@ -349,28 +354,34 @@ static void Rewrite(mlir::lmhlo::FusionOp fusion_op,
 
     rewriter.create<mlir::gpu::LaunchFuncOp>(
         loc, kernel_func, grid_size, block_size,
-        /*shared_memory_size_bytes=*/nullptr, arguments);
+        /*shared_memory_size_bytes=*/nullptr, kernel_args);
   }
 
-  rewriter.eraseOp(fusion_op);
+  rewriter.eraseOp(op);
 }
 
-mlir::LogicalResult FusionRewritePattern::matchAndRewrite(
+mlir::LogicalResult KernelOpsPattern::matchAndRewrite(
     mlir::ModuleOp module_op, mlir::PatternRewriter& rewriter) const {
   SmallVector<RewriteData, 4> rewrites;
 
-  // Gather data to rewrite each lmhlo.fusion op without changing the IR.
-  auto callback = [&](mlir::lmhlo::FusionOp fusion_op) -> mlir::WalkResult {
-    auto data = Match(fusion_op);
-    if (!data)
-      return rewriter.notifyMatchFailure(fusion_op, toString(data.takeError()));
-    rewrites.emplace_back(std::move(*data));
-    return mlir::success();
+  // Get data to rewrite kernel ops without changing the IR.
+  auto walk = [&](auto concrete_op) {
+    return module_op.walk([&](decltype(concrete_op) op) -> mlir::WalkResult {
+      auto data = Match(op);
+      if (!data)
+        return rewriter.notifyMatchFailure(op, toString(data.takeError()));
+      rewrites.emplace_back(std::move(*data));
+      return mlir::success();
+    });
   };
-  if (module_op.walk(callback).wasInterrupted()) return mlir::failure();
+  if (walk(mlir::lmhlo::FusionOp()).wasInterrupted() ||
+      walk(mlir::lmhlo::ScatterOp()).wasInterrupted() ||
+      walk(mlir::lmhlo::SortOp()).wasInterrupted())
+    return mlir::failure();
 
-  if (rewrites.empty())
-    return rewriter.notifyMatchFailure(module_op, ""No lmhlo.fusion ops"");
+  if (rewrites.empty()) {
+    return rewriter.notifyMatchFailure(module_op, ""No kernel ops"");
+  }
 
   // Mark module as gpu.container_module.
   rewriter.updateRootInPlace(module_op, [&] {
@@ -378,18 +389,18 @@ mlir::LogicalResult FusionRewritePattern::matchAndRewrite(
                        rewriter.getUnitAttr());
   });
 
-  // Replace the lmhlo.fusion ops with gpu.launch_func.
+  // Replace the kernel ops with gpu.launch_func.
   mlir::SymbolTable symbol_table(module_op);
   for (const auto& data : rewrites) {
-    Rewrite(data.fusion_op, rewriter, symbol_table, data.captures.getArrayRef(),
-            data.thunks.get(), data.constants, data.gpu_module_data);
+    Rewrite(data.op, rewriter, symbol_table, data.arguments, data.thunks.get(),
+            data.constants, data.gpu_module_data);
   }
 
   return mlir::success();
 }
 
-void populateFusionConversionPattern(mlir::RewritePatternSet& patterns) {
-  patterns.add<FusionRewritePattern>(patterns.getContext());
+void populateKernelOpsPattern(mlir::RewritePatternSet& patterns) {
+  patterns.add<KernelOpsPattern>(patterns.getContext());
 }
 
 }  // namespace tensorflow
",0,test
775ee0b3f7e96a295920b8723cfb42d7e9d8cacb,tensorflow/tensorflow,"Extend `lmhlo.fusion` op rewrite pattern to `lmhlo.scatter` and `lmhlo.sort`.

PiperOrigin-RevId: 404871130
Change-Id: I024d97c22643d9c313b015d0e1cd7ba6d1e778c3",lmhlo_to_gpu_binary.cc,"@@ -30,7 +30,7 @@ limitations under the License.
 
 namespace tensorflow {
 
-void populateFusionConversionPattern(mlir::RewritePatternSet&);
+void populateKernelOpsPattern(mlir::RewritePatternSet&);
 
 namespace {
 
@@ -42,7 +42,7 @@ struct ConvertLmhloToGpuBinaryPass
  private:
   void runOnOperation() override {
     mlir::RewritePatternSet patterns(&getContext());
-    populateFusionConversionPattern(patterns);
+    populateKernelOpsPattern(patterns);
     if (failed(applyOpPatternsAndFold(getOperation(), std::move(patterns))))
       return signalPassFailure();
   }
",0,test
706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes.

With this change, the shape inference for `tf.reshape()` will
correctly observe that, for example:

```python
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.int32)
z = tf.reshape(x, [y, 37])
print(z.get_shape())  # ==> (?, 37)
```

Partially addresses #2938.
Change: 125875146",tensor_util.py,"@@ -628,3 +628,55 @@ def constant_value(tensor):
     # conservatively prevent it from being fed.
     tensor.graph.prevent_feeding(tensor)
   return ret
+
+
+def constant_value_as_shape(tensor):  # pylint: disable=invalid-name
+  """"""A version of `constant_value()` that returns a `TensorShape`.
+
+  This version should be used when a constant tensor value is
+  interpreted as a (possibly partial) shape, e.g. in the shape
+  function for `tf.reshape()`. By explicitly requesting a
+  `TensorShape` as the return value, it is possible to represent
+  unknown dimensions; by contrast, `constant_value()` is
+  all-or-nothing.
+
+  Args:
+    tensor: The rank-1 Tensor to be evaluated.
+
+  Returns:
+    A `TensorShape` based on the constant value of the given `tensor`.
+  """"""
+  shape = tensor.get_shape().with_rank(1)
+  if tensor.get_shape() == [0]:
+    return tensor_shape.scalar()
+  elif tensor.op.type == ""Shape"":
+    return tensor.op.inputs[0].get_shape()
+  elif tensor.op.type == ""Pack"":
+    ret = tensor_shape.scalar()  # Empty list.
+    for pack_input in tensor.op.inputs:
+      # `pack_input` must be a scalar. Attempt to evaluate it, and append it
+      # to `ret`.
+      pack_input_val = constant_value(pack_input)
+      if pack_input_val is None or pack_input_val < 0:
+        new_dim = tensor_shape.Dimension(None)
+      else:
+        new_dim = tensor_shape.Dimension(pack_input_val)
+      ret = ret.concatenate([new_dim])
+    return ret
+  elif tensor.op.type == ""Concat"":
+    # We assume that `tensor.op.inputs[0]` evaluates to 0, as this is
+    # the only legal value when concatenating vectors, and it will
+    # have been checked by a previous shape function.
+    ret = tensor_shape.scalar()  # Empty list.
+    for concat_input in tensor.op.inputs[1:]:
+      # `concat_input` must be a vector. Attempt to evaluate it as a shape,
+      # and concatenate it with `ret`.
+      ret = ret.concatenate(constant_value_as_shape(concat_input))
+    return ret
+  else:
+    ret = tensor_shape.unknown_shape(shape[0].value)
+    value = constant_value(tensor)
+    if value is not None:
+      ret = ret.merge_with(tensor_shape.TensorShape(
+          [d if d != -1 else None for d in value]))
+    return ret
",0,train
706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes.

With this change, the shape inference for `tf.reshape()` will
correctly observe that, for example:

```python
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.int32)
z = tf.reshape(x, [y, 37])
print(z.get_shape())  # ==> (?, 37)
```

Partially addresses #2938.
Change: 125875146",tensor_util_test.py,"@@ -565,5 +565,38 @@ class ConstantValueTest(tf.test.TestCase):
     self.assertIs(None, c_val)
 
 
+class ConstantValueAsShapeTest(tf.test.TestCase):
+
+  def testConstant(self):
+    np_val = np.random.rand(3).astype(np.int32)
+    tf_val = tf.constant(np_val)
+    self.assertEqual(tf.TensorShape(np_val),
+                     tensor_util.constant_value_as_shape(tf_val))
+
+    tf_val = tf.constant([], dtype=tf.int32)
+    self.assertEqual(tf.TensorShape([]),
+                     tensor_util.constant_value_as_shape(tf_val))
+
+  def testShape(self):
+    tf_val = tf.shape(tf.constant(0.0, shape=[1, 2, 3]))
+    c_val = tensor_util.constant_value_as_shape(tf_val)
+    self.assertEqual(tf.TensorShape([1, 2, 3]), c_val)
+
+  def testPack(self):
+    tf_val = tf.pack([tf.constant(16), 37, tf.placeholder(tf.int32)])
+    c_val = tensor_util.constant_value_as_shape(tf_val)
+    self.assertEqual([16, 37, None], c_val.as_list())
+
+  def testConcat(self):
+    tf_val = tf.concat(0, [[16, 37], tf.placeholder(tf.int32, shape=(2,))])
+    c_val = tensor_util.constant_value_as_shape(tf_val)
+    self.assertEqual([16, 37, None, None], c_val.as_list())
+
+    tf_val = tf.concat(0,
+                       [[16, 37], tf.placeholder(tf.int32, shape=(1,)), [48]])
+    c_val = tensor_util.constant_value_as_shape(tf_val)
+    self.assertEqual([16, 37, None, 48], c_val.as_list())
+
+
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes.

With this change, the shape inference for `tf.reshape()` will
correctly observe that, for example:

```python
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.int32)
z = tf.reshape(x, [y, 37])
print(z.get_shape())  # ==> (?, 37)
```

Partially addresses #2938.
Change: 125875146",reshape_op_test.py,"@@ -99,11 +99,6 @@ class ReshapeTest(tf.test.TestCase):
     self._testBothReshape(x, [1, -1, 5])
 
   def testErrors(self):
-    x = tf.constant(0.0, shape=[1, 0, 3])
-    with self.assertRaisesRegexp(
-        ValueError, ""cannot infer the missing input size""):
-      tf.reshape(x, [0, -1, 5])
-
     y = tf.constant(0.0, shape=[23, 29, 31])
     with self.assertRaisesRegexp(ValueError, ""isn't divisible by 17""):
       tf.reshape(y, [17, -1])
@@ -128,6 +123,20 @@ class ReshapeTest(tf.test.TestCase):
     y = tf.reshape(x, tf.placeholder(tf.int32, shape=(3,)))
     self.assertEqual([None, None, None], y.get_shape().as_list())
 
+    # Unknown input shape, partial new shape using `tf.pack()`.
+    y = tf.reshape(x, [tf.placeholder(tf.int32), 37])
+    self.assertEqual([None, 37], y.get_shape().as_list())
+
+    # Unknown input shape, partial new shape using `tf.concat()`.
+    y = tf.reshape(x, tf.concat(0, [tf.placeholder(tf.int32, shape=(2,)),
+                                    [37, 42]]))
+    self.assertEqual([None, None, 37, 42], y.get_shape().as_list())
+
+    # Unknown input shape, partial new shape using `tf.shape()`.
+    y = tf.reshape(x, tf.shape(tf.placeholder(tf.float32,
+                                              shape=[None, 37, None])))
+    self.assertEqual([None, 37, None], y.get_shape().as_list())
+
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
706a5baa6e633ffbbcdf49f69e3ef88421001a76,tensorflow/tensorflow,"Add partial shape inference for values that are used as shapes.

With this change, the shape inference for `tf.reshape()` will
correctly observe that, for example:

```python
x = tf.placeholder(tf.float32)
y = tf.placeholder(tf.int32)
z = tf.reshape(x, [y, 37])
print(z.get_shape())  # ==> (?, 37)
```

Partially addresses #2938.
Change: 125875146",array_ops.py,"@@ -1780,45 +1780,38 @@ def _ReshapeShape(op):
       num_elements *= dim
   else:
     num_elements = tensor_shape.Dimension(None)
-  new_shape_shape = op.inputs[1].get_shape().with_rank(1)
-  new_shape = tensor_util.constant_value(op.inputs[1])
-  if new_shape is None:
-    # Attempt to infer the rank of the output from the length of
-    # new_shape.
-    return [tensor_shape.unknown_shape(ndims=new_shape_shape[0].value)]
-  new_shape = np.reshape(new_shape, -1).tolist()
-  if -1 not in new_shape:
+  new_shape = tensor_util.constant_value_as_shape(op.inputs[1])
+  if new_shape.ndims is None:
+    # We have no information about the shape of the output.
+    return [new_shape]
+  if None not in new_shape.as_list():
     # The new shape is fully defined.
     if (num_elements.value is not None
         and num_elements.value != np.prod(new_shape)):
       raise ValueError(
           ""Cannot reshape a tensor with %d elements to shape %s (%d elements)""
           % (num_elements.value, new_shape, np.prod(new_shape)))
-    return [tensor_shape.TensorShape(new_shape)]
   elif num_elements.value is not None:
     # We know the number of elements, so we can calculate the missing
     # dimension in the new_shape.
     known_elements = 1
-    unknown_index = None
+    unknown_indices = []
     for i, dim in enumerate(new_shape):
-      if dim == -1:
-        unknown_index = i
+      if dim.value is None:
+        unknown_indices.append(i)
       else:
-        known_elements *= dim
-    if known_elements == 0:
-      raise ValueError(""cannot infer the missing input size for ""
-                       ""an empty tensor unless all specified ""
-                       ""input sizes are non-zero"")
-    if num_elements % known_elements != 0:
-      raise ValueError(""input has %s elements, which isn't divisible by %d"" %
-                       (num_elements, known_elements))
-    new_shape[unknown_index] = num_elements // known_elements
-    return [tensor_shape.TensorShape(new_shape)]
-  else:
-    # We don't know the input shape, but we know n-1 of the dimensions
-    # in the new shape.
-    new_shape[new_shape.index(-1)] = None
-    return [tensor_shape.TensorShape(new_shape)]
+        known_elements *= dim.value
+    if known_elements != 0:
+      if num_elements % known_elements != 0:
+        raise ValueError(""input has %s elements, which isn't divisible by %d"" %
+                         (num_elements, known_elements))
+      if len(unknown_indices) == 1:
+        unknown_index = unknown_indices[0]
+        new_shape = new_shape.merge_with(
+            new_shape[:unknown_index].concatenate(
+                [num_elements // known_elements]).concatenate(
+                    new_shape[unknown_index+1:]))
+  return [new_shape]
 
 
 @ops.RegisterShape(""BroadcastGradientArgs"")
",0,train
15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency.

PiperOrigin-RevId: 273463381",hlo_legalize_to_lhlo.cc,"@@ -159,6 +159,46 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context,
       context);
 }
 
+// Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary
+// buffers if necessary.
+//
+// Example fusion with HLO ops.
+//
+// func @fusion(%arg0: memref<2x2xf32>,
+//              %arg1: memref<2x2xf32>,
+//              %arg2: memref<2x2xf32>,
+//              %arg3: memref<2x2xf32>) {
+//   ""xla_lhlo.fusion""() ({
+//     %0 = tensor_load %arg1 : memref<2x2xf32>
+//     %1 = tensor_load %arg2 : memref<2x2xf32>
+//     %2 = ""xla_hlo.add""(%0, %1) {name = ""add""} :
+//         (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
+//     %3 = tensor_load %arg0 : memref<2x2xf32>
+//     %4 = ""xla_hlo.mul""(%2, %3) {name = ""multiply""} :
+//         (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
+//     tensor_store %4, %arg3 : memref<2x2xf32>
+//     ""xla_lhlo.terminator""() : () -> ()
+//   }) {name = ""fusion""} : () -> ()
+//   return
+// }
+//
+// Transformed fusion with LHLO ops.
+// func @fusion(%arg0: memref<2x2xf32>,
+//              %arg1: memref<2x2xf32>,
+//              %arg2: memref<2x2xf32>,
+//              %arg3: memref<2x2xf32>) {
+//   ""xla_lhlo.fusion""() ( {
+//     %0 = alloc() {temp = true} : memref<2x2xf32>
+//     ""xla_lhlo.add""(%arg1, %arg2, %0) :
+//         (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
+//     ""xla_lhlo.mul""(%0, %arg0, %arg3) :
+//         (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
+//     dealloc %0 : memref<2x2xf32>
+//     ""xla_lhlo.terminator""() : () -> ()
+//   }) {name = ""fusion""} : () -> ()
+//   return
+//  }
+// }
 struct HloLegalizeToLhlo : public FunctionPass<HloLegalizeToLhlo> {
   void runOnFunction() override {
     OwningRewritePatternList patterns;
",0,train
15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency.

PiperOrigin-RevId: 273463381",lhlo_legalize_to_linalg.cc,"@@ -126,25 +126,6 @@ Operation* GetLinalgBodyOp<xla_lhlo::AndOp>(Location loc, Type element_type,
              : nullptr;
 }
 
-// Converts LHLO ops to Linalg generic.
-// Sample result for xla_lhlo::AddOp.
-//
-// ""xla_lhlo.add""(%arg1, %arg2, %out) :
-//      (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
-//
-// will be converted to
-//
-// #map0 = (d0, d1) -> (d0, d1)
-// ""linalg.generic""(%arg1, %arg2, %out) ( {
-//   ^bb0(%arg4: f32, %arg5: f32):
-//     %0 = addf %arg4, %arg5 : f32
-//     ""linalg.yield""(%0) : (f32) -> ()
-//   }) {
-//     indexing_maps = [#map0, #map0, #map0],
-//     n_loop_types = [2, 0, 0],
-//     n_views = [3, 1]
-//   } : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
-// }
 template <typename LhloOp>
 class LhloToLinalgOpConverter : public ConversionPattern {
  public:
@@ -229,6 +210,25 @@ void populateLHLOToLinalgConversionPattern(MLIRContext* context,
                    LhloToLinalgOpConverter<xla_lhlo::SubOp>>(context);
 }
 
+// Converts LHLO ops to Linalg generic.
+// Sample result for xla_lhlo::AddOp.
+//
+// ""xla_lhlo.add""(%arg1, %arg2, %out) :
+//      (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
+//
+// will be converted to
+//
+// #map0 = (d0, d1) -> (d0, d1)
+// ""linalg.generic""(%arg1, %arg2, %out) ( {
+//   ^bb0(%arg4: f32, %arg5: f32):
+//     %0 = addf %arg4, %arg5 : f32
+//     ""linalg.yield""(%0) : (f32) -> ()
+//   }) {
+//     indexing_maps = [#map0, #map0, #map0],
+//     n_loop_types = [2, 0, 0],
+//     n_views = [3, 1]
+//   } : (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
+// }
 struct LhloLegalizeToLinalg : public FunctionPass<LhloLegalizeToLinalg> {
   void runOnFunction() override {
     OwningRewritePatternList patterns;
",0,train
15fe88c0ed7ae2e024b345a5929e277398b66dad,tensorflow/tensorflow,"[MLIR] Move documentation closer to FunctionPasses for consistency.

PiperOrigin-RevId: 273463381",passes.h,"@@ -45,44 +45,6 @@ std::unique_ptr<OpPassBase<FuncOp>> createLegalizeToStdPass();
 
 // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary
 // buffers if necessary.
-//
-// Example fusion with HLO ops.
-//
-// func @fusion(%arg0: memref<2x2xf32>,
-//              %arg1: memref<2x2xf32>,
-//              %arg2: memref<2x2xf32>,
-//              %arg3: memref<2x2xf32>) {
-//   ""xla_lhlo.fusion""() ({
-//     %0 = tensor_load %arg1 : memref<2x2xf32>
-//     %1 = tensor_load %arg2 : memref<2x2xf32>
-//     %2 = ""xla_hlo.add""(%0, %1) {name = ""add""} :
-//         (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
-//     %3 = tensor_load %arg0 : memref<2x2xf32>
-//     %4 = ""xla_hlo.mul""(%2, %3) {name = ""multiply""} :
-//         (tensor<2x2xf32>, tensor<2x2xf32>) -> tensor<2x2xf32>
-//     tensor_store %4, %arg3 : memref<2x2xf32>
-//     ""xla_lhlo.terminator""() : () -> ()
-//   }) {name = ""fusion""} : () -> ()
-//   return
-// }
-//
-// Transformed fusion with LHLO ops.
-// func @fusion(%arg0: memref<2x2xf32>,
-//              %arg1: memref<2x2xf32>,
-//              %arg2: memref<2x2xf32>,
-//              %arg3: memref<2x2xf32>) {
-//   ""xla_lhlo.fusion""() ( {
-//     %0 = alloc() {temp = true} : memref<2x2xf32>
-//     ""xla_lhlo.add""(%arg1, %arg2, %0) :
-//         (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
-//     ""xla_lhlo.mul""(%0, %arg0, %arg3) :
-//         (memref<2x2xf32>, memref<2x2xf32>, memref<2x2xf32>) -> ()
-//     dealloc %0 : memref<2x2xf32>
-//     ""xla_lhlo.terminator""() : () -> ()
-//   }) {name = ""fusion""} : () -> ()
-//   return
-//  }
-// }
 std::unique_ptr<OpPassBase<FuncOp>> createLegalizeToLhloPass();
 
 }  // namespace xla_hlo
",0,train
666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction.
This requires broadcasting the incoming grads to the softmax grad.

PiperOrigin-RevId: 332372721
Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",gradient_checker_test.cc,"@@ -155,6 +155,12 @@ TEST_P(GradientCheckerTest, TestGradCheckMul) {
 }
 
 TEST_P(GradientCheckerTest, TestGradCheckSoftmax) {
+  bool use_function = !std::get<2>(GetParam());
+  if (use_function) {
+    // TODO(b/168850692): Enable this.
+    GTEST_SKIP() << ""Can't take gradient of ""
+                    ""SparseSoftmaxCrossEntropyWithLogits in tracing mode."";
+  }
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
 
",0,train
666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction.
This requires broadcasting the incoming grads to the softmax grad.

PiperOrigin-RevId: 332372721
Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",mnist_gradients_test.cc,"@@ -390,6 +390,12 @@ TEST_P(CppGradients, TestReluGrad) {
 }
 
 TEST_P(CppGradients, TestSoftmaxLossGrad) {
+  bool use_function = !std::get<2>(GetParam());
+  if (use_function) {
+    // TODO(b/168850692): Enable this.
+    GTEST_SKIP() << ""Can't take gradient of ""
+                    ""SparseSoftmaxCrossEntropyWithLogits in tracing mode."";
+  }
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
 
@@ -458,6 +464,12 @@ TEST_P(CppGradients, TestSoftmaxLossGrad) {
 }
 
 TEST_P(CppGradients, TestMNISTGrad) {
+  bool use_function = !std::get<2>(GetParam());
+  if (use_function) {
+    // TODO(b/168850692): Enable this.
+    GTEST_SKIP() << ""Can't take gradient of ""
+                    ""SparseSoftmaxCrossEntropyWithLogits in tracing mode."";
+  }
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
   AbstractContextPtr ctx;
@@ -605,6 +617,12 @@ TEST_P(CppGradients, TestScalarMul) {
 }
 
 TEST_P(CppGradients, TestMNIST_Training) {
+  bool use_function = !std::get<2>(GetParam());
+  if (use_function) {
+    // TODO(b/168850692): Enable this.
+    GTEST_SKIP() << ""Can't take gradient of ""
+                    ""SparseSoftmaxCrossEntropyWithLogits in tracing mode."";
+  }
   std::unique_ptr<TF_Status, decltype(&TF_DeleteStatus)> status(
       TF_NewStatus(), TF_DeleteStatus);
 
",0,train
666277f9a4a3a11e9350555e0974ae827a438cf9,tensorflow/tensorflow,"Support batched inputs to SparseSoftmaxCrossEntropyWithLogitsGradientFunction.
This requires broadcasting the incoming grads to the softmax grad.

PiperOrigin-RevId: 332372721
Change-Id: Ifa048f20d16de9997dec3d8111360d27b55ea941",nn_grad.cc,"@@ -14,9 +14,15 @@ limitations under the License.
 ==============================================================================*/
 #include ""tensorflow/c/experimental/gradients/nn_grad.h""
 
+#include ""absl/types/span.h""
+#include ""tensorflow/c/eager/abstract_tensor_handle.h""
+#include ""tensorflow/c/eager/immediate_execution_context.h""
+#include ""tensorflow/c/eager/immediate_execution_tensor_handle.h""
 #include ""tensorflow/c/experimental/ops/array_ops.h""
 #include ""tensorflow/c/experimental/ops/math_ops.h""
 #include ""tensorflow/c/experimental/ops/nn_ops.h""
+#include ""tensorflow/core/lib/llvm_rtti/llvm_rtti.h""
+#include ""tensorflow/core/platform/errors.h""
 
 using std::vector;
 using tensorflow::ops::Mul;
@@ -54,9 +60,31 @@ class ReluGradientFunction : public GradientFunction {
   vector<AbstractTensorHandle*> forward_outputs;
 };
 
-class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
+Status BroadcastMul(AbstractContext* ctx, AbstractTensorHandle* vec,
+                    AbstractTensorHandle* mat,
+                    absl::Span<AbstractTensorHandle*> outputs) {
+  if (!isa<ImmediateExecutionContext>(ctx)) {
+    // TODO(b/168850692): Fix this.
+    return errors::Unimplemented(
+        ""BroadcastMul is not supported in tracing mode yet."");
+  }
+  auto imm_ctx = dyn_cast<ImmediateExecutionContext>(ctx);
+  AbstractTensorPtr minus_1(imm_ctx->CreateInt32Scalar(-1));
+  ImmediateTensorHandlePtr dim(imm_ctx->CreateLocalHandle(minus_1.get()));
+  vector<AbstractTensorHandle*> expand_dims_outputs(1);
+  TF_RETURN_IF_ERROR(ops::ExpandDims(ctx, {vec, dim.get()},
+                                     absl::MakeSpan(expand_dims_outputs),
+                                     ""ExpandDims""));
+  TF_RETURN_IF_ERROR(
+      ops::Mul(ctx, {expand_dims_outputs[0], mat}, outputs, ""Mul""));
+  expand_dims_outputs[0]->Unref();
+  return Status::OK();
+}
+
+class SparseSoftmaxCrossEntropyWithLogitsGradientFunction
+    : public GradientFunction {
  public:
-  explicit SparseSoftmaxCrossEntropyLossGradientFunction(
+  explicit SparseSoftmaxCrossEntropyWithLogitsGradientFunction(
       vector<AbstractTensorHandle*> f_outputs)
       : forward_outputs(f_outputs) {}
 
@@ -65,12 +93,10 @@ class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
     grad_outputs->resize(2);
 
     // Grad for Softmax Input
-    std::string name = ""Mul_Softmax_Grad"";
     vector<AbstractTensorHandle*> mul_outputs(1);
-    TF_RETURN_IF_ERROR(
-        ops::Mul(ctx->ctx, {grad_inputs[0], forward_outputs[1]},
-                 absl::MakeSpan(mul_outputs),
-                 name.c_str()));  // upstream_grad * local softmax grad
+    TF_RETURN_IF_ERROR(BroadcastMul(
+        ctx->ctx, grad_inputs[0], forward_outputs[1],
+        absl::MakeSpan(mul_outputs)));  // upstream_grad * local softmax grad
     (*grad_outputs)[0] = mul_outputs[0];
 
     // Grad for labels is null
@@ -78,7 +104,7 @@ class SparseSoftmaxCrossEntropyLossGradientFunction : public GradientFunction {
 
     return Status::OK();
   }
-  ~SparseSoftmaxCrossEntropyLossGradientFunction() override {}
+  ~SparseSoftmaxCrossEntropyWithLogitsGradientFunction() override {}
 
  private:
   vector<AbstractTensorHandle*> forward_outputs;
@@ -98,7 +124,7 @@ BackwardFunction* ReluRegisterer(const ForwardOperation& op) {
 BackwardFunction* SparseSoftmaxCrossEntropyWithLogitsRegisterer(
     const ForwardOperation& op) {
   auto gradient_function =
-      new SparseSoftmaxCrossEntropyLossGradientFunction(op.outputs);
+      new SparseSoftmaxCrossEntropyWithLogitsGradientFunction(op.outputs);
   auto default_gradients = new PassThroughDefaultGradients(op);
   return new BackwardFunction(gradient_function, default_gradients);
 }
",0,train
add0043e9d6233d9fabf2676e449d26ecd257ec5,tensorflow/tensorflow,"- Fix typo in evaluator

PiperOrigin-RevId: 199164433",hlo_evaluator_typed_visitor.h,"@@ -1962,7 +1962,7 @@ class HloEvaluatorTypedVisitor : public DfsHloVisitorWithDefault {
 
     // TODO(b/74360564): This is implementation defined behavior, but is
     // currently respected by all implementations. Change this if we ever decide
-    // to oficially document different behavior.
+    // to officially document different behavior.
     for (int64 i = 0; i < start.size(); ++i) {
       start[i] = std::min<int64>(
           std::max(int64{0}, start[i]),
",0,test
efd35d70d22d370bf4a997cbf53a8030031a48da,tensorflow/tensorflow,"[MLIR] Convert FuncOp signature with unranked types in HLO->LHLO conversion.

PiperOrigin-RevId: 320146856
Change-Id: Ic534e97b2eecbd4573b91ff48ef90d38bbacd9a4",hlo_legalize_to_lhlo.cc,"@@ -391,16 +391,15 @@ struct HloLegalizeToLhlo
     target.addIllegalDialect<mhlo::XlaHloDialect>();
 
     BufferAssignmentTypeConverter converter;
+    auto isMemRefType = [](Type type) { return type.isa<BaseMemRefType>(); };
     target.addDynamicallyLegalOp<FuncOp>([&](FuncOp op) {
       auto inputs = op.getType().getInputs();
-      return llvm::all_of(inputs,
-                          [](Type input) { return input.isa<MemRefType>(); }) &&
+      return llvm::all_of(inputs, isMemRefType) &&
              converter.isLegal(&op.getBody());
     });
     target.addDynamicallyLegalOp<mlir::ReturnOp>([&](mlir::ReturnOp returnOp) {
       return std::all_of(returnOp.operand_type_begin(),
-                         returnOp.operand_type_end(),
-                         [](Type type) { return type.isa<MemRefType>(); });
+                         returnOp.operand_type_end(), isMemRefType);
     });
 
     auto module = getOperation();
",0,train
bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation.

PiperOrigin-RevId: 381407240
Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_dispatch.py,"@@ -331,6 +331,7 @@ _UNARY_ELEMENTWISE_OPS = [
     math_ops.rsqrt,
     math_ops.saturate_cast,
     math_ops.sign,
+    math_ops.sigmoid,
     math_ops.sin,
     math_ops.sinh,
     math_ops.sqrt,
",0,train
bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation.

PiperOrigin-RevId: 381407240
Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_dispatch_test.py,"@@ -756,9 +756,9 @@ class RaggedDispatchTest(test_util.TensorFlowTestCase, parameterized.TestCase):
         'math.reduce_any', 'math.reduce_max', 'math.reduce_mean',
         'math.reduce_variance', 'math.reduce_std', 'math.reduce_min',
         'math.reduce_prod', 'math.reduce_sum', 'math.rint', 'math.round',
-        'math.rsqrt', 'math.sign', 'math.sin', 'math.sinh', 'math.sqrt',
-        'math.square', 'math.squared_difference', 'math.subtract', 'math.tan',
-        'math.truediv', 'math.unsorted_segment_max',
+        'math.rsqrt', 'math.sign', 'math.sigmoid', 'math.sin', 'math.sinh',
+        'math.sqrt', 'math.square', 'math.squared_difference', 'math.subtract',
+        'math.tan', 'math.truediv', 'math.unsorted_segment_max',
         'math.unsorted_segment_mean', 'math.unsorted_segment_min',
         'math.unsorted_segment_prod', 'math.unsorted_segment_sqrt_n',
         'math.unsorted_segment_sum', 'one_hot', 'ones_like', 'rank', 'realdiv',
",0,train
bc5eddee3dda337c5b9287691b55d7a363b65c7b,tensorflow/tensorflow,"Add Ragged support to tf.math.sigmoid operation.

PiperOrigin-RevId: 381407240
Change-Id: Ic64d0603d85da24b436ab190a9b7c0ba34c9412c",ragged_tensor_test_ops.py,"@@ -64,6 +64,7 @@ UNARY_FLOAT_OPS = [
     math_ops.round,
     math_ops.rsqrt,
     math_ops.sign,
+    math_ops.sigmoid,
     math_ops.sin,
     math_ops.sinh,
     math_ops.sqrt,
",0,train
2775ac493806fefa4e7c2fd798be5b1f87e01a94,tensorflow/tensorflow,"Extend tensor_list with basic support for appending to TensorArrays. This allows handling list-type operations on lists that we haven't created, e.g. received as parameters.

PiperOrigin-RevId: 188094077",tensor_list.py,"@@ -18,7 +18,26 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.framework import ops
 from tensorflow.python.ops import list_ops
+from tensorflow.python.ops import tensor_array_ops
+
+
+def dynamic_list_append(target, element):
+  """"""Converts a list append call inline.""""""
+  if isinstance(target, tensor_array_ops.TensorArray):
+    return target.write(target.size(), element)
+  # TODO(mdan): What's the right way to check this?
+  # TODO(mdan): We may not need this branch.
+  # It may be possible to use TensorList alone if the loop body will not
+  # require wrapping it, although we'd have to think about an autoboxing
+  # mechanism for lists received as parameter.
+  if isinstance(target, ops.Tensor):
+    return list_ops.tensor_list_push_back(target, element)
+
+  # Python targets (including TensorList): fallback to their original append.
+  target.append(element)
+  return target
 
 
 class TensorList(object):
",0,test
2775ac493806fefa4e7c2fd798be5b1f87e01a94,tensorflow/tensorflow,"Extend tensor_list with basic support for appending to TensorArrays. This allows handling list-type operations on lists that we haven't created, e.g. received as parameters.

PiperOrigin-RevId: 188094077",tensor_list_test.py,"@@ -21,13 +21,41 @@ from __future__ import print_function
 from tensorflow.contrib.py2tf.utils import tensor_list as tl
 from tensorflow.python.client.session import Session
 from tensorflow.python.eager import context
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework.constant_op import constant
+from tensorflow.python.ops import list_ops
+from tensorflow.python.ops import tensor_array_ops
 from tensorflow.python.platform import test
 
 
 class TensorListTest(test.TestCase):
 
+  def _shape(self, shape_tuple):
+    return constant(shape_tuple, dtypes.int32)
+
+  def test_dynamic_list_append(self):
+    l = []
+    l = tl.dynamic_list_append(l, 1)
+    self.assertListEqual(l, [1])
+
+    l = list_ops.empty_tensor_list(self._shape(()), dtypes.int32)
+    l = tl.dynamic_list_append(l, 1)
+    s = list_ops.tensor_list_stack(l, element_dtype=dtypes.int32)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(s), [1])
+
+    l = tensor_array_ops.TensorArray(dtypes.int32, size=0, dynamic_size=True)
+    l = tl.dynamic_list_append(l, 1)
+    s = l.stack()
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(s), [1])
+
+    l = tl.TensorList(self._shape(()), dtypes.int32)
+    l = tl.dynamic_list_append(l, 1)
+    with self.test_session() as sess:
+      self.assertAllEqual(sess.run(l[0]), 1)
+
   def test_list_append_python(self):
     with context.eager_mode():
       a = constant(3.0)
",0,test
41e4c2033d0ca35587724abe4940db0bee2d6d96,tensorflow/tensorflow,"[tf:tfrt] Add [1, 0, 2] case to transpose benchmarks

The [1, 0, 2] case was missing.

PiperOrigin-RevId: 427990241
Change-Id: Icf9a59f13cf4b0043f850b52a6d441c99f77bf6a",transpose_op_benchmark.cc,"@@ -122,6 +122,15 @@ BM(Tfrt(Transpose_small_1x2x0, Transpose3D({1, 2, 0}), ""compute"",
         Inputs({32, 32, 16})));
 BM(Eigen(Transpose_small_1x2x0, Shuffle<3>({1, 2, 0}), Inputs({32, 32, 16})));
 
+// Small 3D Transpose: [1, 0, 2]
+BM(Jitrt(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+         Inputs({32, 32, 16})));
+BM(JitrtV(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+          Inputs({32, 32, 16})));
+BM(Tfrt(Transpose_small_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+        Inputs({32, 32, 16})));
+BM(Eigen(Transpose_small_1x0x2, Shuffle<3>({1, 0, 2}), Inputs({32, 32, 16})));
+
 // 2D Transpose: [1, 0]
 BM(Jitrt(Transpose_1x0, Transpose2D(), ""compute"", Inputs({4096, 4096})));
 BM(JitrtV(Transpose_1x0, Transpose2D(), ""compute"", Inputs({4096, 4096})));
@@ -164,4 +173,13 @@ BM(Tfrt(Transpose_1x2x0, Transpose3D({1, 2, 0}), ""compute"",
         Inputs({256, 256, 256})));
 BM(Eigen(Transpose_1x2x0, Shuffle<3>({1, 2, 0}), Inputs({256, 256, 256})));
 
+// 3D Transpose: [1, 0, 2]
+BM(Jitrt(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+         Inputs({256, 256, 256})));
+BM(JitrtV(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+          Inputs({256, 256, 256})));
+BM(Tfrt(Transpose_1x0x2, Transpose3D({1, 0, 2}), ""compute"",
+        Inputs({256, 256, 256})));
+BM(Eigen(Transpose_1x0x2, Shuffle<3>({1, 0, 2}), Inputs({256, 256, 256})));
+
 }  // namespace tensorflow
",0,test
50c58837d2be9aa218736ebe5eacb499bcbe7052,tensorflow/tensorflow,Fix: fixed LSTMBlockCell cuda kernel,lstm_ops_gpu.cu.cc,"@@ -350,8 +350,8 @@ __global__ void lstm_gates_bprop(
   di[cid] = di_local;
 
   dgates[gid + 0 * cell_size] = di_local;
-  dgates[gate_c_offset(gate_layout, cell_size)] = dci_local;
-  dgates[gate_f_offset(gate_layout, cell_size)] = df_local;
+  dgates[gid + gate_c_offset(gate_layout, cell_size)] = dci_local;
+  dgates[gid + gate_f_offset(gate_layout, cell_size)] = df_local;
   dgates[gid + 3 * cell_size] = do_local;
 
   cs_prev_grad[cid] = dcs_local * f_local;
",0,test
50c58837d2be9aa218736ebe5eacb499bcbe7052,tensorflow/tensorflow,Fix: fixed LSTMBlockCell cuda kernel,rnn_grad_test.py,"@@ -66,6 +66,60 @@ class RNNGradTest(test.TestCase):
     self.assertAllEqual(w_grad, w_ifco_grad)
     self.assertAllEqual(b_grad, b_ifco_grad)
 
+  @test_util.deprecated_graph_mode_only
+  def testLSTMBlockCell(self):
+    batch_size = np.random.randint(1, 32)
+    input_size = np.random.randint(1, 32)
+    hidden_size = np.random.randint(1, 32)
+    w = deterministic_random_uniform(
+        [input_size + hidden_size, 4 * hidden_size])
+    b = deterministic_random_uniform([4 * hidden_size])
+    x = deterministic_random_uniform([batch_size, input_size])
+    cs_prev = h_prev = deterministic_random_uniform([batch_size, hidden_size])
+    w_peephole = array_ops.zeros(cs_prev.shape[1:], dtype=w.dtype)
+    cs_grad = deterministic_random_uniform([batch_size, hidden_size])
+    h_grad = deterministic_random_uniform([batch_size, hidden_size])
+
+    outputs = []
+    grads = []
+    for use_gpu in [False, True]:
+      with self.cached_session(use_gpu=use_gpu):
+        output = gen_rnn_ops.lstm_block_cell(
+            x=x,
+            cs_prev=cs_prev,
+            h_prev=h_prev,
+            w=w,
+            wci=w_peephole,
+            wcf=w_peephole,
+            wco=w_peephole,
+            b=b,
+            forget_bias=1.0,
+            cell_clip=0.0,
+            use_peephole=False)
+        (i, cs, f, o, ci, co, _) = output
+        grad = gen_rnn_ops.lstm_block_cell_grad(
+            x=x,
+            cs_prev=cs_prev,
+            h_prev=h_prev,
+            w=w,
+            wci=w_peephole,
+            wcf=w_peephole,
+            wco=w_peephole,
+            b=b,
+            i=i,
+            cs=cs,
+            f=f,
+            o=o,
+            ci=ci,
+            co=co,
+            cs_grad=cs_grad,
+            h_grad=h_grad,
+            use_peephole=False)
+        outputs.append(output)
+        grads.append(grad)
+    self.assertAllClose(outputs[0], outputs[1])
+    self.assertAllClose(grads[0], grads[1])
+
   def _lstm_block(self, op, w, b, x, cs_prev, h_prev):
     w_peephole = array_ops.zeros(cs_prev.shape[1:], dtype=w.dtype)
     _, all_cs, _, _, _, _, all_h = op(
",0,test
871d07bafaba5af6ea68ae5a86e36ced5a52a32a,tensorflow/tensorflow,"Add basic support for declarative Linalg transformations

Linalg ops provide a good anchor for pattern matching/rewriting transformations.
This CL adds a simple example of how multi-level tiling may be specified by attaching a simple StringAttr to ops as they are transformed so we can easily specify partial lowering to control transformation application.

This is a first stab at taking advantage of higher-level information contained in Linalg ops and will evolve in the future.

PiperOrigin-RevId: 277497958
Change-Id: I6d504c7c39373d49cb8e5ae3b596c24d1efd2581",Passes.h,"@@ -41,6 +41,8 @@ std::unique_ptr<OpPassBase<FuncOp>> createLinalgPromotionPass();
 std::unique_ptr<OpPassBase<FuncOp>> createLowerLinalgToLoopsPass();
 
 std::unique_ptr<OpPassBase<ModuleOp>> createLowerLinalgToLLVMPass();
+
+std::unique_ptr<OpPassBase<FuncOp>> createLinalgTransformsPass();
 } // namespace linalg
 } // namespace mlir
 
",0,train
a703b288d8a8bae22a3cb4587e3ee1f98235a0a5,tensorflow/tensorflow,"Simplify reset_uids()

Remove unnecessary dict clearance code",backend.py,"@@ -211,10 +211,8 @@ def get_uid(prefix=''):
 def reset_uids():
   """"""Resets graph identifiers.
   """"""
-  per_graph_object_name_uids = PER_GRAPH_OBJECT_NAME_UIDS
-  keys = list(per_graph_object_name_uids.keys())
-  for key in keys:
-    del per_graph_object_name_uids[key]
+
+  PER_GRAPH_OBJECT_NAME_UIDS.clear()
 
 
 @keras_export('keras.backend.clear_session')
",0,train
e576acf5dbd7b800d3b6aa4de4b69952a9e2c0fb,tensorflow/tensorflow,"Internal-only change.

PiperOrigin-RevId: 224362520",tpu_cluster_resolver.py,"@@ -197,13 +197,14 @@ class TPUClusterResolver(ClusterResolver):
     elif tpu == 'local' or not tpu:
       # Google environment, where the TPU is attached to the host.
       self._environment = 'google'
-    elif tpu.startswith('/bns'):
+    elif tpu.startswith('/bns') or tpu.startswith('uptc://'):
       # Google environment, where we reach the TPU through BNS.
       self._environment = 'google'
 
     # If TPU is in the Google environment or exists locally, we don't use any
     # RPC layer.
-    if tpu.startswith('/bns') or tpu == 'local' or not tpu:
+    if tpu.startswith('/bns') or tpu.startswith(
+        'uptc://') or tpu == 'local' or not tpu:
       self.rpc_layer = None
     else:
       self.rpc_layer = 'grpc'
",0,train
76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter

- Use this to reduce the nesting of if's needed to get to the FuncOp for a call
- Add helper functions to get attached FuncOp for WhileOp

PiperOrigin-RevId: 323365892
Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",prepare_composite_functions_tf.cc,"@@ -239,18 +239,14 @@ LogicalResult CheckOutputConsumer(
 
 LogicalResult CheckFusableKerasLstm(FuncOp lstm_func, ModuleOp module) {
   for (auto func : module.getOps<FuncOp>()) {
-    auto result = func.walk([&](Operation* op) {
-      if (auto call_op = dyn_cast<CallOpInterface>(op)) {
-        CallInterfaceCallable callable = call_op.getCallableForCallee();
-        if (auto sym = callable.dyn_cast<SymbolRefAttr>()) {
-          if (sym.getRootReference() == lstm_func.getName()) {
-            // Keras LSTM have 5 outputs.
-            // We should make sure only the first or the second output are
-            // consumed.
-            if (failed(CheckOutputConsumer(call_op, 5, {0, 1})))
-              return WalkResult::interrupt();
-          }
-        }
+    if (func == lstm_func) continue;
+    auto result = func.walk([&](CallOpInterface op) {
+      if (dyn_cast<FuncOp>(op.resolveCallable()) == lstm_func) {
+        // Keras LSTM have 5 outputs.
+        // We should make sure only the first or the second output are
+        // consumed.
+        if (failed(CheckOutputConsumer(op.getOperation(), 5, {0, 1})))
+          return WalkResult::interrupt();
       }
       return WalkResult::advance();
     });
",0,train
76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter

- Use this to reduce the nesting of if's needed to get to the FuncOp for a call
- Add helper functions to get attached FuncOp for WhileOp

PiperOrigin-RevId: 323365892
Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",executor_tpuv1_inline_tpu_island.cc,"@@ -61,11 +61,11 @@ void TPUBridgeExecutorIslandInlining::runOnOperation() {
     LLVM_DEBUG(llvm::dbgs()
                << ""Found call to inline: "" << *call_op.getOperation() << ""\n"");
 
-    FuncOp called_func = dyn_cast_or_null<FuncOp>(
-        symbol_table.lookupSymbolIn(getOperation(), call_op.f()));
+    auto call_interface = cast<CallOpInterface>(call_op.getOperation());
+    auto called_func =
+        dyn_cast_or_null<FuncOp>(call_interface.resolveCallable());
 
-    if (failed(inlineCall(inliner,
-                          cast<CallOpInterface>(call_op.getOperation()),
+    if (failed(inlineCall(inliner, call_interface,
                           cast<CallableOpInterface>(called_func.getOperation()),
                           called_func.getCallableRegion(),
                           /* shouldCloneInlinedRegion = */ false))) {
",0,train
76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter

- Use this to reduce the nesting of if's needed to get to the FuncOp for a call
- Add helper functions to get attached FuncOp for WhileOp

PiperOrigin-RevId: 323365892
Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",optimize_global_tensors.cc,"@@ -68,9 +68,8 @@ bool IsResource(Value value) { return IsResourceType(value.getType()); }
 class ResourceAnalyzer {
  public:
   explicit ResourceAnalyzer(ModuleOp module) {
-    SymbolTable symbol_table(module);
     for (auto func : module.getOps<FuncOp>()) {
-      AnalyzeFunc(func, symbol_table);
+      AnalyzeFunc(func);
     }
   }
 
@@ -89,7 +88,7 @@ class ResourceAnalyzer {
   // written"". Do this recursively across the chain of funcs via call or control
   // flow ops.
   // TODO(ashwinm): Move to iterative traversal.
-  LogicalResult AnalyzeFunc(FuncOp func, const SymbolTable& symbol_table) {
+  LogicalResult AnalyzeFunc(FuncOp func) {
     // Avoid infinite recursion.
     if (!discovered_.insert(func).second) {
       return success();
@@ -104,24 +103,20 @@ class ResourceAnalyzer {
         return;
       }
       if (auto call = dyn_cast<CallOpInterface>(op)) {
-        if (auto sym = op->getAttrOfType<SymbolRefAttr>(""f"")) {
-          PropagatePotentiallyWrittenUpFromCallee(
-              sym.cast<FlatSymbolRefAttr>().getValue(), call.getArgOperands(),
-              symbol_table);
+        if (auto func = dyn_cast<FuncOp>(call.resolveCallable())) {
+          PropagatePotentiallyWrittenUpFromCallee(func, call.getArgOperands());
         }
         return;
       }
       if (auto if_op = dyn_cast<TF::IfOp>(op)) {
-        for (auto callee : {if_op.then_branch(), if_op.else_branch()}) {
-          PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input(),
-                                                  symbol_table);
+        for (auto callee : {if_op.then_func(), if_op.else_func()}) {
+          PropagatePotentiallyWrittenUpFromCallee(callee, if_op.input());
         }
         return;
       }
       if (auto while_op = dyn_cast<TF::WhileOp>(op)) {
-        for (auto callee : {while_op.cond(), while_op.body()}) {
-          PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input(),
-                                                  symbol_table);
+        for (auto callee : {while_op.cond_func(), while_op.body_func()}) {
+          PropagatePotentiallyWrittenUpFromCallee(callee, while_op.input());
         }
         return;
       }
@@ -149,15 +144,13 @@ class ResourceAnalyzer {
     });
   }
 
-  // Given a funcOp associated with the callee and operands from the
+  // Given a FuncOp associated with the callee and operands from the
   // corresponding callOp, propagate the potentially written decision to the
   // callOp's operands, if the corresponding func's arguments are potentially
   // written resources.
   void PropagatePotentiallyWrittenUpFromCallee(
-      StringRef callee, Operation::operand_range propagate_to,
-      const SymbolTable& symbol_table) {
-    auto func = symbol_table.lookup<FuncOp>(callee);
-    AnalyzeFunc(func, symbol_table);
+      FuncOp func, Operation::operand_range propagate_to) {
+    AnalyzeFunc(func);
     for (auto t : llvm::zip(func.getArguments(), propagate_to)) {
       if (!IsResource(std::get<0>(t))) {
         continue;
",0,train
76319741cd303273a542eae0cdf78df61e2c4e83,tensorflow/tensorflow,"[MLIR][NFC] Use CallOpInterface::resolveCallable() to reduce some code clutter

- Use this to reduce the nesting of if's needed to get to the FuncOp for a call
- Add helper functions to get attached FuncOp for WhileOp

PiperOrigin-RevId: 323365892
Change-Id: If6c6d4f1c8359c5df50366f90cbcb67fc9311771",shape_inference.cc,"@@ -39,6 +39,7 @@ limitations under the License.
 #include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/IR/SymbolTable.h""  // from @llvm-project
 #include ""mlir/IR/Value.h""  // from @llvm-project
+#include ""mlir/Interfaces/CallInterfaces.h""  // from @llvm-project
 #include ""mlir/Pass/Pass.h""  // from @llvm-project
 #include ""mlir/Pass/PassRegistry.h""  // from @llvm-project
 #include ""mlir/Support/LLVM.h""  // from @llvm-project
@@ -243,14 +244,11 @@ bool RefineResultType(Operation* op, Value result,
 
 // Infers the shape from a (Stateful)PartionedCall operation by looking up the
 // called function and propagating the return type.
-bool InferShapeForCall(Operation* op) {
-  auto call_op = cast<CallOpInterface>(op);
-  CallInterfaceCallable callable = call_op.getCallableForCallee();
-  SymbolRefAttr sym = callable.dyn_cast<SymbolRefAttr>();
-  if (!sym) return false;
-  FuncOp func = dyn_cast<FuncOp>(SymbolTable::lookupNearestSymbolFrom(op, sym));
+bool InferShapeForCall(CallOpInterface call_op) {
+  FuncOp func = dyn_cast<FuncOp>(call_op.resolveCallable());
   if (!func) return false;
 
+  Operation* op = call_op.getOperation();
   bool changed = false;
   // Map each of the results of the call to the returned type of the
   // function.
@@ -533,7 +531,7 @@ class ShapeInference {
   //      like predicate).
   LogicalResult PropagateShapeToFunctions(
       ModuleOp module, Operation::operand_type_range input_types,
-      ArrayRef<StringRef> func_names, int64_t max_iteration);
+      ArrayRef<FuncOp> functions, int64_t max_iteration);
 
   // Propagates shapes to regions given the shapes of the inputs of the regions.
   // All regions provided in `regions` are assumed to have inputs of type
@@ -555,13 +553,13 @@ class ShapeInference {
   //
   // TODO(b/154065712): Move this to a more general inter-procedural constant
   // folding pass.
-  void PropagateConstantToCallee(CallOpInterface call_op,
-                                 SymbolRefAttr callee_sym, ModuleOp module);
+  void PropagateConstantToCallee(CallOpInterface call_op, FuncOp func,
+                                 ModuleOp module);
 
   // Propagates any constant return value of the callee function to the call
   // op's corresponding result.
-  void PropagateConstantFromCallee(CallOpInterface call_op,
-                                   SymbolRefAttr callee_sym, ModuleOp module);
+  void PropagateConstantFromCallee(CallOpInterface call_op, FuncOp func,
+                                   ModuleOp module);
 
   // Tries to compute the result of folding the op. This doesn't actually
   // perform constant folding, it is just computes the equivalent constants.
@@ -779,9 +777,7 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) {
 
   // Handle call operations by looking up callee and infering return shape as
   // needed.
-  if (isa<PartitionedCallOp, StatefulPartitionedCallOp, TPUPartitionedCallOp>(
-          op))
-    return InferShapeForCall(op);
+  if (auto call = dyn_cast<CallOpInterface>(op)) return InferShapeForCall(call);
 
   // tf.Cast are only inferred if they have at least one user in the TF dialect
   // or feeding into the function return. This is necessary to avoid inserting
@@ -984,14 +980,13 @@ bool ShapeInference::InferShapeForSingleOperation(Operation* op) {
 
 LogicalResult ShapeInference::PropagateShapeToFunctions(
     ModuleOp module, Operation::operand_type_range input_types,
-    ArrayRef<StringRef> func_names, int64_t max_iteration) {
+    ArrayRef<FuncOp> functions, int64_t max_iteration) {
   bool all_succeeded = true;
   auto types = llvm::to_vector<4>(input_types);
   // If shape propagation fails for one function, return failure, but do not
   // early exit and attempt to propagate shapes for all provided functions to
   // have a best-effort propagation.
-  for (auto func_name : func_names) {
-    FuncOp func = module.lookupSymbol<FuncOp>(func_name);
+  for (FuncOp func : functions) {
     auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion());
     if (!llvm::hasSingleElement(func_uses.getValue())) {
       int num_uses = std::distance(func_uses->begin(), func_uses->end());
@@ -1046,12 +1041,9 @@ LogicalResult ShapeInference::PropagateShapeToRegions(
 }
 
 void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op,
-                                               SymbolRefAttr callee_sym,
-                                               ModuleOp module) {
-  auto func = module.lookupSymbol<FuncOp>(callee_sym.getRootReference());
+                                               FuncOp func, ModuleOp module) {
   auto func_uses = SymbolTable::getSymbolUses(func, &module.getBodyRegion());
-  int num_uses = std::distance(func_uses->begin(), func_uses->end());
-  if (num_uses != 1) return;
+  if (!llvm::hasSingleElement(func_uses.getValue())) return;
 
   OpBuilder builder(&func.front().front());
   Operation* op = call_op.getOperation();
@@ -1077,9 +1069,7 @@ void ShapeInference::PropagateConstantToCallee(CallOpInterface call_op,
 }
 
 void ShapeInference::PropagateConstantFromCallee(CallOpInterface call_op,
-                                                 SymbolRefAttr callee_sym,
-                                                 ModuleOp module) {
-  auto func = module.lookupSymbol<FuncOp>(callee_sym.getRootReference());
+                                                 FuncOp func, ModuleOp module) {
   // If the return value is a constant, use the constant as the value of
   // the call return.
   Operation* op = call_op.getOperation();
@@ -1111,28 +1101,29 @@ LogicalResult ShapeInference::PropagateShapeIntoAttachedFunctions(
   if (auto if_op = dyn_cast<TF::IfOp>(op)) {
     return PropagateShapeToFunctions(
         module, drop_begin(if_op.getOperandTypes(), 1),
-        {if_op.then_branch(), if_op.else_branch()}, max_iteration);
+        {if_op.then_func(), if_op.else_func()}, max_iteration);
   } else if (auto case_op = dyn_cast<TF::CaseOp>(op)) {
-    SmallVector<StringRef, 4> branches;
-    for (Attribute branch : case_op.branches())
-      branches.push_back(branch.cast<FlatSymbolRefAttr>().getValue());
+    SmallVector<FuncOp, 4> branches;
+    for (Attribute branch : case_op.branches()) {
+      auto sym = branch.cast<FlatSymbolRefAttr>();
+      branches.push_back(SymbolTable::lookupNearestSymbolFrom<FuncOp>(op, sym));
+    }
     return PropagateShapeToFunctions(module,
                                      drop_begin(case_op.getOperandTypes(), 1),
                                      branches, max_iteration);
   } else if (auto while_op = dyn_cast<TF::WhileOp>(op)) {
-    return PropagateShapeToFunctions(module, while_op.getOperandTypes(),
-                                     {while_op.cond(), while_op.body()},
-                                     max_iteration);
+    return PropagateShapeToFunctions(
+        module, while_op.getOperandTypes(),
+        {while_op.cond_func(), while_op.body_func()}, max_iteration);
   } else if (auto call_op = dyn_cast<CallOpInterface>(op)) {
-    CallInterfaceCallable callable = call_op.getCallableForCallee();
-    if (SymbolRefAttr sym = callable.dyn_cast<SymbolRefAttr>()) {
-      PropagateConstantToCallee(call_op, sym, module);
-      if (failed(PropagateShapeToFunctions(
-              module, call_op.getArgOperands().getTypes(),
-              {sym.getRootReference()}, max_iteration))) {
+    if (auto func = dyn_cast<FuncOp>(call_op.resolveCallable())) {
+      PropagateConstantToCallee(call_op, func, module);
+      if (failed(PropagateShapeToFunctions(module,
+                                           call_op.getArgOperands().getTypes(),
+                                           {func}, max_iteration))) {
         return failure();
       }
-      PropagateConstantFromCallee(call_op, sym, module);
+      PropagateConstantFromCallee(call_op, func, module);
       return success();
     }
   }
",0,train
3edab0abb1213f88507692042a320abc695ff674,tensorflow/tensorflow,"Remove reshape of sparse tensor indices in for maybe_batch.

PiperOrigin-RevId: 191310753",input.py,"@@ -515,8 +515,7 @@ def _store_sparse_tensors(tensor_list, enqueue_many, keep_input,
     def _sparse_values_to_keep(t, keep_input):
       """"""Convert a per-row `keep_input` vector to a per-value one.""""""
       # Get the rows of every value in the sparse Tensor.
-      row_values = array_ops.reshape(
-          t.indices, [array_ops.shape(t.indices)[0], -1])[:, 0]
+      row_values = t.indices[:, 0]
       # The value should be kept iff the row should be kept.
       return array_ops.gather(keep_input, row_values)
     if keep_input.shape.ndims == 1:
",0,train
d48d6758481ee0e24bd60996daceb241d272d310,tensorflow/tensorflow,"Move tpu fingerprint lookup to OSS

PiperOrigin-RevId: 361656725
Change-Id: I4c879184f93dc89405659bd0a4ea5acd1332f614",tpu_fingerprint_lookup.cc,"@@ -0,0 +1,72 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include ""tensorflow/core/tpu/kernels/tpu_fingerprint_lookup.h""
+
+namespace tensorflow {
+namespace tpu {
+
+TpuFingerprintLookup* TpuFingerprintLookup::Create() {
+  return new TpuFingerprintLookup();
+}
+
+bool TpuFingerprintLookup::RegisterKeyValuePair(uint64 key, std::string value) {
+  absl::MutexLock lock(&mu_);
+  bool is_successful = false;
+  VLOG(2) << ""registering key ("" << key << "") with value: "" << value;
+  auto it = key_to_value_.find(key);
+  if (it == key_to_value_.end()) {
+    // A new key. If the value is not seen before, register key-value and
+    // value-key pairs. Otherwise, skip registration.
+    auto maybe_existing_key = value_to_key_.find(value);
+    if (maybe_existing_key == value_to_key_.end()) {
+      key_to_value_.emplace(key, value);
+      value_to_key_.emplace(value, key);
+      is_successful = true;
+    } else {
+      // The value is registered before with a different key. Skip registration.
+      if (maybe_existing_key->second != key) {
+        VLOG(2) << ""The value ("" << value
+                << "") is associated with an existing key ( ""
+                << maybe_existing_key->second
+                << ""), which does not match the requesting key ("" << key
+                << "")."";
+      }
+    }
+  } else {
+    // The key is registered before, no actions needed. For debugging purpose,
+    // check if existing value agrees with the value.
+    if (it->second != value) {
+      VLOG(2) << ""The key ("" << key
+              << "") has been registered and the requesting value ( "" << value
+              << "" and the existing"" << it->second << "") doesn't match."";
+    }
+  }
+  DCHECK(key_to_value_.size() == value_to_key_.size());
+
+  return is_successful;
+}
+
+absl::optional<std::string_view> TpuFingerprintLookup::Lookup(uint64 key) {
+  absl::MutexLock lock(&mu_);
+  auto it = key_to_value_.find(key);
+  if (it == key_to_value_.end()) {
+    return absl::optional<std::string_view>{};
+  } else {
+    return it->second;
+  }
+}
+
+}  // namespace tpu
+}  // namespace tensorflow
",0,test
d48d6758481ee0e24bd60996daceb241d272d310,tensorflow/tensorflow,"Move tpu fingerprint lookup to OSS

PiperOrigin-RevId: 361656725
Change-Id: I4c879184f93dc89405659bd0a4ea5acd1332f614",tpu_fingerprint_lookup.h,"@@ -0,0 +1,84 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_
+#define TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_
+
+#include <cstddef>
+#include <deque>
+
+#include ""absl/base/integral_types.h""
+#include ""absl/container/flat_hash_map.h""
+#include ""absl/container/node_hash_map.h""
+#include ""absl/strings/string_view.h""
+#include ""absl/synchronization/mutex.h""
+#include ""tensorflow/core/framework/resource_mgr.h""
+
+namespace tensorflow {
+namespace tpu {
+
+// A class that holds the key-value pair of fingerprints. By calling the
+// Register method, this class can map the key to the value. Note that this
+// class holds invariant key-value pairs. That is, it does not allow updating
+// key-value pairs, nor N-key-to-1-value and 1-key-to-M-value pairs. If such
+// cases occur, the class keeps the earliest registered pairs and discards any
+// violating pairs.
+//
+// Example:
+//  TpuFingerprintLookup fingerprint_lookup;
+//
+//  // Register value with key.
+//  fingerprint_lookup.RegisterKeyValuePair(""key1"", ""program1"");
+//
+//  // Lookup fingerprint with key.
+//  std::string fingerprint = fingerprint_lookup.Lookup(""key1"");
+//
+// TODO(chiachenc): use templates and add Unregister methods.
+class TpuFingerprintLookup : public ResourceBase {
+ public:
+  // Creates an instance of TpuFingerprintLookup.
+  static TpuFingerprintLookup* Create();
+
+  // Register value with tag. Return true if successfully registering a
+  // key-value pair; otherwise, return false.
+  bool RegisterKeyValuePair(uint64 key, std::string value);
+
+  // Look up fingerprint with key. Return absl::optional<std::string_view>{} if
+  // not found.
+  absl::optional<std::string_view> Lookup(uint64 key);
+
+  size_t num_valid() {
+    absl::MutexLock lock(&mu_);
+    return key_to_value_.size();
+  }
+
+  std::string DebugString() const override { return ""TpuFingerprintLookup""; }
+
+ private:
+  explicit TpuFingerprintLookup() {}
+
+  absl::Mutex mu_;
+  // Main storage for lookup
+  absl::node_hash_map<uint64, std::string> key_to_value_ ABSL_GUARDED_BY(mu_);
+
+  // An auxiliary storage to ensure 1-to-1 and invariant key-value pair
+  absl::node_hash_map<std::string, uint64> value_to_key_ ABSL_GUARDED_BY(mu_);
+
+  TpuFingerprintLookup(const TpuFingerprintLookup&) = delete;
+  TpuFingerprintLookup& operator=(const TpuFingerprintLookup&) = delete;
+};
+}  // namespace tpu
+}  // namespace tensorflow
+#endif  // TENSORFLOW_CORE_TPU_KERNELS_TPU_FINGERPRINT_LOOKUP_H_
",0,test
5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods

Begin making private Interpreter initialization methods currently marked
public, starting with:
  * Interpreter::SetExecutionPlan
  * Interpreter::ReserveNodes

Follow-up changes will introduce a test-only helper for accessing
such methods for test purposes, but otherwise such methods should only be
accessible to the InterpreterBuilder.

PiperOrigin-RevId: 375160379
Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",subgraph.h,"@@ -72,11 +72,6 @@ class Subgraph {
   // interpreter.
   TfLiteStatus SetVariables(std::vector<int> variables);
 
-  // Ensure the internal node storage memory allocates at least `count`
-  // spots for node. NOTE, this doesn't actually add operators. This is an
-  // efficiency optimization that is subject to change.
-  void ReserveNodes(int count);
-
   // Adds a node with the given parameters and returns the index of the new
   // node in `node_index` (optionally). Interpreter will take ownership of
   // `builtin_data` and destroy it with `free`. Ownership of 'init_data'
@@ -137,10 +132,6 @@ class Subgraph {
       bool is_variable = false, const size_t rank_dims_signature = 0,
       const int* dims_signature = nullptr);
 
-  // WARNING: Experimental interface, subject to change
-  // Overrides execution plan. This bounds checks indices sent in.
-  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);
-
   // Get a mutable tensor data structure.
   TfLiteTensor* tensor(int tensor_index) {
     if (tensor_index < 0 ||
@@ -351,6 +342,7 @@ class Subgraph {
   const std::string& GetName() const;
 
  private:
+  friend class InterpreterBuilder;
   friend class TestDelegate;
   // SubgraphAwareProfiler wraps an actual TFLite profiler, such as a
   // BufferedProfiler instance, and takes care of event profiling/tracing in a
@@ -395,6 +387,16 @@ class Subgraph {
     const int64_t subgraph_index_;
   };
 
+  // Ensure the internal node storage memory allocates at least `count`
+  // spots for node. NOTE, this doesn't actually add operators. This is an
+  // efficiency optimization that is subject to change.
+  // Note: Only used during initialization.
+  void ReserveNodes(int count);
+
+  // Overrides execution plan. This bounds checks indices sent in.
+  // Note: Only used during initialization.
+  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);
+
   // Prevent 'context_' from accessing functions that are only available to
   // delegated kernels.
   void SwitchToKernelContext();
",0,train
5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods

Begin making private Interpreter initialization methods currently marked
public, starting with:
  * Interpreter::SetExecutionPlan
  * Interpreter::ReserveNodes

Follow-up changes will introduce a test-only helper for accessing
such methods for test purposes, but otherwise such methods should only be
accessible to the InterpreterBuilder.

PiperOrigin-RevId: 375160379
Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter.cc,"@@ -229,10 +229,6 @@ TfLiteStatus Interpreter::AllocateTensors() {
   return primary_subgraph().AllocateTensors();
 }
 
-void Interpreter::ReserveNodes(int count) {
-  primary_subgraph().ReserveNodes(count);
-}
-
 void Interpreter::AddSubgraphs(int subgraphs_to_add,
                                int* first_new_subgraph_index) {
   const size_t base_index = subgraphs_.size();
",0,train
5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods

Begin making private Interpreter initialization methods currently marked
public, starting with:
  * Interpreter::SetExecutionPlan
  * Interpreter::ReserveNodes

Follow-up changes will introduce a test-only helper for accessing
such methods for test purposes, but otherwise such methods should only be
accessible to the InterpreterBuilder.

PiperOrigin-RevId: 375160379
Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter.h,"@@ -125,11 +125,6 @@ class Interpreter {
   /// interpreter.
   TfLiteStatus SetVariables(std::vector<int> variables);
 
-  /// Ensure the internal node storage memory allocates at least `count`
-  /// spots for node. NOTE, this doesn't actually add operators. This is an
-  /// efficiency optimization that is subject to change.
-  void ReserveNodes(int count);
-
   /// Adds a node with the given parameters and returns the index of the new
   /// node in `node_index` (optionally). Interpreter will take ownership of
   /// `builtin_data` and destroy it with `free`. Ownership of 'init_data'
@@ -242,12 +237,6 @@ class Interpreter {
     return primary_subgraph().execution_plan();
   }
 
-#ifndef DOXYGEN_
-  /// WARNING: Experimental interface, subject to change
-  /// Overrides execution plan. This bounds checks indices sent in.
-  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);
-#endif  // DOXYGEN_SKIP
-
   /// Get a mutable tensor data structure.
   // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this
   // read/write access to structure
@@ -717,6 +706,10 @@ class Interpreter {
     return -1;
   }
 
+  // Overrides execution plan. This bounds checks indices sent in.
+  // Note: Only used during initialization.
+  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);
+
   // Sets the profiler to all subgraphs.
   void SetSubgraphProfiler();
 
",0,train
5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods

Begin making private Interpreter initialization methods currently marked
public, starting with:
  * Interpreter::SetExecutionPlan
  * Interpreter::ReserveNodes

Follow-up changes will introduce a test-only helper for accessing
such methods for test purposes, but otherwise such methods should only be
accessible to the InterpreterBuilder.

PiperOrigin-RevId: 375160379
Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter_test.cc,"@@ -1182,7 +1182,7 @@ TEST_F(InterpreterTest, ExternalBackendContextClearsCachesOnDelete) {
 // node graph that can be executed in either [0,1] order or [1,0] order.
 // The CopyOp records when it is invoked in the class member run_order_
 // so we can test whether the execution plan was honored.
-class TestExecutionPlan : public ::testing::Test {
+class TestExecutionPlan : public InterpreterTest {
   // Encapsulates the node ids and provides them to a C primitive data type
   // Allocatable with placement new, but never destructed, so make sure this
   // doesn't own any heap allocated data. This is then is used as op local
@@ -1276,8 +1276,6 @@ class TestExecutionPlan : public ::testing::Test {
   }
 
  protected:
-  Interpreter interpreter_;
-
   // list of node_ids that were run
   std::vector<int> run_order_;
 };
@@ -1290,21 +1288,21 @@ TEST_F(TestExecutionPlan, DefaultExecutionPlan) {
 
 TEST_F(TestExecutionPlan, ReversedExecutionPlan) {
   // Check reversed order
-  interpreter_.SetExecutionPlan({1, 0});
+  SetExecutionPlan({1, 0});
   ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk);
   ASSERT_EQ(run_order_, std::vector<int>({1, 0}));
 }
 
 TEST_F(TestExecutionPlan, SubsetExecutionPlan) {
   // Check running only node index 1
-  interpreter_.SetExecutionPlan({1});
+  SetExecutionPlan({1});
   ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk);
   ASSERT_EQ(run_order_, std::vector<int>({1}));
 }
 
 TEST_F(TestExecutionPlan, NullExecutionPlan) {
   // Check nothing executed.
-  interpreter_.SetExecutionPlan({});
+  SetExecutionPlan({});
   ASSERT_EQ(interpreter_.Invoke(), kTfLiteOk);
   ASSERT_EQ(run_order_, std::vector<int>());
 }
",0,train
5c7047ad96f3dc9ff13c1feeb7ade5e1e831e645,tensorflow/tensorflow,"[tf.lite] Restrict Interpreter initialization methods

Begin making private Interpreter initialization methods currently marked
public, starting with:
  * Interpreter::SetExecutionPlan
  * Interpreter::ReserveNodes

Follow-up changes will introduce a test-only helper for accessing
such methods for test purposes, but otherwise such methods should only be
accessible to the InterpreterBuilder.

PiperOrigin-RevId: 375160379
Change-Id: Ia48289115e35eed2df26226fbedc4db02b210bed",interpreter_test_util.h,"@@ -60,6 +60,10 @@ class InterpreterTest : public ::testing::Test {
     interpreter_.SetSignatureDef({signature});
   }
 
+  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan) {
+    return interpreter_.SetExecutionPlan(new_plan);
+  }
+
   Interpreter interpreter_;
 };
 
",0,train
0632e92abc4f08ffacf6802205f9880accf7ecd2,tensorflow/tensorflow,"DOCFIX:  hmc.sample_chain kwarg num_steps_between_results docstring seemed to
indicate a different type of thinning than what is actually going on.
PiperOrigin-RevId: 186349630",hmc_impl.py,"@@ -109,10 +109,13 @@ def sample_chain(
 
   Note: `target_log_prob_fn` is called exactly twice.
 
-  Only one out of every `num_steps_between_samples + 1` steps is included in the
-  returned results. This ""thinning"" comes at a cost of reduced statistical
-  power, while reducing memory requirements and autocorrelation. For more
-  discussion see [1].
+  Since HMC states are correlated, it is sometimes desirable to produce
+  additional intermediate states, and then discard them, ending up with a set of
+  states with decreased autocorrelation.  See [1].  Such ""thinning"" is made
+  possible by setting `num_steps_between_results > 0`.  The chain then takes
+  `num_steps_between_results` extra steps between the steps that make it into
+  the results.  The extra steps are never materialized (in calls to `sess.run`),
+  and thus do not increase memory requirements.
 
   [1]: ""Statistically efficient thinning of a Markov chain sampler.""
        Art B. Owen. April 2017.
@@ -225,10 +228,8 @@ def sample_chain(
       Default value: 0 (i.e., no burn-in).
     num_steps_between_results: Integer number of chain steps between collecting
       a result. Only one out of every `num_steps_between_samples + 1` steps is
-      included in the returned results. This ""thinning"" comes at a cost of
-      reduced statistical power, while reducing memory requirements and
-      autocorrelation. For more discussion see [1].
-      Default value: 0 (i.e., no subsampling).
+      included in the returned results.  The number of returned chain states is
+      still equal to `num_results`.  Default value: 0 (i.e., no thinning).
     seed: Python integer to seed the random number generator.
     current_target_log_prob: (Optional) `Tensor` representing the value of
       `target_log_prob_fn` at the `current_state`. The only reason to specify
",0,train
f6533187f2bba0e6717d5a3c7cd018b311392a56,tensorflow/tensorflow,"Improve error message in ColocateResourceAndRefEdges.

PiperOrigin-RevId: 237395227",colocation_graph.cc,"@@ -212,7 +212,9 @@ Status Member::EnsureCompatibilityAcrossResourceEdge(
         ""Cannot place the graph because a reference or resource edge ""
         ""connects colocation groups with incompatible assigned devices: "",
         DeviceNameUtils::ParsedNameToString(src_root.assigned_device_name_),
-        "" vs "", DeviceNameUtils::ParsedNameToString(assigned_device_name_));
+        "" vs "", DeviceNameUtils::ParsedNameToString(assigned_device_name_),
+        "". The edge src node is "", src.name(), "" , and the dst node is "",
+        dst.name());
   }
 
   if (DeviceNameUtils::AreCompatibleDevNames(src_root.requested_device_name_,
",0,train
f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos

exection -> execution
hitogram -> histogram
opertor  -> operator",c_api_experimental_test.cc,"@@ -267,10 +267,10 @@ TEST(CAPI, MonitoringMultipleSampler) {
   TFE_MonitoringSamplerCellAdd(cell1, 2.0);
   TF_Buffer* result1 = TF_NewBuffer();
   TFE_MonitoringSamplerCellValue(cell1, result1);
-  tensorflow::HistogramProto hitogram1;
-  EXPECT_TRUE(hitogram1.ParseFromString(
+  tensorflow::HistogramProto histogram1;
+  EXPECT_TRUE(histogram1.ParseFromString(
       {reinterpret_cast<const char*>(result1->data), result1->length}));
-  EXPECT_EQ(hitogram1.sum(), 3.0);
+  EXPECT_EQ(histogram1.sum(), 3.0);
   delete result1;
 
   auto* sampler2 = TFE_MonitoringNewSampler2(""test/sampler2"", buckets, status,
@@ -281,10 +281,10 @@ TEST(CAPI, MonitoringMultipleSampler) {
   TFE_MonitoringSamplerCellAdd(cell2, 3.0);
   TF_Buffer* result2 = TF_NewBuffer();
   TFE_MonitoringSamplerCellValue(cell2, result2);
-  tensorflow::HistogramProto hitogram2;
-  EXPECT_TRUE(hitogram2.ParseFromString(
+  tensorflow::HistogramProto histogram2;
+  EXPECT_TRUE(histogram2.ParseFromString(
       {reinterpret_cast<const char*>(result2->data), result2->length}));
-  EXPECT_EQ(hitogram2.sum(), 5.0);
+  EXPECT_EQ(histogram2.sum(), 5.0);
   delete result2;
 
   TFE_MonitoringDeleteBuckets(buckets);
",0,train
f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos

exection -> execution
hitogram -> histogram
opertor  -> operator",client.h,"@@ -42,7 +42,7 @@ class Client {
 
   // Compile the computation with the given argument shapes and returns the
   // handle to the compiled executable. The compiled executable is cached on the
-  // service, and the returned handle can be used for exection without
+  // service, and the returned handle can be used for execution without
   // re-compile.
   // * The shape and layout of the arguments being executed with will affect how
   //   the computation is compiled. If argument_shapes is empty, the parameters'
",0,train
f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos

exection -> execution
hitogram -> histogram
opertor  -> operator",runtime_matmul_mkl.cc,"@@ -110,7 +110,7 @@ __xla_cpu_runtime_MKLSingleThreadedMatMulF32(const void* run_options_ptr,
                                              int64 m, int64 n, int64 k,
                                              int32 transpose_lhs,
                                              int32 transpose_rhs) {
-  // Set the thread number to 1 for single threaded excution.
+  // Set the thread number to 1 for single threaded execution.
   int prev_num_threads = mkl_set_num_threads_local(1);
   MatMulF32(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
   // Set thread number back to the previous number.
@@ -123,7 +123,7 @@ __xla_cpu_runtime_MKLSingleThreadedMatMulF64(const void* run_options_ptr,
                                              double* rhs, int64 m, int64 n,
                                              int64 k, int32 transpose_lhs,
                                              int32 transpose_rhs) {
-  // Set the thread number to 1 for single threaded excution.
+  // Set the thread number to 1 for single threaded execution.
   int prev_num_threads = mkl_set_num_threads_local(1);
   MatMulF64(nullptr, out, lhs, rhs, m, n, k, transpose_lhs, transpose_rhs);
   // Set thread number back to the previous number.
",0,train
f436e2a967a66b81239aec398a847dc266dbd08e,tensorflow/tensorflow,"Fix minor typos

exection -> execution
hitogram -> histogram
opertor  -> operator",distributed_training_utils.py,"@@ -864,7 +864,7 @@ def _make_execution_function_with_cloning(model, mode):
     distributed_function = _make_graph_execution_function(model, mode)
 
   # We cache the distributed execution function on the model since creating
-  # distributed models and exection functions are expensive.
+  # distributed models and execution functions are expensive.
   distributed_model._distributed_function = distributed_function
   return distributed_function
 
",0,train
3583bf48e9032aa21861b6dbddfd06ea383f8f2a,tensorflow/tensorflow,Add definition of what PS stands for,test_util.py,"@@ -2708,6 +2708,19 @@ def create_local_cluster(num_workers,
                          ps_config=None):
   """"""Create and start local servers and return the associated `Server` objects.
 
+  ""PS"" stands for ""parameter server"": a task responsible for storing and
+  updating the model's parameters. Other tasks send updates to these parameters
+  as they work on optimizing the parameters. This particular division of labor
+  between tasks is not required, but is common for distributed training.
+
+  Read more at https://www.tensorflow.org/guide/extend/architecture
+
+  TODO: image from https://www.tensorflow.org/images/diag1.svg
+
+  Figure 2 illustrates the interaction of these components.
+  ""/job:worker/task:0"" and ""/job:ps/task:0"" are both tasks with worker services.
+
+
   Example:
   ```python
   workers, _ = tf.test.create_local_cluster(num_workers=2, num_ps=2)
",0,test
bbed106374e5db95057f19fd17113811bd85c4b3,tensorflow/tensorflow,Fix a typo,map_dataset_op_test.cc,"@@ -550,7 +550,7 @@ TEST_P(ParameterizedMapDatasetOpTest, Roundtrip) {
     TF_EXPECT_OK(iterator->Save(serialization_ctx.get(), &writer));
     TF_EXPECT_OK(writer.Flush());
     VariantTensorDataReader reader(&data);
-    TF_EXPECT_OK(RestoreIterator(iterator_ctx.get(), &reader, ""Iterator"",
+    TF_EXPECT_OK(RestoreIterator(iterator_context.get(), &reader, ""Iterator"",
                                  *map_dataset, &iterator));
 
     while (cur_iteration <= breakpoint) {
",0,train
39357a6f3d6cc75bb8eee002bd7c05b117a05968,tensorflow/tensorflow,"Allow grayscale images to be 2D, either [H, W] or [H, 1]

PiperOrigin-RevId: 248224634",image_ops_impl.py,"@@ -157,21 +157,21 @@ def _Check3DImage(image, require_static=True):
 def _Assert3DImage(image):
   """"""Assert that we are working with a properly shaped image.
 
-    Performs the check statically if possible (i.e. if the shape
-    is statically known). Otherwise adds a control dependency
-    to an assert op that checks the dynamic shape.
+  Performs the check statically if possible (i.e. if the shape
+  is statically known). Otherwise adds a control dependency
+  to an assert op that checks the dynamic shape.
 
-    Args:
-      image: 3-D Tensor of shape [height, width, channels]
+  Args:
+    image: 3-D Tensor of shape [height, width, channels]
 
-    Raises:
-      ValueError: if `image.shape` is not a 3-vector.
+  Raises:
+    ValueError: if `image.shape` is not a 3-vector.
 
-    Returns:
-      If the shape of `image` could be verified statically, `image` is
-      returned unchanged, otherwise there will be a control dependency
-      added that asserts the correct dynamic shape.
-    """"""
+  Returns:
+    If the shape of `image` could be verified statically, `image` is
+    returned unchanged, otherwise there will be a control dependency
+    added that asserts the correct dynamic shape.
+  """"""
   return control_flow_ops.with_dependencies(
       _Check3DImage(image, require_static=False), image)
 
@@ -179,20 +179,20 @@ def _Assert3DImage(image):
 def _AssertAtLeast3DImage(image):
   """"""Assert that we are working with a properly shaped image.
 
-    Performs the check statically if possible (i.e. if the shape
-    is statically known). Otherwise adds a control dependency
-    to an assert op that checks the dynamic shape.
+  Performs the check statically if possible (i.e. if the shape
+  is statically known). Otherwise adds a control dependency
+  to an assert op that checks the dynamic shape.
 
-    Args:
-      image: >= 3-D Tensor of size [*, height, width, depth]
+  Args:
+    image: >= 3-D Tensor of size [*, height, width, depth]
 
-    Raises:
-      ValueError: if image.shape is not a [>= 3] vector.
+  Raises:
+    ValueError: if image.shape is not a [>= 3] vector.
 
-    Returns:
-      If the shape of `image` could be verified statically, `image` is
-      returned unchanged, otherwise there will be a control dependency
-      added that asserts the correct dynamic shape.
+  Returns:
+    If the shape of `image` could be verified statically, `image` is
+    returned unchanged, otherwise there will be a control dependency
+    added that asserts the correct dynamic shape.
   """"""
   return control_flow_ops.with_dependencies(
       _CheckAtLeast3DImage(image, require_static=False), image)
@@ -241,40 +241,37 @@ def _CheckAtLeast3DImage(image, require_static=True):
 
 
 def _AssertGrayscaleImage(image):
-  """"""Assert that we are working with a properly shaped
-
-     grayscale image.
+  """"""Assert that we are working with a properly shaped grayscale image.
 
-    Performs the check statically if possible (i.e. if the shape
-    is statically known). Otherwise adds a control dependency
-    to an assert op that checks the dynamic shape.
+  Performs the check statically if possible (i.e. if the shape
+  is statically known). Otherwise adds a control dependency
+  to an assert op that checks the dynamic shape.
 
-    Args:
-      image: >= 3-D Tensor of size [*, height, width, depth]
+  Args:
+    image: >= 2-D Tensor of size [*, 1]
 
-    Raises:
-      ValueError: if image.shape is not a [>= 3] vector or if
-                last dimension is not size 1.
+  Raises:
+    ValueError: if image.shape is not a [>= 2] vector or if
+              last dimension is not size 1.
 
-    Returns:
-      If the shape of `image` could be verified statically, `image` is
-      returned unchanged, otherwise there will be a control dependency
-      added that asserts the correct dynamic shape.
+  Returns:
+    If the shape of `image` could be verified statically, `image` is
+    returned unchanged, otherwise there will be a control dependency
+    added that asserts the correct dynamic shape.
   """"""
   return control_flow_ops.with_dependencies(
       _CheckGrayscaleImage(image, require_static=False), image)
 
 
 def _CheckGrayscaleImage(image, require_static=True):
-  """"""Assert that we are working with properly shaped
-
-  grayscale image.
+  """"""Assert that we are working with properly shaped grayscale image.
 
   Args:
-    image: >= 3-D Tensor of size [*, height, width, depth]
+    image: >= 2-D Tensor of size [*, 1]
+    require_static: Boolean, whether static shape is required.
 
   Raises:
-    ValueError: if image.shape is not a [>= 3] vector or if
+    ValueError: if image.shape is not a [>= 2] vector or if
               last dimension is not size 1.
 
   Returns:
@@ -283,11 +280,11 @@ def _CheckGrayscaleImage(image, require_static=True):
   """"""
   try:
     if image.get_shape().ndims is None:
-      image_shape = image.get_shape().with_rank(3)
+      image_shape = image.get_shape().with_rank(2)
     else:
-      image_shape = image.get_shape().with_rank_at_least(3)
+      image_shape = image.get_shape().with_rank_at_least(2)
   except ValueError:
-    raise ValueError('A grayscale image must be at least three-dimensional.')
+    raise ValueError('A grayscale image must be at least two-dimensional.')
   if require_static and not image_shape.is_fully_defined():
     raise ValueError('\'image\' must be fully defined.')
   if image_shape.is_fully_defined():
@@ -302,7 +299,7 @@ def _CheckGrayscaleImage(image, require_static=True):
         check_ops.assert_greater_equal(
             array_ops.rank(image),
             3,
-            message='A grayscale image must be at least three-dimensional.')
+            message='A grayscale image must be at least two-dimensional.')
     ]
   else:
     return []
@@ -468,6 +465,7 @@ def _flip(image, flip_index, scope_name):
     image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
       of shape `[height, width, channels]`.
     flip_index: 0 For vertical, 1 for horizontal.
+    scope_name: string, scope name.
 
   Returns:
     A tensor of the same type and shape as `image`.
@@ -2168,7 +2166,7 @@ def decode_image(contents,
     expand_animations: Controls the shape of the returned op's output.
       If `True`, the returned op will produce a 3-D tensor for PNG, JPEG, and
       BMP files; and a 4-D tensor for all GIFs, whether animated or not.
-      If, `False`, the returned op will produce a 3-D tensor for all file 
+      If, `False`, the returned op will produce a 3-D tensor for all file
       types and will truncate animated GIFs to the first frame.
 
   Returns:
",0,train
39357a6f3d6cc75bb8eee002bd7c05b117a05968,tensorflow/tensorflow,"Allow grayscale images to be 2D, either [H, W] or [H, 1]

PiperOrigin-RevId: 248224634",image_ops_test.py,"@@ -234,13 +234,13 @@ class GrayscaleToRGBTest(test_util.TensorFlowTestCase):
     # tests if an exception is raised if a two dimensional
     # input is used, i.e. the images have shape [height, width]
     with self.cached_session(use_gpu=True):
-      # 2-D input without batch dimension.
-      x_np = np.array([[1, 2]], dtype=np.uint8).reshape([1, 2])
+      # 1-D input without batch dimension.
+      x_np = np.array([[1, 2]], dtype=np.uint8).reshape([2])
 
       x_tf = constant_op.constant(x_np, shape=x_np.shape)
 
       # this is the error message we expect the function to raise
-      err_msg = ""A grayscale image must be at least three-dimensional""
+      err_msg = ""A grayscale image must be at least two-dimensional""
       with self.assertRaisesRegexp(ValueError, err_msg):
         image_ops.grayscale_to_rgb(x_tf)
 
@@ -4553,11 +4553,11 @@ class NonMaxSuppressionWithOverlapsTest(test_util.TensorFlowTestCase):
         [0.2, 0.0, 1.0],
     ]
     scores_np = [0.7, 0.9, 0.1]
-    max_ouput_size_np = 3
+    max_output_size_np = 3
 
     overlaps = constant_op.constant(overlaps_np)
     scores = constant_op.constant(scores_np)
-    max_output_size = constant_op.constant(max_ouput_size_np)
+    max_output_size = constant_op.constant(max_output_size_np)
     overlap_threshold = 0.6
     score_threshold = 0.4
 
",0,train
42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator.

The ops that are being cloned have the TPU attributes added. When
the NodeDef is cloned, they continue to have the attribute, so the
TPUReplicateContext complains about nesting. Add a private _cloned attribute
to indicate that it was cloned by TensorFlowOpLayer.

PiperOrigin-RevId: 276175985
Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",base_layer.py,"@@ -2500,6 +2500,9 @@ class TensorFlowOpLayer(Layer):
   def _make_node_def(self, graph):
     node_def = node_def_pb2.NodeDef()
     node_def.CopyFrom(self.node_def)
+    # Used in TPUReplicateContext to indicate whether this node has been cloned
+    # and to not add TPU attributes.
+    node_def.attr['_cloned'].b = True
     node_def.name = graph.unique_name(node_def.name)
     return node_def
 
",0,train
42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator.

The ops that are being cloned have the TPU attributes added. When
the NodeDef is cloned, they continue to have the attribute, so the
TPUReplicateContext complains about nesting. Add a private _cloned attribute
to indicate that it was cloned by TensorFlowOpLayer.

PiperOrigin-RevId: 276175985
Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",base_layer_utils.py,"@@ -28,6 +28,7 @@ from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend
 from tensorflow.python.ops import array_ops
+from tensorflow.python.ops import control_flow_util
 from tensorflow.python.ops import control_flow_v2_func_graphs
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import init_ops_v2
@@ -230,10 +231,15 @@ def _create_keras_history_helper(tensors, processed_ops, created_layers):
         else:
           # Treat any value not originating from a `keras.Input` as
           # a constant. Variables cannot be supported.
-          if (distribution_strategy_context.in_cross_replica_context() and
-              not ops.executing_eagerly_outside_functions()):
+          ds_with_session = (
+              distribution_strategy_context.in_cross_replica_context() and
+              not ops.executing_eagerly_outside_functions())
+          using_xla = control_flow_util.GraphOrParentsInXlaContext(
+              ops.get_default_graph())
+          if ds_with_session or using_xla:
             # In Legacy Graph mode, evaluating here makes Session be
-            # configured improperly.
+            # configured improperly. The downside of this is that saving
+            # via `get_config` breaks, but SavedModel still works.
             constants[i] = op_input
           else:
             with ops.init_scope():
",0,train
42b5d6692bf87c79efbd3fa688ed6c49bcdc6254,tensorflow/tensorflow,"Fix Keras API functional API op cloning inside TPUEstimator.

The ops that are being cloned have the TPU attributes added. When
the NodeDef is cloned, they continue to have the attribute, so the
TPUReplicateContext complains about nesting. Add a private _cloned attribute
to indicate that it was cloned by TensorFlowOpLayer.

PiperOrigin-RevId: 276175985
Change-Id: I99cd09e74d2c573bc625fa284713858d82529a37",tpu.py,"@@ -486,8 +486,13 @@ class TPUReplicateContext(control_flow_ops.XLAControlFlowContext):
       raise NotImplementedError(
           ""Non-resource Variables are not supported inside TPU computations ""
           ""(operator name: %s)"" % op.name)
-    if _TPU_REPLICATE_ATTR in op.node_def.attr:
-      raise ValueError(""TPU computations cannot be nested"")
+
+    # TensorFlowOpLayer may clone nodes that are in tpu.rewrite()s. It'll add
+    # the ""_cloned"" attribute and we should continue in that case.
+    if (_TPU_REPLICATE_ATTR in op.node_def.attr and
+        ""_cloned"" not in op.node_def.attr):
+      raise ValueError(""TPU computations cannot be nested on op (%s)"" %
+                       op)
     op._set_attr_with_buf(
         _TPU_REPLICATE_ATTR, self._tpu_relicate_attr_buf._buffer)
     if self._outside_compilation_cluster:
",0,train
9e78991b5c380b7fba0444685e5c6ef40e3c5b26,tensorflow/tensorflow,"Fix typo in Tensorflow control_flow_ops_py_test.

The test would fall back to GPU:0 when unable to find a GPU. This should be CPU.

PiperOrigin-RevId: 212649435",control_flow_ops_py_test.py,"@@ -1753,7 +1753,7 @@ class ControlFlowTest(test.TestCase):
 
   def _testWhileGrad_ColocateGradients(self, colocate):
     gpu_dev_name = test.gpu_device_name() if test.is_gpu_available(
-    ) else ""/device:GPU:0""
+    ) else ""/device:CPU:0""
 
     graph = ops.Graph()
     with graph.as_default():
",0,train
0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote

Fixes https://github.com/tensorflow/tensorboard/issues/1103

PiperOrigin-RevId: 257419107",grpc_debug_server.py,"@@ -346,7 +346,10 @@ class EventListenerBaseServicer(debug_service_pb2_grpc.EventListenerServicer):
       if self._server_started:
         raise ValueError(""Server has already started running"")
 
-      self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10))
+      no_max_message_sizes = [(""grpc.max_receive_message_length"", -1),
+                              (""grpc.max_send_message_length"", -1)]
+      self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10),
+                                options=no_max_message_sizes)
       debug_service_pb2_grpc.add_EventListenerServicer_to_server(self,
                                                                  self.server)
       self.server.add_insecure_port(""[::]:%d"" % self._server_port)
",0,test
0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote

Fixes https://github.com/tensorflow/tensorboard/issues/1103

PiperOrigin-RevId: 257419107",source_remote.py,"@@ -28,7 +28,6 @@ from tensorflow.python.debug.lib import common
 from tensorflow.python.debug.lib import debug_service_pb2_grpc
 from tensorflow.python.debug.lib import source_utils
 from tensorflow.python.platform import gfile
-from tensorflow.python.platform import tf_logging
 from tensorflow.python.profiler import tfprof_logger
 
 
@@ -96,11 +95,6 @@ def _source_file_paths_outside_tensorflow_py_library(code_defs, id_to_string):
   return non_tf_files
 
 
-def grpc_message_length_bytes():
-  """"""Maximum gRPC message length in bytes.""""""
-  return 4 * 1024 * 1024
-
-
 def _send_call_tracebacks(destinations,
                           origin_stack,
                           is_eager_execution=False,
@@ -169,20 +163,14 @@ def _send_call_tracebacks(destinations,
       debugged_source_files.append(source_files)
 
   for destination in destinations:
-    channel = grpc.insecure_channel(destination)
+    no_max_message_sizes = [(""grpc.max_receive_message_length"", -1),
+                            (""grpc.max_send_message_length"", -1)]
+    channel = grpc.insecure_channel(destination, options=no_max_message_sizes)
     stub = debug_service_pb2_grpc.EventListenerStub(channel)
     stub.SendTracebacks(call_traceback)
     if send_source:
-      for path, source_files in zip(
-          source_file_paths, debugged_source_files):
-        if source_files.ByteSize() < grpc_message_length_bytes():
-          stub.SendSourceFiles(source_files)
-        else:
-          tf_logging.warn(
-              ""The content of the source file at %s is not sent to ""
-              ""gRPC debug server %s, because the message size exceeds ""
-              ""gRPC message length limit (%d bytes)."" % (
-                  path, destination, grpc_message_length_bytes()))
+      for source_files in debugged_source_files:
+        stub.SendSourceFiles(source_files)
 
 
 def send_graph_tracebacks(destinations,
",0,test
0f5d0cfc2a738c5605cb23a1975ac4a1ceb11e24,tensorflow/tensorflow,"[tfdbg] Fix gRPC message length limit issue in source remote

Fixes https://github.com/tensorflow/tensorboard/issues/1103

PiperOrigin-RevId: 257419107",source_remote_test.py,"@@ -21,6 +21,8 @@ from __future__ import print_function
 import os
 import traceback
 
+import grpc
+
 from tensorflow.core.debug import debug_service_pb2
 from tensorflow.python.client import session
 from tensorflow.python.debug.lib import grpc_debug_test_server
@@ -129,9 +131,17 @@ class SendTracebacksTest(test_util.TensorFlowTestCase):
 
       send_traceback = traceback.extract_stack()
       send_lineno = line_number_above()
-      source_remote.send_graph_tracebacks(
-          [self._server_address, self._server_address_2],
-          ""dummy_run_key"", send_traceback, sess.graph)
+
+      with test.mock.patch.object(
+          grpc, ""insecure_channel"",
+          wraps=grpc.insecure_channel) as mock_grpc_channel:
+        source_remote.send_graph_tracebacks(
+            [self._server_address, self._server_address_2],
+            ""dummy_run_key"", send_traceback, sess.graph)
+        mock_grpc_channel.assert_called_with(
+            test.mock.ANY,
+            options=[(""grpc.max_receive_message_length"", -1),
+                     (""grpc.max_send_message_length"", -1)])
 
       servers = [self._server, self._server_2]
       for server in servers:
@@ -157,51 +167,6 @@ class SendTracebacksTest(test_util.TensorFlowTestCase):
         self.assertEqual([""dummy_run_key""], server.query_call_keys())
         self.assertEqual([sess.graph.version], server.query_graph_versions())
 
-  def testSourceFileSizeExceedsGrpcMessageLengthLimit(self):
-    """"""In case source file size exceeds the grpc message length limit.
-
-    it ought not to have been sent to the server.
-    """"""
-    this_func_name = ""testSourceFileSizeExceedsGrpcMessageLengthLimit""
-
-    # Patch the method to simulate a very small message length limit.
-    with test.mock.patch.object(
-        source_remote, ""grpc_message_length_bytes"", return_value=2):
-      with session.Session() as sess:
-        a = variables.Variable(21.0, name=""two/a"")
-        a_lineno = line_number_above()
-        b = variables.Variable(2.0, name=""two/b"")
-        b_lineno = line_number_above()
-        x = math_ops.add(a, b, name=""two/x"")
-        x_lineno = line_number_above()
-
-        send_traceback = traceback.extract_stack()
-        send_lineno = line_number_above()
-        source_remote.send_graph_tracebacks(
-            [self._server_address, self._server_address_2],
-            ""dummy_run_key"", send_traceback, sess.graph)
-
-        servers = [self._server, self._server_2]
-        for server in servers:
-          # Even though the source file content is not sent, the traceback
-          # should have been sent.
-          tb = server.query_op_traceback(""two/a"")
-          self.assertIn((self._curr_file_path, a_lineno, this_func_name), tb)
-          tb = server.query_op_traceback(""two/b"")
-          self.assertIn((self._curr_file_path, b_lineno, this_func_name), tb)
-          tb = server.query_op_traceback(""two/x"")
-          self.assertIn((self._curr_file_path, x_lineno, this_func_name), tb)
-
-          self.assertIn(
-              (self._curr_file_path, send_lineno, this_func_name),
-              server.query_origin_stack()[-1])
-
-          tf_trace_file_path = (
-              self._findFirstTraceInsideTensorFlowPyLibrary(x.op))
-          # Verify that the source content is not sent to the server.
-          with self.assertRaises(ValueError):
-            self._server.query_source_file_line(tf_trace_file_path, 0)
-
   def testSendEagerTracebacksToSingleDebugServer(self):
     this_func_name = ""testSendEagerTracebacksToSingleDebugServer""
     send_traceback = traceback.extract_stack()
@@ -213,6 +178,20 @@ class SendTracebacksTest(test_util.TensorFlowTestCase):
     self.assertIn((self._curr_file_path, send_lineno, this_func_name),
                   self._server.query_origin_stack()[-1])
 
+  def testGRPCServerMessageSizeLimit(self):
+    """"""Assert gRPC debug server is started with unlimited message size.""""""
+    with test.mock.patch.object(
+        grpc, ""server"", wraps=grpc.server) as mock_grpc_server:
+      (_, _, _, server_thread,
+       server) = grpc_debug_test_server.start_server_on_separate_thread(
+           poll_server=True)
+      mock_grpc_server.assert_called_with(
+          test.mock.ANY,
+          options=[(""grpc.max_receive_message_length"", -1),
+                   (""grpc.max_send_message_length"", -1)])
+    server.stop_server().wait()
+    server_thread.join()
+
 
 if __name__ == ""__main__"":
   googletest.main()
",0,test
47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities

PiperOrigin-RevId: 307732210
Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",flatbuffer_utils_test.py,"@@ -31,7 +31,7 @@ class WriteReadModelTest(test_util.TensorFlowTestCase):
   def testWriteReadModel(self):
     # 1. SETUP
     # Define the initial model
-    initial_model = test_utils.build_mock_model_python_object()
+    initial_model = test_utils.build_mock_model()
     # Define temporary files
     tmp_dir = self.get_temp_dir()
     model_filename = os.path.join(tmp_dir, 'model.tflite')
@@ -76,7 +76,7 @@ class StripStringsTest(test_util.TensorFlowTestCase):
   def testStripStrings(self):
     # 1. SETUP
     # Define the initial model
-    initial_model = test_utils.build_mock_model_python_object()
+    initial_model = test_utils.build_mock_model()
     final_model = copy.deepcopy(initial_model)
 
     # 2. INVOKE
@@ -121,7 +121,7 @@ class RandomizeWeightsTest(test_util.TensorFlowTestCase):
   def testRandomizeWeights(self):
     # 1. SETUP
     # Define the initial model
-    initial_model = test_utils.build_mock_model_python_object()
+    initial_model = test_utils.build_mock_model()
     final_model = copy.deepcopy(initial_model)
 
     # 2. INVOKE
",0,train
47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities

PiperOrigin-RevId: 307732210
Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",test_utils.py,"@@ -14,7 +14,7 @@
 # ==============================================================================
 """"""Utility functions that support testing.
 
-All functions that can be commonly used by various tests are in this file.
+All functions that can be commonly used by various tests.
 """"""
 
 from __future__ import absolute_import
@@ -25,7 +25,7 @@ from flatbuffers.python import flatbuffers
 from tensorflow.lite.python import schema_py_generated as schema_fb
 
 
-def build_mock_model():
+def build_mock_flatbuffer_model():
   """"""Creates a flatbuffer containing an example model.""""""
   builder = flatbuffers.Builder(1024)
 
@@ -205,10 +205,14 @@ def build_mock_model():
   return model
 
 
-def build_mock_model_python_object():
-  """"""Creates a python flatbuffer object containing an example model.""""""
-  model_mock = build_mock_model()
-  model_obj = schema_fb.Model.GetRootAsModel(model_mock, 0)
-  model = schema_fb.ModelT.InitFromObj(model_obj)
-
+def load_model_from_flatbuffer(flatbuffer_model):
+  """"""Loads a model as a python object from a flatbuffer model.""""""
+  model = schema_fb.Model.GetRootAsModel(flatbuffer_model, 0)
+  model = schema_fb.ModelT.InitFromObj(model)
   return model
+
+
+def build_mock_model():
+  """"""Creates an object containing an example model.""""""
+  model = build_mock_flatbuffer_model()
+  return load_model_from_flatbuffer(model)
",0,train
47ea7eeb96bd9f46ab1f6a7bfaf0ab8f98ad2e42,tensorflow/tensorflow,"Minor updates to flatbuffer utilities

PiperOrigin-RevId: 307732210
Change-Id: I6b97ccdff0323dbf0fd20fc20d6bc7e49d5e08ad",visualize_test.py,"@@ -35,8 +35,8 @@ class VisualizeTest(test_util.TensorFlowTestCase):
     self.assertEqual('HASHTABLE_LOOKUP', visualize.BuiltinCodeToName(10))
 
   def testFlatbufferToDict(self):
-    model_data = test_utils.build_mock_model()
-    model_dict = visualize.CreateDictFromFlatbuffer(model_data)
+    model = test_utils.build_mock_flatbuffer_model()
+    model_dict = visualize.CreateDictFromFlatbuffer(model)
     self.assertEqual(0, model_dict['version'])
     self.assertEqual(1, len(model_dict['subgraphs']))
     self.assertEqual(1, len(model_dict['operator_codes']))
@@ -45,12 +45,11 @@ class VisualizeTest(test_util.TensorFlowTestCase):
     self.assertEqual(0, model_dict['subgraphs'][0]['tensors'][0]['buffer'])
 
   def testVisualize(self):
-    model_data = test_utils.build_mock_model()
-
+    model = test_utils.build_mock_flatbuffer_model()
     tmp_dir = self.get_temp_dir()
     model_filename = os.path.join(tmp_dir, 'model.tflite')
     with open(model_filename, 'wb') as model_file:
-      model_file.write(model_data)
+      model_file.write(model)
     html_filename = os.path.join(tmp_dir, 'visualization.html')
 
     visualize.CreateHtmlFile(model_filename, html_filename)
",0,train
b08e6cd85aba13749accab67f9f94f00621ecb9c,tensorflow/tensorflow,"Remove use of gtl/cleanup.h and hash.h in cupti_tracer

PiperOrigin-RevId: 310582340
Change-Id: I1c087a126947c1eced849deb3f8a5689ebe08d08",cupti_tracer.cc,"@@ -18,8 +18,6 @@ limitations under the License.
 #include ""absl/container/flat_hash_map.h""
 #include ""absl/container/node_hash_map.h""
 #include ""tensorflow/core/lib/core/errors.h""
-#include ""tensorflow/core/lib/gtl/cleanup.h""
-#include ""tensorflow/core/lib/hash/hash.h""
 #include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/macros.h""
@@ -286,19 +284,14 @@ void CUPTIAPI FreeCuptiActivityBuffer(CUcontext context, uint32_t stream_id,
           << reinterpret_cast<uintptr_t>(buffer) << std::dec
           << "" size: "" << size << "" valid_size: "" << valid_size;
 
-  // Ensure buffer is free when this function returns.
-  auto buffer_cleanup =
-      gtl::MakeCleanup([buffer] { port::AlignedFree(buffer); });
+  if (valid_size > 0) {
+    VLOG(3) << ""Activity profile for stream "" << stream_id;
 
-  if (valid_size <= 0) {
-    return;
+    CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton();
+    cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size)
+        .IgnoreError();
   }
-
-  VLOG(3) << ""Activity profile for stream "" << stream_id;
-
-  CuptiTracer *cupti_tracer = CuptiTracer::GetCuptiTracerSingleton();
-  cupti_tracer->ProcessActivityBuffer(context, stream_id, buffer, valid_size)
-      .IgnoreError();
+  port::AlignedFree(buffer);
 }
 
 void AddKernelEventUponApiExit(CuptiTraceCollector *collector, uint32 device_id,
@@ -984,7 +977,7 @@ class CudaEventRecorder {
   using StreamKey = std::pair<CUcontext, CUstream>;
 
   absl::node_hash_map<CUcontext, ContextInfo> context_infos_;
-  absl::flat_hash_map<StreamKey, StreamInfo, hash<StreamKey>> stream_infos_;
+  absl::flat_hash_map<StreamKey, StreamInfo> stream_infos_;
 };
 
 // This hook uses cuda events to measure device side activities.
",0,train
3492b4307ba62b54eca66a64edbcfdbade005d8d,tensorflow/tensorflow,"allow (different) sparse types in unary eltwise ops

PiperOrigin-RevId: 436568879",hlo_ops.h,"@@ -84,6 +84,10 @@ class TokenType : public Type::TypeBase<TokenType, Type, TypeStorage> {
   using Base::Base;
 };
 
+// Returns the type, but without any sparsity encoding. Used to
+// strip sparsity properties of tensor types before is-same tests.
+Type getTypeWithoutSparseEncoding(Type tp);
+
 // Shape derivation function that computes the shape of the result based on an
 // operand. For a 2-dimensional input tensor, this produces IR of the form
 //
",0,train
3492b4307ba62b54eca66a64edbcfdbade005d8d,tensorflow/tensorflow,"allow (different) sparse types in unary eltwise ops

PiperOrigin-RevId: 436568879",hlo_ops.cc,"@@ -50,6 +50,7 @@ limitations under the License.
 #include ""mlir-hlo/utils/hlo_utils.h""
 #include ""mlir/Dialect/Arithmetic/IR/Arithmetic.h""
 #include ""mlir/Dialect/Shape/IR/Shape.h""
+#include ""mlir/Dialect/SparseTensor/IR/SparseTensor.h""
 #include ""mlir/Dialect/Tensor/IR/Tensor.h""
 #include ""mlir/IR/Attributes.h""
 #include ""mlir/IR/Builders.h""
@@ -7501,6 +7502,18 @@ static LogicalResult VerifyArgResultAliasAttr(StringAttr attr_name,
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// Type utilities for ignoring sparsity encoding
+//===----------------------------------------------------------------------===//
+
+Type getTypeWithoutSparseEncoding(Type tp) {
+  if (sparse_tensor::getSparseTensorEncoding(tp)) {
+    auto rtp = tp.dyn_cast<RankedTensorType>();
+    tp = RankedTensorType::get(rtp.getShape(), rtp.getElementType());
+  }
+  return tp;
+}
+
 //===----------------------------------------------------------------------===//
 // Shape inference
 //===----------------------------------------------------------------------===//
",0,train
924a8f24a9b8b8a3b1a561123f3e4cf9ebe91708,tensorflow/tensorflow,"Fix the version string in setup.py. The PR seemed to miss it.

PiperOrigin-RevId: 205443195",setup.py,"@@ -45,7 +45,7 @@ DOCLINES = __doc__.split('\n')
 # This version string is semver compatible, but incompatible with pip.
 # For pip, we will remove all '-' characters from this string, and use the
 # result for pip.
-_VERSION = '1.9.0-rc0'
+_VERSION = '1.9.0'
 
 REQUIRED_PACKAGES = [
     'absl-py >= 0.1.6',
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",backprop.py,"@@ -276,7 +276,7 @@ def implicit_grad(f):
 def _get_arg_spec(f, params, param_args):
   """"""The positions of the parameters of f to be differentiated in param_args.""""""
   try:
-    args = tf_inspect.getargspec(f).args
+    args = tf_inspect.getfullargspec(f).args
   except TypeError as e:
     # TypeError can happen when f is a callable object.
     if params is None:
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",graph_callable.py,"@@ -288,7 +288,7 @@ def _graph_callable_internal(func, shape_and_dtypes):
     with tmp_graph.as_default():
       # Placeholders for the non-variable inputs.
       func_inputs = _get_graph_callable_inputs(shape_and_dtypes)
-      func_num_args = len(tf_inspect.getargspec(func).args)
+      func_num_args = len(tf_inspect.getfullargspec(func).args)
       if len(func_inputs) != func_num_args:
         raise TypeError(""The number of arguments accepted by the decorated ""
                         ""function `%s` (%d) must match the number of ""
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",backend.py,"@@ -2935,8 +2935,8 @@ def function(inputs, outputs, updates=None, **kwargs):
   """"""
   if kwargs:
     for key in kwargs:
-      if (key not in tf_inspect.getargspec(session_module.Session.run)[0] and
-          key not in tf_inspect.getargspec(Function.__init__)[0]):
+      if (key not in tf_inspect.getfullargspec(session_module.Session.run)[0]
+          and key not in tf_inspect.getfullargspec(Function.__init__)[0]):
         msg = ('Invalid argument ""%s"" passed to K.function with TensorFlow '
                'backend') % key
         raise ValueError(msg)
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",base_layer.py,"@@ -175,7 +175,7 @@ class Layer(checkpointable.CheckpointableBase):
 
     self.supports_masking = False
 
-    call_argspec = tf_inspect.getargspec(self.call)
+    call_argspec = tf_inspect.getfullargspec(self.call)
     if 'training' in call_argspec.args:
       self._expects_training_arg = True
     else:
@@ -904,7 +904,7 @@ class Layer(checkpointable.CheckpointableBase):
       assert len(call_args) == 1  # TypeError raised earlier in __call__.
       return call_args[0], call_kwargs
     else:
-      call_arg_spec = tf_inspect.getargspec(self.call)
+      call_arg_spec = tf_inspect.getfullargspec(self.call)
       # There is no explicit ""inputs"" argument expected or provided to
       # call(). Arguments which have default values are considered non-inputs,
       # and arguments without are considered inputs.
@@ -924,8 +924,8 @@ class Layer(checkpointable.CheckpointableBase):
       _, unwrapped_call = tf_decorator.unwrap(self.call)
       bound_args = inspect.getcallargs(
           unwrapped_call, *call_args, **call_kwargs)
-      if call_arg_spec.keywords is not None:
-        var_kwargs = bound_args.pop(call_arg_spec.keywords)
+      if call_arg_spec.varkw is not None:
+        var_kwargs = bound_args.pop(call_arg_spec.varkw)
         bound_args.update(var_kwargs)
         keyword_arg_names = keyword_arg_names.union(var_kwargs.keys())
       all_args = call_arg_spec.args
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",network.py,"@@ -215,7 +215,7 @@ class Network(base_layer.Layer):
 
     self._base_init(name=name)
     self._compute_previous_mask = (
-        'mask' in tf_inspect.getargspec(self.call).args or
+        'mask' in tf_inspect.getfullargspec(self.call).args or
         hasattr(self, 'compute_mask'))
     # A Network does not create weights of its own, thus it is already
     # built.
@@ -309,7 +309,7 @@ class Network(base_layer.Layer):
   def _init_subclassed_network(self, name=None):
     self._base_init(name=name)
     self._is_graph_network = False
-    call_argspec = tf_inspect.getargspec(self.call)
+    call_argspec = tf_inspect.getfullargspec(self.call)
     if 'training' in call_argspec.args:
       self._expects_training_arg = True
     else:
@@ -788,7 +788,7 @@ class Network(base_layer.Layer):
             x = base_layer.generate_placeholders_from_shape(input_shape)
 
           kwargs = {}
-          num_call_args = len(tf_inspect.getargspec(self.call).args)
+          num_call_args = len(tf_inspect.getfullargspec(self.call).args)
           if self._expects_training_arg and num_call_args == 3:
             # Has call signature of call(self, input, training)
             kwargs['training'] = False
@@ -1035,9 +1035,9 @@ class Network(base_layer.Layer):
             if len(computed_data) == 1:
               computed_tensor, computed_mask = computed_data[0]
               # Ensure mask propagation if applicable.
-              if 'mask' in tf_inspect.getargspec(layer.call).args:
+              if 'mask' in tf_inspect.getfullargspec(layer.call).args:
                 kwargs.setdefault('mask', computed_mask)
-              if 'training' in tf_inspect.getargspec(layer.call).args:
+              if 'training' in tf_inspect.getfullargspec(layer.call).args:
                 kwargs.setdefault('training', training)
 
               output_tensors = nest.flatten(
@@ -1055,9 +1055,9 @@ class Network(base_layer.Layer):
             else:
               computed_tensors = [x[0] for x in computed_data]
               computed_masks = [x[1] for x in computed_data]
-              if 'mask' in tf_inspect.getargspec(layer.call).args:
+              if 'mask' in tf_inspect.getfullargspec(layer.call).args:
                 kwargs.setdefault('mask', computed_masks)
-              if 'training' in tf_inspect.getargspec(layer.call).args:
+              if 'training' in tf_inspect.getfullargspec(layer.call).args:
                 kwargs.setdefault('training', training)
 
               output_tensors = nest.flatten(
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",generic_utils.py,"@@ -162,7 +162,7 @@ def deserialize_keras_object(identifier,
       if cls is None:
         raise ValueError('Unknown ' + printable_module_name + ': ' + class_name)
     if hasattr(cls, 'from_config'):
-      arg_spec = tf_inspect.getargspec(cls.from_config)
+      arg_spec = tf_inspect.getfullargspec(cls.from_config)
       custom_objects = custom_objects or {}
 
       if 'custom_objects' in arg_spec.args:
@@ -281,8 +281,8 @@ def has_arg(fn, name, accept_all=False):
   Returns:
       bool, whether `fn` accepts a `name` keyword argument.
   """"""
-  arg_spec = tf_inspect.getargspec(fn)
-  if accept_all and arg_spec.keywords is not None:
+  arg_spec = tf_inspect.getfullargspec(fn)
+  if accept_all and arg_spec.varkw is not None:
     return True
   return name in arg_spec.args
 
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",custom_gradient.py,"@@ -142,9 +142,9 @@ def _graph_mode_decorator(f, *args, **kwargs):
   # The variables that grad_fn needs to return gradients for are the set of
   # variables used that are *not* part of the inputs.
   variables = list(set(tape.watched_variables()) - set(args))
-  grad_argspec = tf_inspect.getargspec(grad_fn)
+  grad_argspec = tf_inspect.getfullargspec(grad_fn)
   variables_in_signature = (""variables"" in grad_argspec.args or
-                            grad_argspec.keywords)
+                            grad_argspec.varkw)
   if variables and not variables_in_signature:
     raise TypeError(""If using @custom_gradient with a function that ""
                     ""uses variables, then grad_fn must accept a keyword ""
@@ -194,9 +194,9 @@ def _eager_mode_decorator(f, *args, **kwargs):
   # The variables that grad_fn needs to return gradients for are the set of
   # variables used that are *not* part of the inputs.
   variables = [v for v in set(tape.watched_variables()) if v not in all_inputs]
-  grad_argspec = tf_inspect.getargspec(grad_fn)
-  if (variables and
-      not (""variables"" in grad_argspec.args or grad_argspec.keywords)):
+  grad_argspec = tf_inspect.getfullargspec(grad_fn)
+  if (variables and (""variables"" not in grad_argspec.args) and
+      not grad_argspec.varkw):
     raise TypeError(""If using @custom_gradient with a function that ""
                     ""uses variables, then grad_fn must accept a keyword ""
                     ""argument 'variables'."")
",0,train
b42e59a82275a8a630c6f91621628a8c578f132d,tensorflow/tensorflow,"Do not panic on type annotations in python3 for layers / functions passed to TF.

inspect.getargspec raises errors if they are present but getfullargspec is
perfectly happy to let functions with type annotations pass.

PiperOrigin-RevId: 207127930",deprecation.py,"@@ -388,7 +388,7 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples,
     Args:
       names_to_ok_vals: dict from string arg_name to a list of values,
         possibly empty, which should not elicit a warning.
-      arg_spec: Output from tf_inspect.getargspec on the called function.
+      arg_spec: Output from tf_inspect.getfullargspec on the called function.
 
     Returns:
       Dictionary from arg_name to DeprecatedArgSpec.
@@ -408,16 +408,16 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples,
     decorator_utils.validate_callable(func, 'deprecated_args')
     deprecated_arg_names = _get_arg_names_to_ok_vals()
 
-    arg_spec = tf_inspect.getargspec(func)
+    arg_spec = tf_inspect.getfullargspec(func)
     deprecated_positions = _get_deprecated_positional_arguments(
         deprecated_arg_names, arg_spec)
 
     is_varargs_deprecated = arg_spec.varargs in deprecated_arg_names
-    is_kwargs_deprecated = arg_spec.keywords in deprecated_arg_names
+    is_kwargs_deprecated = arg_spec.varkw in deprecated_arg_names
 
     if (len(deprecated_positions) + is_varargs_deprecated + is_kwargs_deprecated
         != len(deprecated_arg_names_or_tuples)):
-      known_args = arg_spec.args + [arg_spec.varargs, arg_spec.keywords]
+      known_args = arg_spec.args + [arg_spec.varargs, arg_spec.varkw]
       missing_args = [arg_name for arg_name in deprecated_arg_names
                       if arg_name not in known_args]
       raise ValueError('The following deprecated arguments are not present '
@@ -467,7 +467,7 @@ def deprecated_args(date, instructions, *deprecated_arg_names_or_tuples,
         if is_varargs_deprecated and len(args) > len(arg_spec.args):
           invalid_args.append(arg_spec.varargs)
         if is_kwargs_deprecated and kwargs:
-          invalid_args.append(arg_spec.keywords)
+          invalid_args.append(arg_spec.varkw)
         for arg_name in deprecated_arg_names:
           if (arg_name in kwargs and
               not (deprecated_positions[arg_name].has_ok_value and
",0,train
b543d891faf7283b3a7342aa89ecb8ff9d44629a,tensorflow/tensorflow,"Make summary names for linear models unique
Change: 130962334",models.py,"@@ -81,8 +81,9 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0):
     uniform_unit_scaling_initialzer will be used.
   """"""
   with vs.variable_scope('linear_regression'):
-    logging_ops.histogram_summary('linear_regression.x', x)
-    logging_ops.histogram_summary('linear_regression.y', y)
+    scope_name = vs.get_variable_scope().name
+    logging_ops.histogram_summary('%s.x' % scope_name, x)
+    logging_ops.histogram_summary('%s.y' % scope_name, y)
     dtype = x.dtype.base_dtype
     y_shape = y.get_shape()
     if len(y_shape) == 1:
@@ -103,8 +104,8 @@ def linear_regression(x, y, init_mean=None, init_stddev=1.0):
                              initializer=init_ops.random_normal_initializer(
                                  init_mean, init_stddev, dtype=dtype),
                              dtype=dtype)
-    logging_ops.histogram_summary('linear_regression.weights', weights)
-    logging_ops.histogram_summary('linear_regression.bias', bias)
+    logging_ops.histogram_summary('%s.weights' % scope_name, weights)
+    logging_ops.histogram_summary('%s.bias' % scope_name, bias)
     return losses_ops.mean_squared_error_regressor(x, y, weights, bias)
 
 
@@ -139,8 +140,9 @@ def logistic_regression(x,
     uniform_unit_scaling_initialzer will be used.
   """"""
   with vs.variable_scope('logistic_regression'):
-    logging_ops.histogram_summary('%s.x' % vs.get_variable_scope().name, x)
-    logging_ops.histogram_summary('%s.y' % vs.get_variable_scope().name, y)
+    scope_name = vs.get_variable_scope().name
+    logging_ops.histogram_summary('%s.x' % scope_name, x)
+    logging_ops.histogram_summary('%s.y' % scope_name, y)
     dtype = x.dtype.base_dtype
     # Set up the requested initialization.
     if init_mean is None:
@@ -157,10 +159,8 @@ def logistic_regression(x,
                              initializer=init_ops.random_normal_initializer(
                                  init_mean, init_stddev, dtype=dtype),
                              dtype=dtype)
-    logging_ops.histogram_summary('%s.weights' % vs.get_variable_scope().name,
-                                  weights)
-    logging_ops.histogram_summary('%s.bias' % vs.get_variable_scope().name,
-                                  bias)
+    logging_ops.histogram_summary('%s.weights' % scope_name, weights)
+    logging_ops.histogram_summary('%s.bias' % scope_name, bias)
     # If no class weight provided, try to retrieve one from pre-defined
     # tensor name in the graph.
     if not class_weight:
",0,train
4c960c9c2c54a2f3130af4de46805cf27c616126,tensorflow/tensorflow,"[tf.data] Update benchmarks to reflect moving the `AUTOTUNE` constant from `optimization` to `dataset_ops`.

PiperOrigin-RevId: 251289847",autotune_benchmark.py,"@@ -22,7 +22,6 @@ import time
 import numpy as np
 
 from tensorflow.python.client import session
-from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import test
@@ -42,7 +41,7 @@ class AutotuneBenchmark(test.Benchmark):
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
     dataset = dataset.map(
-        math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
+        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
     options = dataset_ops.Options()
     options.experimental_optimization.apply_default_optimizations = False
     options.experimental_optimization.autotune = autotune
@@ -78,7 +77,7 @@ class AutotuneBenchmark(test.Benchmark):
                                                 np.random.rand(4 * k,
                                                                1))).repeat()
     dataset = dataset.map(
-        math_ops.matmul, num_parallel_calls=optimization.AUTOTUNE)
+        math_ops.matmul, num_parallel_calls=dataset_ops.AUTOTUNE)
     dataset = dataset.batch(batch_size=batch_size)
     options = dataset_ops.Options()
     options.experimental_optimization.apply_default_optimizations = False
@@ -118,7 +117,7 @@ class AutotuneBenchmark(test.Benchmark):
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
         lambda _: dataset,
         cycle_length=10,
-        num_parallel_calls=optimization.AUTOTUNE)
+        num_parallel_calls=dataset_ops.AUTOTUNE)
     options = dataset_ops.Options()
     options.experimental_optimization.apply_default_optimizations = False
     options.experimental_optimization.autotune = autotune
@@ -164,21 +163,21 @@ class AutotuneBenchmark(test.Benchmark):
       return a, math_ops.matmul(x, y)
 
     dataset = dataset_a
-    dataset = dataset.map(f1, num_parallel_calls=optimization.AUTOTUNE)
+    dataset = dataset.map(f1, num_parallel_calls=dataset_ops.AUTOTUNE)
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
         lambda _: dataset,
-        num_parallel_calls=optimization.AUTOTUNE,
+        num_parallel_calls=dataset_ops.AUTOTUNE,
         cycle_length=2)
 
     dataset = dataset_ops.Dataset.zip((dataset, dataset_b))
-    dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE)
+    dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE)
     dataset = dataset_ops.Dataset.range(1).repeat().interleave(
         lambda _: dataset,
-        num_parallel_calls=optimization.AUTOTUNE,
+        num_parallel_calls=dataset_ops.AUTOTUNE,
         cycle_length=2)
 
     dataset = dataset_ops.Dataset.zip((dataset, dataset_c))
-    dataset = dataset.map(f2, num_parallel_calls=optimization.AUTOTUNE)
+    dataset = dataset.map(f2, num_parallel_calls=dataset_ops.AUTOTUNE)
     options = dataset_ops.Options()
     options.experimental_optimization.apply_default_optimizations = False
     options.experimental_optimization.autotune = autotune
",0,train
4c960c9c2c54a2f3130af4de46805cf27c616126,tensorflow/tensorflow,"[tf.data] Update benchmarks to reflect moving the `AUTOTUNE` constant from `optimization` to `dataset_ops`.

PiperOrigin-RevId: 251289847",parallel_interleave_benchmark.py,"@@ -23,7 +23,6 @@ import numpy as np
 
 from tensorflow.python.client import session
 from tensorflow.python.data.experimental.ops import interleave_ops
-from tensorflow.python.data.experimental.ops import optimization
 from tensorflow.python.data.experimental.ops import sleep
 from tensorflow.python.data.ops import dataset_ops
 from tensorflow.python.framework import ops
@@ -96,7 +95,7 @@ class ParallelInterleaveBenchmark(test.Benchmark):
     def dataset_fn():
       return dataset_ops.Dataset.range(1).repeat().interleave(
           _make_fake_dataset_fn(),
-          cycle_length=10, num_parallel_calls=optimization.AUTOTUNE)
+          cycle_length=10, num_parallel_calls=dataset_ops.AUTOTUNE)
 
     self._benchmark(dataset_fn=dataset_fn, iters=100, num_elements=1000)
 
",0,train
6b031486f84e66f112231a75201d521829b389c3,tensorflow/tensorflow,Move header to comply with formatting standard,hlo_verifier_test.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include <memory>
 #include <utility>
 
+#include ""absl/strings/str_replace.h""
 #include ""tensorflow/compiler/xla/service/hlo_computation.h""
 #include ""tensorflow/compiler/xla/service/hlo_instruction.h""
 #include ""tensorflow/compiler/xla/service/hlo_module_config.h""
@@ -32,8 +33,6 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/xla_data.pb.h""
 #include ""tensorflow/core/lib/core/status_test_util.h""
 
-#include ""absl/strings/str_replace.h""
-
 namespace xla {
 namespace {
 
",0,train
d3d9dc68ec625ed853b6356757210b063302f396,tensorflow/tensorflow,"internal change

PiperOrigin-RevId: 183333411",capture_tpu_profile.cc,"@@ -47,12 +47,14 @@ string GetCurrentTimeStampAsString() {
   return s;
 }
 
-ProfileResponse Profile(const string& service_addr, int duration_ms) {
+ProfileResponse Profile(const string& service_addr, int duration_ms,
+                        const ProfileOptions& opts) {
   ProfileRequest request;
   request.set_duration_ms(duration_ms);
   request.set_max_events(kMaxEvents);
   request.add_tools(""input_pipeline"");
   request.add_tools(""overview_page"");
+  *request.mutable_opts() = opts;
   std::cout << ""Limiting the number of trace events to "" << kMaxEvents
             << std::endl;
   ::grpc::ClientContext context;
@@ -76,6 +78,7 @@ int main(int argc, char** argv) {
   tensorflow::string FLAGS_service_addr;
   tensorflow::string FLAGS_logdir;
   int FLAGS_duration_ms = 2000;
+  bool FLAGS_include_dataset_ops = true;
   std::vector<tensorflow::Flag> flag_list = {
       tensorflow::Flag(""service_addr"", &FLAGS_service_addr,
                        ""Address of TPU profiler service e.g. localhost:8466""),
@@ -83,6 +86,8 @@ int main(int argc, char** argv) {
                        ""Path of TensorBoard log directory e.g. /tmp/tb_log""),
       tensorflow::Flag(""duration_ms"", &FLAGS_duration_ms,
                        ""Duration of tracing in ms. Default is 2000ms.""),
+      tensorflow::Flag(""include_dataset_ops"", &FLAGS_include_dataset_ops,
+                       ""Set to false to profile longer TPU device traces.""),
   };
 
   std::cout << ""Welcome to the Cloud TPU Profiler v"" << TPU_PROFILER_VERSION
@@ -97,8 +102,10 @@ int main(int argc, char** argv) {
   tensorflow::port::InitMain(argv[0], &argc, &argv);
 
   int duration_ms = FLAGS_duration_ms;
+  tensorflow::ProfileOptions opts;
+  opts.set_include_dataset_ops(FLAGS_include_dataset_ops);
   tensorflow::ProfileResponse response =
-      tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms);
+      tensorflow::tpu::Profile(FLAGS_service_addr, duration_ms, opts);
   // Use the current timestamp as the run name.
   tensorflow::string run = tensorflow::tpu::GetCurrentTimeStampAsString();
   TF_CHECK_OK(tensorflow::tpu::WriteTensorboardTPUProfile(
",0,train
34709045c9cabf6e88a069ecbb1046d054b9d0aa,tensorflow/tensorflow,"Propagate static shape information in tf.boolean_mask
Change: 122184973",array_ops_test.py,"@@ -31,14 +31,12 @@ from tensorflow.python.platform import googletest
 
 class BooleanMaskTest(test_util.TensorFlowTestCase):
 
-  def CheckVersusNumpy(self, ndims_mask, arr_shape):
+  def CheckVersusNumpy(self, ndims_mask, arr_shape, make_mask=None):
     """"""Check equivalence between boolean_mask and numpy masking.""""""
-    arr_size = arr_shape.prod()
-    arr = np.random.rand(arr_size).reshape(arr_shape)
-    mask_shape = arr_shape[: ndims_mask]
-    mask_size = mask_shape.prod()
-    mask = np.random.randint(
-        0, 2, size=mask_size).reshape(mask_shape).astype(bool)
+    if make_mask is None:
+      make_mask = lambda shape: np.random.randint(0, 2, size=shape).astype(bool)
+    arr = np.random.rand(*arr_shape)
+    mask = make_mask(arr_shape[: ndims_mask])
     masked_arr = arr[mask]
     with self.test_session():
       masked_tensor = array_ops.boolean_mask(arr, mask)
@@ -47,6 +45,12 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
           masked_tensor.eval(),
           err_msg=""masked_arr:\n%s\n\nmasked_tensor:\n%s"" % (
               masked_arr, masked_tensor.eval()))
+      masked_tensor.get_shape().assert_is_compatible_with(masked_arr.shape)
+      self.assertSequenceEqual(
+          masked_tensor.get_shape()[1:].as_list(),
+          masked_arr.shape[1:],
+          msg=""shape information lost %s -> %s"" % (
+              masked_arr.shape, masked_tensor.get_shape()))
 
   def testOneDimensionalMask(self):
     # Do 1d separately because it's the only easy one to debug!
@@ -63,6 +67,14 @@ class BooleanMaskTest(test_util.TensorFlowTestCase):
           arr_shape = np.random.randint(1, 5, size=ndims_arr)
           self.CheckVersusNumpy(ndims_mask, arr_shape)
 
+  def testEmptyOutput(self):
+    make_mask = lambda shape: np.zeros(shape, dtype=bool)
+    for ndims_mask in range(1, 4):
+      for ndims_arr in range(ndims_mask, ndims_mask + 3):
+        for _ in range(3):
+          arr_shape = np.random.randint(1, 5, size=ndims_arr)
+          self.CheckVersusNumpy(ndims_mask, arr_shape, make_mask=make_mask)
+
   def testWorksWithDimensionsEqualToNoneDuringGraphBuild(self):
     # The rank of the mask tensor must be specified. This is explained
     # in the docstring as well.
",0,train
34709045c9cabf6e88a069ecbb1046d054b9d0aa,tensorflow/tensorflow,"Propagate static shape information in tf.boolean_mask
Change: 122184973",array_ops.py,"@@ -453,6 +453,11 @@ def boolean_mask(tensor, mask, name=""boolean_mask""):
     shape_tensor[:ndims_mask].assert_is_compatible_with(shape_mask)
 
     tensor = reshape(tensor, concat(0, [[-1], shape(tensor)[ndims_mask:]]))
+    first_dim = shape_tensor[:ndims_mask].num_elements()
+    tensor.set_shape(
+        tensor_shape.as_shape([first_dim])
+        .concatenate(shape_tensor[ndims_mask:]))
+
     mask = reshape(mask, [-1])
     return _apply_mask_1d(tensor, mask)
 
",0,train
1943c55f6c6b25f0eef5359914fc1285f828f05c,tensorflow/tensorflow,"Introduce GetFirstResultType() for a pattern

This is to facilitate a change to happen in MLIR declarative rewrite
rules: a captured operation will change from Operation* to Value*
if it just has one result.
PiperOrigin-RevId: 262220172",prepare_tf.cc,"@@ -65,6 +65,18 @@ namespace TFL {
 // pass.
 namespace {
 
+// Returns the first result type of the given `op`.
+Type GetFirstResultType(Operation *op) { return *op->result_type_begin(); }
+// TODO(antiagainst): We need overload functions of the above to facilitate
+// changes brought by declarative rewrite rules. Remove this post variadic
+// operand support is improved.
+// NOLINTNEXTLINE
+Type GetFirstResultType(TF::TransposeOp op) { return op.getType(); }
+// NOLINTNEXTLINE
+Type GetFirstResultType(TF::ReshapeOp op) { return op.getType(); }
+// NOLINTNEXTLINE
+Type GetFirstResultType(Value *val) { return val->getType(); }
+
 // Prepare TF operations in functions for subsequent legalization.
 struct PrepareTFPass : public FunctionPass<PrepareTFPass> {
   void runOnFunction() override;
",0,train
00979a1a952045eac6f19f42f87a003fe0a819c8,tensorflow/tensorflow,"iOS Metal delegate: squared diff operation tests added.

PiperOrigin-RevId: 272055201",operations.cc,"@@ -163,6 +163,7 @@ OperationType OperationTypeFromString(const std::string& name) {
           {""softmax"", OperationType::SOFTMAX},
           {""sqrt"", OperationType::SQRT},
           {""square"", OperationType::SQUARE},
+          {""squared_diff"", OperationType::SQUARED_DIFF},
           {""subtract"", OperationType::SUB},
           {""tanh"", OperationType::TANH},
           {""upsample_2d"", OperationType::UPSAMPLE_2D},
",0,train
2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py.
Change: 136619012",builder.py,"@@ -1,4 +1,4 @@
-## Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the ""License"");
 # you may not use this file except in compliance with the License.
@@ -141,7 +141,8 @@ class SavedModelBuilder(object):
     Args:
       assets_collection_to_add: The collection where the asset paths are setup.
     """"""
-    asset_source_filepath_list = self._save_assets(assets_collection_to_add)
+    asset_source_filepath_list = self._maybe_save_assets(
+        assets_collection_to_add)
 
     # Return if there are no assets to write.
     if len(asset_source_filepath_list) is 0:
@@ -167,7 +168,22 @@ class SavedModelBuilder(object):
 
     tf_logging.info(""Assets written to: %s"", assets_destination_dir)
 
-  def _save_assets(self, assets_collection_to_add=None):
+  def _maybe_add_legacy_init_op(self, legacy_init_op=None):
+    """"""Add legacy init op to the SavedModel.
+
+    Args:
+      legacy_init_op: Optional legacy init op to support backward compatibility.
+
+    Raises:
+      TypeError if legacy init op is not of type `Operation`.
+    """"""
+    if legacy_init_op is not None:
+      if not isinstance(legacy_init_op, ops.Operation):
+        raise TypeError(""legacy_init_op needs to be an Operation: %r"" %
+                        legacy_init_op)
+      ops.add_to_collection(constants.LEGACY_INIT_OP_KEY, legacy_init_op)
+
+  def _maybe_save_assets(self, assets_collection_to_add=None):
     """"""Saves assets to the meta graph.
 
     Args:
@@ -225,8 +241,11 @@ class SavedModelBuilder(object):
     proto_meta_graph_def = self._saved_model.meta_graphs.add()
     proto_meta_graph_def.CopyFrom(meta_graph_def)
 
-  def add_meta_graph(self, tags, signature_def_map=None,
-                     assets_collection=None):
+  def add_meta_graph(self,
+                     tags,
+                     signature_def_map=None,
+                     assets_collection=None,
+                     legacy_init_op=None):
     """"""Adds the current meta graph to the SavedModel.
 
     Creates a Saver in the current scope and uses the Saver to export the meta
@@ -240,6 +259,8 @@ class SavedModelBuilder(object):
       assets_collection: Assets collection to be saved with SavedModel. Note
           that this collection should be a subset of the assets saved as part of
           the first meta graph in the SavedModel.
+      legacy_init_op: Op or group of ops to execute after the restore op upon a
+        load.
 
     Raises:
       AssertionError: If the variables for the SavedModel have not been saved
@@ -251,12 +272,16 @@ class SavedModelBuilder(object):
           ""Please invoke `add_meta_graph_and_variables()` first."")
 
     # Save asset files, if any.
-    self._save_assets(assets_collection)
+    self._maybe_save_assets(assets_collection)
+
+    # Add legacy init op to the SavedModel.
+    self._maybe_add_legacy_init_op(legacy_init_op)
 
     saver = tf_saver.Saver(
         variables.all_variables(),
         sharded=True,
         write_version=saver_pb2.SaverDef.V2)
+
     meta_graph_def = saver.export_meta_graph()
 
     # Tag the meta graph def and add it to the SavedModel.
@@ -266,7 +291,8 @@ class SavedModelBuilder(object):
                                    sess,
                                    tags,
                                    signature_def_map=None,
-                                   assets_collection=None):
+                                   assets_collection=None,
+                                   legacy_init_op=None):
     """"""Adds the current meta graph to the SavedModel and saves variables.
 
     Creates a Saver to save the variables from the provided session. Exports the
@@ -282,6 +308,8 @@ class SavedModelBuilder(object):
       signature_def_map: The map of signature def map to add to the meta graph
         def.
       assets_collection: Assets collection to be saved with SavedModel.
+      legacy_init_op: Op or group of ops to execute after the restore op upon a
+        load.
     """"""
     if self._has_saved_variables:
       raise AssertionError(""Variables and assets have already been saved. ""
@@ -301,6 +329,9 @@ class SavedModelBuilder(object):
         compat.as_text(variables_dir),
         compat.as_text(constants.VARIABLES_FILENAME))
 
+    # Add legacy init op to the SavedModel.
+    self._maybe_add_legacy_init_op(legacy_init_op)
+
     # Save the variables and export meta graph def.
     saver = tf_saver.Saver(
         variables.all_variables(),
",0,train
2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py.
Change: 136619012",constants.py,"@@ -22,6 +22,8 @@ from __future__ import print_function
 ASSETS_DIRECTORY = ""assets""
 ASSETS_KEY = ""saved_model_assets""
 
+LEGACY_INIT_OP_KEY = ""legacy_init_op""
+
 SAVED_MODEL_SCHEMA_VERSION = 1
 SAVED_MODEL_FILENAME_PB = ""saved_model.pb""
 SAVED_MODEL_FILENAME_PBTXT = ""saved_model.pbtxt""
",0,train
2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py.
Change: 136619012",loader.py,"@@ -29,8 +29,6 @@ variables though will correspond to the saved values from the first meta graph
 added to the SavedModel using `add_meta_graph_and_variables(...)` in
 `builder.py`.
 
-TODO(sukritiramesh): Add support for a single init or main op to run upon load.
-
 Typical usage:
 ```python
 ...
@@ -64,6 +62,7 @@ from __future__ import division
 from __future__ import print_function
 
 import os
+import tensorflow as tf
 
 from google.protobuf import text_format
 from tensorflow.core.protobuf import meta_graph_pb2
@@ -150,6 +149,30 @@ def _get_asset_tensors(export_dir, meta_graph_def_to_load):
   return asset_tensor_dict
 
 
+def _get_legacy_init_op_tensor(meta_graph_def_to_load):
+  """"""Gets the legacy init op tensor, if one exists.
+
+  Args:
+    meta_graph_def_to_load: The meta graph def from the SavedModel to be loaded.
+
+  Returns:
+    The legacy init op tensor, if it exists and `None` otherwise.
+
+  Raises:
+    RuntimeError: If the collection def corresponding to the legacy init op key
+        has other than exactly one tensor.
+  """"""
+  collection_def = meta_graph_def_to_load.collection_def
+  legacy_init_op_tensor = None
+  if constants.LEGACY_INIT_OP_KEY in collection_def:
+    legacy_init_ops = collection_def[
+        constants.LEGACY_INIT_OP_KEY].node_list.value
+    if len(legacy_init_ops) != 1:
+      raise RuntimeError(""Expected exactly one legacy serving init op."")
+    legacy_init_op_tensor = tf.get_collection(constants.LEGACY_INIT_OP_KEY)[0]
+  return legacy_init_op_tensor
+
+
 def load(sess, tags, export_dir):
   """"""Loads the model from a SavedModel as specified by tags.
 
@@ -194,7 +217,15 @@ def load(sess, tags, export_dir):
   saver.restore(sess, variables_path)
 
   # Get asset tensors, if any.
-  _get_asset_tensors(export_dir, meta_graph_def_to_load)
+  asset_tensors_dictionary = _get_asset_tensors(export_dir,
+                                                meta_graph_def_to_load)
+
+  # TODO(sukritiramesh): Add support for a single main op to run upon load,
+  # which will supersede the legacy_init_op.
+  legacy_init_op_tensor = _get_legacy_init_op_tensor(meta_graph_def_to_load)
+
+  if legacy_init_op_tensor is not None:
+    sess.run(fetches=[legacy_init_op_tensor],
+             feed_dict=asset_tensors_dictionary)
 
-  # Return the meta graph def that was loaded into the session.
   return meta_graph_def_to_load
",0,train
2d0887780f7cac362b40936ff282ff0589edb791,tensorflow/tensorflow,"Add legacy init op to SavedModel py.
Change: 136619012",saved_model_test.py,"@@ -373,6 +373,40 @@ class SavedModelTest(tf.test.TestCase):
           compat.as_bytes(""ignored.txt""))
       self.assertFalse(file_io.file_exists(ignored_asset_path))
 
+  def testLegacyInitOp(self):
+    export_dir = os.path.join(tf.test.get_temp_dir(), ""test_legacy_init_op"")
+    builder = saved_model_builder.SavedModelBuilder(export_dir)
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      # Add `v1` and `v2` variables to the graph.
+      v1 = tf.Variable(1, name=""v1"")
+      tf.add_to_collection(""v"", v1)
+      v2 = tf.Variable(2, name=""v2"")
+      tf.add_to_collection(""v"", v2)
+
+      # Initialize another variable `v3` to 42.
+      v3 = tf.Variable(42, name=""v3"", trainable=False, collections=[])
+      tf.add_to_collection(""v"", v3)
+
+      # Set up an assignment op to be run as part of the legacy_init_op.
+      assign_v3 = tf.assign(v3, tf.add(v1, v2))
+      legacy_init_op = tf.group(assign_v3, name=""legacy_init_op"")
+
+      sess.run(tf.initialize_all_variables())
+      builder.add_meta_graph_and_variables(
+          sess, [""foo""], legacy_init_op=legacy_init_op)
+
+    # Save the SavedModel to disk.
+    builder.save()
+
+    with self.test_session(graph=tf.Graph()) as sess:
+      loader.load(sess, [""foo""], export_dir)
+      self.assertEqual(1, tf.get_collection(""v"")[0].eval())
+      self.assertEqual(2, tf.get_collection(""v"")[1].eval())
+      # Evaluates to the sum of the first two variables and assigned as part of
+      # the legacy_init_op, following a restore.
+      self.assertEqual(3, tf.get_collection(""v"")[2].eval())
+
   def testOp(self):
     export_dir = os.path.join(tf.test.get_temp_dir(), ""test_op"")
     builder = saved_model_builder.SavedModelBuilder(export_dir)
",0,train
c96b33aaba891e6ce6aeb8f693e79c56959cbb5f,tensorflow/tensorflow,"Skip some tests for oss kokoro test as initializing tpu system is very slow. Also combine some tests to reduce the number of times when tpu system is re-initialized.

PiperOrigin-RevId: 426245148
Change-Id: Ic0e7dbae68a6b4497a5638d76951a3da5ffd823a",tpu_embedding_v2_correctness_test.py,"@@ -54,6 +54,12 @@ flags.DEFINE_string('model_dir', os.environ.get('TEST_TMPDIR'),
 
 class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase):
 
+  def skip_if_oss(self):
+    if FLAGS.project is not None or FLAGS.zone is not None:
+      self.skipTest(
+          'Skipping tests for oss as it is slow to run every test in cloud tpu.'
+      )
+
   def setUp(self):
     super(TPUEmbeddingCorrectness, self).setUp()
     self.embedding_values = np.array(list(range(32)), dtype=np.float64)
@@ -186,6 +192,8 @@ class TPUEmbeddingCorrectness(parameterized.TestCase, test.TestCase):
                          [True, False], [True, False], [True, False]))
   def test_embedding(self, optimizer_name, training, sparse,
                      is_high_dimensional):
+    if optimizer_name != 'sgd':
+      self.skip_if_oss()
     strategy, mid_level_api, optimizer = (
         self._create_strategy_and_mid_level(optimizer_name))
 
",0,train
c96b33aaba891e6ce6aeb8f693e79c56959cbb5f,tensorflow/tensorflow,"Skip some tests for oss kokoro test as initializing tpu system is very slow. Also combine some tests to reduce the number of times when tpu system is re-initialized.

PiperOrigin-RevId: 426245148
Change-Id: Ic0e7dbae68a6b4497a5638d76951a3da5ffd823a",tpu_embedding_v2_test.py,"@@ -67,6 +67,12 @@ flags.DEFINE_string('model_dir', os.environ.get('TEST_TMPDIR'),
 
 class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase):
 
+  def skip_if_oss(self):
+    if FLAGS.project is not None or FLAGS.zone is not None:
+      self.skipTest(
+          'Skipping tests for oss as it is slow to run every test in cloud tpu.'
+      )
+
   def setUp(self):
     super(TPUEmbeddingCheckpointTest, self).setUp()
     self.resolver = tpu_cluster_resolver.TPUClusterResolver(
@@ -161,6 +167,7 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase):
     # This test works right now because we only have one TPU host in the unit
     # environment. Initializing from checkpoint does not understand how to
     # pass the sharding info to the restore op right now.
+    self.skip_if_oss()
 
     class TestModule(module.Module):
 
@@ -310,6 +317,8 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase):
                                                                    optimizer):
     # Reinitialize the TPU so that we can re-initialize the embeddings with the
     # given optimizer.
+    if optimizer != tpu_embedding_v2_utils.SGD:
+      self.skip_if_oss()
     tpu_strategy_util.initialize_tpu_system(self.resolver)
     optimizer = optimizer(learning_rate=0.1)
 
@@ -333,6 +342,12 @@ class TPUEmbeddingCheckpointTest(parameterized.TestCase, test.TestCase):
 
 class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
+  def skip_if_oss(self):
+    if FLAGS.project is not None or FLAGS.zone is not None:
+      self.skipTest(
+          'Skipping tests for oss as it is slow to run every test in cloud tpu.'
+      )
+
   def setUp(self):
     super(TPUEmbeddingTest, self).setUp()
     self.embedding_values = np.array(list(range(32)), dtype=np.float64)
@@ -435,27 +450,23 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
     # matter.
     mid_level_api.build(64)
 
+    # Test pass non tensor to apply_gradients.
     @def_function.function
-    def test_apply():
+    def test_apply_1():
       mid_level_api.apply_gradients((1, 2, 3))
 
     with self.assertRaisesRegex(ValueError, 'found non-tensor type'):
-      strategy.run(test_apply)
+      strategy.run(test_apply_1)
 
-  def test_pass_different_structure_to_apply_gradients(self):
-    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
-    # We aren't going to actually run anything, so the batch_size here does not
-    # matter.
-    mid_level_api.build(64)
+    # Test pass different structure to apply_gradients.
     @def_function.function
-    def test_apply():
+    def test_apply_2():
       # This should be a tuple as feature_config is a tuple of 3 configs.
       mid_level_api.apply_gradients([1, 2, 3])
 
     with self.assertRaisesRegex(
-        TypeError,
-        'The two structures don\'t have the same nested structure.'):
-      strategy.run(test_apply)
+        TypeError, 'The two structures don\'t have the same nested structure.'):
+      strategy.run(test_apply_2)
 
   def test_pass_none_to_apply_gradients(self):
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
@@ -520,13 +531,12 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
     self.num_replicas = strategy.num_replicas_in_sync
     return strategy
 
-  def test_dequeue_on_cpu(self):
+  def test_enqueue_dequeue_apply_gradients_on_cpu(self):
+    # Dequeue on CPU.
     mid_level_api = self._create_mid_level()
     with self.assertRaises(RuntimeError):
       mid_level_api.dequeue()
-
-  def test_enqueue_on_cpu(self):
-    mid_level_api = self._create_mid_level()
+    # Enqueue on CPU.
     features = {
         'watched': sparse_tensor.SparseTensor(
             indices=self.feature_watched_indices,
@@ -534,11 +544,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
             dense_shape=[2, 2])}
     with self.assertRaises(RuntimeError):
       mid_level_api.enqueue(features)
-
-  def test_apply_gradients_on_cpu(self):
+    # Apply gradient on CPU.
     mid_level_api = self._create_mid_level()
     with self.assertRaises(RuntimeError):
-      mid_level_api.enqueue(None)
+      mid_level_api.apply_gradients(None)
 
   def test_get_embedding_tables_on_cpu(self):
     mid_level_api = self._create_mid_level()
@@ -573,10 +582,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
     with self.assertRaisesRegex(ValueError, 'Weight specified for dense input'):
       test_fn()
 
-  def test_enqueue_wrong_weight_type_for_sparse_tensor(self):
+  def test_enqueue_wrong_weight_type_for_sparse_and_ragged_tensor(self):
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
 
-    sparse = self._create_sparse_dataset(strategy)
+    sparse = self._create_sparse_dataset(strategy, include_weights=True)
     ragged = self._create_ragged_dataset(strategy, include_weights=True)
     sparse_iter = iter(
         strategy.experimental_distribute_dataset(
@@ -590,48 +599,32 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
                 experimental_fetch_to_device=False)))
 
     @def_function.function
-    def test_fn():
+    def test_sparse_fn():
       def step():
         return mid_level_api.dequeue()
 
-      features = next(sparse_iter)
+      features, _ = next(sparse_iter)
       _, weights = next(ragged_iter)
       mid_level_api.enqueue(features, weights=weights, training=False)
       return strategy.run(step)
 
     with self.assertRaisesRegex(
         ValueError, 'which does not match type input which is SparseTensor.'):
-      test_fn()
-
-  def test_enqueue_wrong_weight_type_for_ragged_tensor(self):
-    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
-
-    sparse = self._create_sparse_dataset(strategy, include_weights=True)
-    ragged = self._create_ragged_dataset(strategy)
-    sparse_iter = iter(
-        strategy.experimental_distribute_dataset(
-            sparse,
-            options=distribute_lib.InputOptions(
-                experimental_fetch_to_device=False)))
-    ragged_iter = iter(
-        strategy.experimental_distribute_dataset(
-            ragged,
-            options=distribute_lib.InputOptions(
-                experimental_fetch_to_device=False)))
+      test_sparse_fn()
 
     @def_function.function
-    def test_fn():
+    def test_ragged_fn():
       def step():
         return mid_level_api.dequeue()
 
       _, weights = next(sparse_iter)
-      features = next(ragged_iter)
+      features, _ = next(ragged_iter)
       mid_level_api.enqueue(features, weights=weights, training=False)
       return strategy.run(step)
 
     with self.assertRaisesRegex(
         ValueError, 'which does not match type input which is RaggedTensor.'):
-      test_fn()
+      test_ragged_fn()
 
   def test_enqueue_sparse_and_ragged(self):
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
@@ -662,10 +655,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
     test_fn()
 
-  def test_enqueue_incorrect_structure_for_features(self):
+  def test_enqueue_incorrect_structure_for_features_and_weights(self):
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
 
-    sparse = self._create_sparse_dataset(strategy)
+    sparse = self._create_sparse_dataset(strategy, include_weights=True)
     sparse_iter = iter(
         strategy.experimental_distribute_dataset(
             sparse,
@@ -673,7 +666,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
                 experimental_fetch_to_device=False)))
 
     @def_function.function
-    def test_fn():
+    def test_features_fn():
       def step():
         return mid_level_api.dequeue()
 
@@ -684,20 +677,10 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
     # The error here is raised from nest.assert_same_structure
     with self.assertRaises(ValueError):
-      test_fn()
-
-  def test_enqueue_incorrect_structure_for_weights(self):
-    strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
-
-    sparse = self._create_sparse_dataset(strategy, include_weights=True)
-    sparse_iter = iter(
-        strategy.experimental_distribute_dataset(
-            sparse,
-            options=distribute_lib.InputOptions(
-                experimental_fetch_to_device=False)))
+      test_features_fn()
 
     @def_function.function
-    def test_fn():
+    def test_weights_fn():
       def step():
         return mid_level_api.dequeue()
 
@@ -708,7 +691,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
     # The error here is raised from nest.assert_same_structure
     with self.assertRaises(ValueError):
-      test_fn()
+      test_weights_fn()
 
   def test_enqueue_ragged_tensor(self):
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
@@ -812,6 +795,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
   @parameterized.parameters([True, False])
   def test_enqueue_cpu_tensor_with_outside_compilation(self, use_mlir):
+
     if use_mlir:
       config.enable_mlir_bridge()
 
@@ -834,6 +818,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
   @parameterized.parameters(True, False)
   def test_enqueue_with_weights(self, ragged):
+    self.skip_if_oss()
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
     weight = 0.5
     if ragged:
@@ -885,6 +870,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
   @parameterized.parameters([True, False])
   def test_enqueue_with_outside_compilation(self, use_mlir):
+    self.skip_if_oss()
     if use_mlir:
       config.enable_mlir_bridge()
 
@@ -928,6 +914,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 
   @parameterized.parameters(True, False)
   def test_enqueue_with_outside_compilation_in_control_flow(self, use_mlir):
+    self.skip_if_oss()
     if use_mlir:
       config.enable_mlir_bridge()
 
@@ -959,6 +946,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
       enqueue_with_outside_compilation()
 
   def test_enqueue_with_outside_compilation_non_direct_input(self):
+    self.skip_if_oss()
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
     mid_level_api.build([
         TensorShape((self.batch_size, 2)),
@@ -987,6 +975,7 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
       enqueue_with_outside_compilation()
 
   def test_enqueue_with_outside_compilation_auto_mode(self):
+    self.skip_if_oss()
     strategy, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
     mid_level_api.build([
         TensorShape((self.batch_size, 2)),
@@ -1483,6 +1472,12 @@ class TPUEmbeddingTest(parameterized.TestCase, test.TestCase):
 class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase,
                                             test.TestCase):
 
+  def skip_if_oss(self):
+    if FLAGS.project is not None or FLAGS.zone is not None:
+      self.skipTest(
+          'Skipping tests for oss as it is slow to run every test in cloud tpu.'
+      )
+
   def setUp(self):
     super(TPUEmbeddingHighDimensionalTensorTest, self).setUp()
     self.embedding_values = np.array(list(range(32)), dtype=np.float64)
@@ -1815,6 +1810,7 @@ class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase,
       test_fn()
 
   def test_not_fully_defined_output_shapes_in_feature_config(self):
+    self.skip_if_oss()
     _, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
 
     # Feature config sets undefined output shapes
@@ -1823,6 +1819,7 @@ class TPUEmbeddingHighDimensionalTensorTest(parameterized.TestCase,
       mid_level_api.build()
 
   def test_not_fully_defined_output_shapes_for_build(self):
+    self.skip_if_oss()
     _, mid_level_api, _ = self._create_strategy_and_mid_level('sgd')
 
     # Build with undefined output shape
",0,train
0fccb5d8384d2fd6e0c8d57fc1ebfd094a5c19af,tensorflow/tensorflow,"Replace assert_called() with called to fix python3 test failures

PiperOrigin-RevId: 169702185",training_test.py,"@@ -253,7 +253,7 @@ class _TrainingExecutorTrainingTest(object):
         config=test.mock.ANY,
         start=False)
 
-    mock_server_instance.start.assert_called()
+    self.assertTrue(mock_server_instance.start.called)
 
     mock_est.train.assert_called_with(input_fn=train_spec.input_fn,
                                       max_steps=train_spec.max_steps,
@@ -365,7 +365,7 @@ class TrainingExecutorRunWorkerTest(_TrainingExecutorTrainingTest,
     with test.mock.patch.object(time, 'sleep') as mock_sleep:
       mock_sleep.side_effect = lambda s: self.assertEqual(expected_secs, s)
       self._run_task(executor)
-      mock_sleep.assert_called()
+      self.assertTrue(mock_sleep.called)
 
 
 class TrainingExecutorRunChiefTest(_TrainingExecutorTrainingTest,
@@ -546,8 +546,8 @@ class TrainingExecutorRunPsTest(test.TestCase):
         config=test.mock.ANY,
         start=False)
 
-    mock_server_instance.start.assert_called()
-    mock_server_instance.join.assert_called()
+    self.assertTrue(mock_server_instance.start.called)
+    self.assertTrue(mock_server_instance.join.called)
 
   def test_fail_with_empty_cluster_spec(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
",0,train
8950c470bb11a9b94c0dd08d73156008dfac60c9,tensorflow/tensorflow,"Remove automatic control dep wrapping from layers in v2.

PiperOrigin-RevId: 316638920
Change-Id: Iad14b1a4b0b14052f34784401b375a14b49a7641",base_layer.py,"@@ -40,7 +40,6 @@ from tensorflow.python.eager import context
 from tensorflow.python.eager import execute
 from tensorflow.python.eager import function
 from tensorflow.python.eager import monitoring
-from tensorflow.python.framework import auto_control_deps
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
@@ -1105,17 +1104,7 @@ class Layer(module.Module, version_utils.LayerVersionSelector):
 
           try:
             with ops.enable_auto_cast_variables(self._compute_dtype_object):
-              # Add auto_control_deps in V2 when they are not already added by
-              # a `tf.function`.
-              if (ops.executing_eagerly_outside_functions() and
-                  not base_layer_utils.is_in_eager_or_tf_function()):
-                with auto_control_deps.AutomaticControlDependencies() as acd:
-                  outputs = call_fn(cast_inputs, *args, **kwargs)
-                  # Wrap Tensors in `outputs` in `tf.identity` to avoid
-                  # circular dependencies.
-                  outputs = base_layer_utils.mark_as_return(outputs, acd)
-              else:
-                outputs = call_fn(cast_inputs, *args, **kwargs)
+              outputs = call_fn(cast_inputs, *args, **kwargs)
 
           except errors.OperatorNotAllowedInGraphError as e:
             raise TypeError('You are attempting to use Python control '
",0,train
601e77f51f558724c7b71c2d9d362e724211f813,tensorflow/tensorflow,"[XLA:Python] Add an mlir_module_to_xla_computation Python helper.

In passing, remove some unnecessary MLIR dependencies from the XLA Python client.

PiperOrigin-RevId: 413926439
Change-Id: Iac9011d9dd446ab88ce6191537085e34991b094a",mlir.cc,"@@ -13,26 +13,32 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include <string>
+
 #include ""llvm/Support/raw_ostream.h""
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
 #include ""mlir/IR/BuiltinOps.h""  // from @llvm-project
 #include ""mlir/IR/MLIRContext.h""  // from @llvm-project
+#include ""mlir/Parser.h""  // from @llvm-project
 #include ""pybind11/pybind11.h""
+#include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/chlo_ops.h""
 #include ""tensorflow/compiler/mlir/hlo/include/mlir-hlo/Dialect/mhlo/IR/hlo_ops.h""
 #include ""tensorflow/compiler/mlir/xla/hlo_to_mlir_hlo.h""
 #include ""tensorflow/compiler/xla/client/xla_computation.h""
+#include ""tensorflow/compiler/xla/pjrt/mlir_to_hlo.h""
 #include ""tensorflow/compiler/xla/python/types.h""
 #include ""tensorflow/compiler/xla/status.h""
 
 namespace py = pybind11;
 
 namespace xla {
+namespace {
 
 // Converts an XlaComputation to an MHLO mlir::Module string. Exists for
 // backwards compatibility.
 // TODO(phawkins): port remaining users of XlaComputations to use mlir::Modules
 // instead and delete this function.
-StatusOr<std::string> XlaComputationToMlirModule(
+StatusOr<std::string> PyXlaComputationToMlirModule(
     const XlaComputation& computation) {
   mlir::MLIRContext context;
   mlir::OwningModuleRef module =
@@ -47,11 +53,43 @@ StatusOr<std::string> XlaComputationToMlirModule(
   return s;
 }
 
+StatusOr<XlaComputation> PyMlirModuleToXlaComputation(std::string mlir_module,
+                                                      bool use_tuple_args,
+                                                      bool return_tuple) {
+  mlir::MLIRContext context;
+  mlir::OwningModuleRef module;
+  context.loadDialect<mlir::StandardOpsDialect>();
+  context.loadDialect<mlir::mhlo::MhloDialect>();
+  context.loadDialect<mlir::chlo::HloClientDialect>();
+  mlir::StatusScopedDiagnosticHandler diagnostic_handler(&context);
+  module = mlir::parseSourceString(
+      llvm::StringRef(mlir_module.data(), mlir_module.size()), &context);
+  if (!module) {
+    return diagnostic_handler.ConsumeStatus();
+  }
+  if (failed(module->verify())) {
+    VLOG(1) << ""MLIR verification failed."";
+    module->dump();
+    return diagnostic_handler.ConsumeStatus();
+  }
+
+  XlaComputation computation;
+  TF_RETURN_IF_ERROR(
+      MlirToXlaComputation(*module, computation, use_tuple_args, return_tuple));
+  return computation;
+}
+
+}  // namespace
+
 void BuildMlirSubmodule(py::module& m) {
   py::module mlir_module = m.def_submodule(""mlir"", ""MLIR/XLA integration"");
 
   mlir_module.def(""xla_computation_to_mlir_module"",
-                  &XlaComputationToMlirModule);
+                  &PyXlaComputationToMlirModule);
+  mlir_module.def(""mlir_module_to_xla_computation"",
+                  &PyMlirModuleToXlaComputation, py::arg(""mlir_module""),
+                  py::arg(""use_tuple_args"") = false,
+                  py::arg(""return_tuple"") = false);
 }
 
 }  // namespace xla
",0,test
601e77f51f558724c7b71c2d9d362e724211f813,tensorflow/tensorflow,"[XLA:Python] Add an mlir_module_to_xla_computation Python helper.

In passing, remove some unnecessary MLIR dependencies from the XLA Python client.

PiperOrigin-RevId: 413926439
Change-Id: Iac9011d9dd446ab88ce6191537085e34991b094a",xla_client.py,"@@ -44,7 +44,7 @@ profiler = _xla.profiler
 
 # Just an internal arbitrary increasing number to help with backward-compatible
 # changes.
-_version = 45
+_version = 46
 
 xla_platform_names = {
     'cpu': 'Host',
",0,test
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",convnet.py,"@@ -286,7 +286,7 @@ def minimize_loss_distributed(task_id, num_worker_tasks, num_ps_tasks, master,
         damping=0.001,
         layer_collection=layer_collection,
         momentum=0.9)
-    inv_update_queue = oq.OpQueue(optimizer.inv_updates_dict.values())
+    inv_update_queue = oq.OpQueue(optimizer.inv_update_ops)
     sync_optimizer = tf.train.SyncReplicasOptimizer(
         opt=optimizer,
         replicas_to_aggregate=_num_gradient_tasks(num_worker_tasks))
",0,train
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",mlp.py,"@@ -239,3 +239,85 @@ def train_mnist_multitower(data_dir,
       })
   return minimize(
       loss, accuracy, layer_collection, session_config=session_config)
+
+
+def train_mnist_estimator(data_dir, num_epochs, use_fake_data=False):
+  """"""Train an MLP on MNIST using tf.estimator.
+
+  Args:
+    data_dir: string. Directory to read MNIST examples from.
+    num_epochs: int. Number of passes to make over the training set.
+    use_fake_data: bool. If True, generate a synthetic dataset.
+
+  Returns:
+    accuracy of model on the final minibatch of training data.
+  """"""
+
+  # Load a dataset.
+  def input_fn():
+    tf.logging.info(""Loading MNIST into memory."")
+    return mnist.load_mnist(
+        data_dir,
+        num_epochs=num_epochs,
+        batch_size=64,
+        flatten_images=True,
+        use_fake_data=use_fake_data)
+
+  def model_fn(features, labels, mode, params):
+    """"""Model function for MLP trained with K-FAC.
+
+    Args:
+      features: Tensor of shape [batch_size, input_size]. Input features.
+      labels: Tensor of shape [batch_size]. Target labels for training.
+      mode: tf.estimator.ModeKey. Must be TRAIN.
+      params: ignored.
+
+    Returns:
+      EstimatorSpec for training.
+
+    Raises:
+      ValueError: If 'mode' is anything other than TRAIN.
+    """"""
+    del params
+
+    if mode != tf.estimator.ModeKeys.TRAIN:
+      raise ValueError(""Only training is supposed with this API."")
+
+    # Build a ConvNet.
+    layer_collection = lc.LayerCollection()
+    loss, accuracy = build_model(
+        features, labels, num_labels=10, layer_collection=layer_collection)
+
+    # Train with K-FAC.
+    global_step = tf.train.get_or_create_global_step()
+    optimizer = opt.KfacOptimizer(
+        learning_rate=tf.train.exponential_decay(
+            0.00002, global_step, 10000, 0.5, staircase=True),
+        cov_ema_decay=0.95,
+        damping=0.0001,
+        layer_collection=layer_collection,
+        momentum=0.99)
+
+    # Run cov_update_op every step. Run 1 inv_update_ops per step.
+    cov_update_op = optimizer.cov_update_op
+    inv_update_op = tf.group(
+        tf.contrib.kfac.utils.batch_execute(
+            global_step, optimizer.inv_update_thunks, batch_size=1))
+    with tf.control_dependencies([cov_update_op, inv_update_op]):
+      train_op = optimizer.minimize(loss, global_step=global_step)
+
+    # Print metrics every 5 sec.
+    hooks = [
+        tf.train.LoggingTensorHook(
+            {
+                ""loss"": loss,
+                ""accuracy"": accuracy
+            }, every_n_secs=5),
+    ]
+    return tf.estimator.EstimatorSpec(
+        mode=mode, loss=loss, train_op=train_op, training_hooks=hooks)
+
+  # Train until input_fn() is empty with Estimator. This is a prerequisite for
+  # TPU compatibility.
+  estimator = tf.estimator.Estimator(model_fn=model_fn)
+  estimator.train(input_fn=input_fn)
",0,train
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",mlp_mnist_main.py,"@@ -33,7 +33,11 @@ FLAGS = None
 
 def main(argv):
   _ = argv
-  if FLAGS.num_towers > 1:
+  if FLAGS.use_estimator:
+    if FLAGS.num_towers != 1:
+      raise ValueError(""Only 1 device supported in tf.estimator example."")
+    mlp.train_mnist_estimator(FLAGS.data_dir, num_epochs=200)
+  elif FLAGS.num_towers > 1:
     mlp.train_mnist_multitower(
         FLAGS.data_dir, num_epochs=200, num_towers=FLAGS.num_towers)
   else:
@@ -52,5 +56,9 @@ if __name__ == ""__main__"":
       type=int,
       default=1,
       help=""Number of CPUs to split minibatch across."")
+  parser.add_argument(
+      ""--use_estimator"",
+      action=""store_true"",
+      help=""Use tf.estimator API to train."")
   FLAGS, unparsed = parser.parse_known_args()
   tf.app.run(main=main, argv=[sys.argv[0]] + unparsed)
",0,train
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",mlp_test.py,"@@ -53,6 +53,11 @@ class MlpTest(tf.test.TestCase):
       mlp.train_mnist_multitower(
           data_dir=None, num_epochs=1, num_towers=2, use_fake_data=True)
 
+  def testTrainMnistEstimator(self):
+    with tf.Graph().as_default():
+      # Ensure model training doesn't crash.
+      mlp.train_mnist_estimator(data_dir=None, num_epochs=1, use_fake_data=True)
+
 
 if __name__ == ""__main__"":
   tf.test.main()
",0,train
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",estimator.py,"@@ -281,11 +281,6 @@ class FisherEstimator(object):
 
     return thunk
 
-  @property
-  def inv_updates_dict(self):
-    """"""Returns a dictionary mapping strings to inv_update_ops.""""""
-    return {op.name: op for op in self.inv_update_ops}
-
   def _get_grads_lists_gradients(self, tensors):
     grads_flat = gradients_impl.gradients(
         self._layers.total_sampled_loss(),
",0,train
aaac4ac3e9d1d8c48db9e4010459a417a07553d2,tensorflow/tensorflow,"K-FAC: Example using tf.estimator and K-FAC.
- Removes FisherEstimator.inv_updates_dict. Users should create directly from
  FisherEstimator.inv_update_ops.
- Adds (cov|inv)_update_(thunks|ops) to KfacOptimizer.

PiperOrigin-RevId: 182135826",optimizer.py,"@@ -137,12 +137,32 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
     self._batch_size = array_ops.shape(layer_collection.losses[0].inputs)[0]
     self._losses = layer_collection.losses
 
-    self.cov_update_op = self._fisher_est.cov_update_op
-    self.inv_update_op = self._fisher_est.inv_update_op
-    self.inv_updates_dict = self._fisher_est.inv_updates_dict
-
     super(KfacOptimizer, self).__init__(learning_rate, name=name)
 
+  @property
+  def cov_update_thunks(self):
+    return self._fisher_est.cov_update_thunks
+
+  @property
+  def cov_update_ops(self):
+    return self._fisher_est.cov_update_ops
+
+  @property
+  def cov_update_op(self):
+    return self._fisher_est.cov_update_op
+
+  @property
+  def inv_update_thunks(self):
+    return self._fisher_est.inv_update_thunks
+
+  @property
+  def inv_update_ops(self):
+    return self._fisher_est.inv_update_ops
+
+  @property
+  def inv_update_op(self):
+    return self._fisher_est.inv_update_op
+
   @property
   def variables(self):
     return self._fisher_est.variables
",0,train
eb6474b35cd1c5792c9e9034396ba6351c198915,tensorflow/tensorflow,"Fix documentation of ResourceApplyFtrlV2.

The doc says that we add grad_with_shrinkage^2 to accum, but what's
really added in the op kernel is just grad^2 (same for XLA). So adjust
the documentation to reflect the implementation's behavior.
Also, add/enable a test for this.

PiperOrigin-RevId: 378933095
Change-Id: If0258ad3e79e87f2253cc0be878b8034e4f3249a",ftrl_ops_test.py,"@@ -18,7 +18,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import unittest
 import numpy as np
 
 from tensorflow.compiler.tests import xla_test
@@ -137,19 +136,16 @@ class ResourceApplyFtrlTest(xla_test.XLATestCase):
         lr=1, l1=1, l2=-1.25, lr_power=1)
     self.assertAllClose(0.25 * np.ones((1, 3, 2)), var)
 
-  @unittest.skip(""Needs cl/378772774"")
   def testL2Shrinkage(self):
-    """"""Test that 2 * l2_shrinkage * var is added to the gradient.""""""
-    # TODO(kramm): XLA adds grad^2 to accum, not grad_to_use^2
+    """"""Test that 2 * l2_shrinkage * var is *not* added to the gradient.""""""
     _, accum, _ = self._eval(
         var=np.ones((1, 3, 2)),
         accum=np.zeros((1, 3, 2)),
         linear=np.zeros((1, 3, 2)),
         grad=np.zeros((1, 3, 2)),
         lr=7, l1=3, l2=7, lr_power=2, l2_shrinkage=0.5)
-    self.assertAllClose(np.ones((1, 3, 2)), accum)
+    self.assertAllClose(np.zeros((1, 3, 2)), accum)
 
-  @unittest.skip(""Needs cl/378772774"")
   def testL2ShrinkageOnLinear(self):
     """"""Test that 2 * l2_shrinkage * var is added to linear.""""""
     _, _, linear = self._eval(
",0,train
f54c05b8375f7cdb1cb6300d7c55914bea3df520,tensorflow/tensorflow,"Return the new graph instead of using input/output param

PiperOrigin-RevId: 400762263
Change-Id: I5cfe18d7c959d80a7b7f95fd35b7746048b2c2f0",tfrt_graph_execution_state.cc,"@@ -202,7 +202,14 @@ TfrtGraphExecutionState::CreateOptimizedGraph(
   result.functionalization_duration =
       grappler_start_time - functionalization_start_time;
 
-  TF_RETURN_IF_ERROR(OptimizeGraph(result.graph, build_graph_options));
+  auto status_or_optimized_graph =
+      OptimizeGraph(*result.graph, build_graph_options);
+  if (status_or_optimized_graph.ok()) {
+    result.graph = std::move(status_or_optimized_graph.ValueOrDie());
+  } else {
+    LOG(WARNING) << ""TFRT failed to optimize graph: ""
+                 << status_or_optimized_graph.status();
+  }
 
   if (VLOG_IS_ON(1)) {
     DumpGraphToFile(""after_grappler"", *result.graph);
@@ -536,21 +543,17 @@ Status OptimizeFunctions(FunctionDefLibrary& flib_proto,
 
 }  // namespace
 
-Status TfrtGraphExecutionState::OptimizeGraph(
-    std::unique_ptr<tensorflow::Graph>& graph,
+StatusOr<std::unique_ptr<tensorflow::Graph>>
+TfrtGraphExecutionState::OptimizeGraph(
+    const tensorflow::Graph& graph,
     const tensorflow::BuildGraphOptions& build_graph_options) {
   std::unique_ptr<tensorflow::Graph> optimized_graph;
   std::unique_ptr<tensorflow::FunctionLibraryDefinition> optimized_flib;
 
   // Invoke Grappler to optimize the graph.
-  auto status = graph_execution_state_->OptimizeGraph(
-      build_graph_options, *graph, &graph->flib_def(), &optimized_graph,
-      &optimized_flib);
-
-  if (!status.ok()) {
-    LOG(WARNING) << ""TFRT failed to optimize graph: "" << status;
-    return tensorflow::Status::OK();
-  }
+  TF_RETURN_IF_ERROR(graph_execution_state_->OptimizeGraph(
+      build_graph_options, graph, &graph.flib_def(), &optimized_graph,
+      &optimized_flib));
 
   FunctionDefLibrary optimized_flib_proto = optimized_flib->ToProto();
   if (run_placer_grappler_on_functions_) {
@@ -564,8 +567,7 @@ Status TfrtGraphExecutionState::OptimizeGraph(
 
   TF_RETURN_IF_ERROR(optimized_graph->AddFunctionLibrary(optimized_flib_proto));
 
-  graph = std::move(optimized_graph);
-  return tensorflow::Status::OK();
+  return optimized_graph;
 }
 
 }  // namespace tfrt_stub
",0,train
f54c05b8375f7cdb1cb6300d7c55914bea3df520,tensorflow/tensorflow,"Return the new graph instead of using input/output param

PiperOrigin-RevId: 400762263
Change-Id: I5cfe18d7c959d80a7b7f95fd35b7746048b2c2f0",tfrt_graph_execution_state.h,"@@ -81,8 +81,8 @@ class TfrtGraphExecutionState {
     return graph_execution_state_->flib_def();
   }
 
-  Status OptimizeGraph(
-      std::unique_ptr<tensorflow::Graph>& graph,
+  StatusOr<std::unique_ptr<tensorflow::Graph>> OptimizeGraph(
+      const tensorflow::Graph& graph,
       const tensorflow::BuildGraphOptions& build_graph_options);
 
   std::unique_ptr<tensorflow::GraphExecutionState> graph_execution_state_;
",0,train
8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle

PiperOrigin-RevId: 289108443
Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",pywrap_tensor.cc,"@@ -252,25 +252,6 @@ TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle,
 #undef RETURN_ERROR
 }
 
-TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* value,
-                                          DataType dtype) {
-  tensorflow::TensorHandle* handle = nullptr;
-  tensorflow::Tensor t;
-  // TODO(josh11b): Have PySeqToTensor set python errors instead of
-  // returning Status.
-  auto cppstatus = tensorflow::PySeqToTensor(value, dtype, &t);
-  if (cppstatus.ok()) {
-    cppstatus = tensorflow::TensorHandle::CreateLocalHandle(
-        t, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &handle);
-  }
-  if (!cppstatus.ok()) {
-    PyErr_SetString(PyExc_ValueError, cppstatus.error_message().c_str());
-    return nullptr;
-  }
-  CHECK_NE(handle, nullptr);
-  return new TFE_TensorHandle{tensorflow::TensorHandleInterface(handle)};
-}
-
 TFE_TensorHandle* ConvertToEagerTensorUncached(TFE_Context* ctx,
                                                PyObject* value,
                                                tensorflow::DataType dtype,
",0,test
8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle

PiperOrigin-RevId: 289108443
Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",py_seq_tensor.cc,"@@ -15,6 +15,7 @@ limitations under the License.
 
 #include ""tensorflow/python/lib/core/py_seq_tensor.h""
 
+#include ""tensorflow/c/eager/c_api_internal.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_shape.h""
 #include ""tensorflow/core/framework/types.h""
@@ -67,7 +68,7 @@ bool IsPyFloat(PyObject* obj) {
 
 struct ConverterState {
   // The inferred tensor shape.
-  TensorShape inferred_shape;
+  gtl::InlinedVector<int64, 4> inferred_shape;
 
   // The inferred tensor data type.
   DataType inferred_dtype;
@@ -155,14 +156,14 @@ Status InferShapeAndType(PyObject* obj, ConverterState* state) {
     } else if (PySequence_Check(obj)) {
       auto length = PySequence_Length(obj);
       if (length > 0) {
-        state->inferred_shape.AddDim(length);
+        state->inferred_shape.push_back(length);
         PyObject* elem = nullptr;
         TF_RETURN_IF_ERROR(SampleElementFromSequence(obj, &elem));
         obj = elem;
         refs_to_clean.push_back(make_safe(obj));
         continue;
       } else if (length == 0) {
-        state->inferred_shape.AddDim(length);
+        state->inferred_shape.push_back(length);
         state->inferred_dtype = DT_INVALID;  // Invalid dtype for empty tensors.
       } else {
         // The sequence does not have a valid length (PySequence_Length < 0).
@@ -247,12 +248,12 @@ struct Converter {
     Safe_PyObjectPtr seq = make_safe(PySequence_Fast(obj, """"));
     if (TF_PREDICT_FALSE(seq == nullptr)) return ErrorRectangular;
 
-    const int64 s = state->inferred_shape.dim_size(depth);
+    const int64 s = state->inferred_shape[depth];
     if (TF_PREDICT_FALSE(s != PySequence_Fast_GET_SIZE(seq.get()))) {
       return ErrorRectangular;
     }
 
-    if (state->inferred_shape.dims() - depth > 1) {
+    if (state->inferred_shape.size() - depth > 1) {
       /* Iterate over outer dim, and recursively convert each element. */
       for (int64 i = 0; i < s; ++i) {
         const char* error = Helper(PySequence_Fast_GET_ITEM(seq.get(), i),
@@ -272,24 +273,31 @@ struct Converter {
     return nullptr;
   }
 
-  static const char* Convert(PyObject* obj, ConverterState* state,
-                             Tensor* dest) {
+  static Status Convert(TFE_Context* ctx, PyObject* obj, ConverterState* state,
+                        TFE_TensorHandle** h, const char** error) {
     /* TODO(josh11b): Allocator & attributes? */
-    Tensor result(ConverterTraits<T>::kTypeEnum, state->inferred_shape);
-    if (state->inferred_shape.dims() == 0) { /* Scalar case */
+    Tensor result(ConverterTraits<T>::kTypeEnum,
+                  TensorShape(state->inferred_shape));
+    if (state->inferred_shape.empty()) { /* Scalar case */
       T value;
       auto scalar = ZeroDimArrayToScalar(obj, state);
-      const char* error = ConverterTraits<T>::ConvertScalar(scalar, &value);
+      *error = ConverterTraits<T>::ConvertScalar(scalar, &value);
       Py_DECREF(scalar);
-      if (error != nullptr) return error;
+      if (*error != nullptr) return errors::InvalidArgument(*error);
       result.scalar<T>()() = value;
     } else {
       T* buf = result.flat<T>().data();
-      const char* error = Helper(obj, 0, state, &buf);
-      if (error != nullptr) return error;
+      *error = Helper(obj, 0, state, &buf);
+      if (*error != nullptr) return errors::InvalidArgument(*error);
     }
-    *dest = result;
-    return nullptr;
+    tensorflow::TensorHandle* handle = nullptr;
+    auto status = tensorflow::TensorHandle::CreateLocalHandle(
+        result, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &handle);
+    if (!status.ok()) {
+      return status;
+    }
+    *h = new TFE_TensorHandle{TensorHandleInterface(handle)};
+    return Status::OK();
   }
 };
 
@@ -592,16 +600,14 @@ typedef Converter<bool> BoolConverter;
 
 }  // namespace
 
-#define RETURN_STRING_AS_STATUS(...)                             \
-  do {                                                           \
-    const char* _error = (__VA_ARGS__);                          \
-    if (TF_PREDICT_TRUE(_error == nullptr)) return Status::OK(); \
-    return errors::InvalidArgument(_error);                      \
-  } while (0)
-
-Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) {
+TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj,
+                                          DataType dtype) {
   ConverterState state;
-  TF_RETURN_IF_ERROR(InferShapeAndType(obj, &state));
+  Status status = InferShapeAndType(obj, &state);
+  if (!status.ok()) {
+    PyErr_SetString(PyExc_ValueError, status.error_message().c_str());
+    return nullptr;
+  }
   DataType requested_dtype = DT_INVALID;
   if (dtype != DT_INVALID) {
     requested_dtype = dtype;
@@ -610,116 +616,131 @@ Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) {
   // we just try instead to create a tensor of the inferred type and
   // let the caller convert it to the requested type using a cast
   // operation.
+  const char* error = nullptr;
+  TFE_TensorHandle* handle = nullptr;
+  status = errors::Unimplemented(""Missing Python -> Tensor conversion for "",
+                                 DataTypeString(state.inferred_dtype));
   switch (requested_dtype) {
     case DT_FLOAT:
-      if (FloatConverter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = FloatConverter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_DOUBLE:
-      if (DoubleConverter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_HALF:
-      if (NumpyHalfConverter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = NumpyHalfConverter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_INT64:
-      if (Int64Converter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = Int64Converter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_INT32:
-      if (Int32Converter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = Int32Converter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_UINT64:
-      if (UInt64Converter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = UInt64Converter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_COMPLEX128:
-      if (Complex128Converter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = Complex128Converter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_STRING:
-      if (StringConverter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = StringConverter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     case DT_BOOL:
-      if (BoolConverter::Convert(obj, &state, ret) == nullptr)
-        return Status::OK();
+      status = BoolConverter::Convert(ctx, obj, &state, &handle, &error);
       break;
 
     default:
       break;
   }
+  if (status.ok()) return handle;
+
   switch (state.inferred_dtype) {
     case DT_FLOAT:
       // TODO(josh11b): Handle mixed floats and complex numbers?
       if (requested_dtype == DT_INVALID) {
         // TensorFlow uses float32s to represent floating point numbers
         // by default (for space and speed over using doubles).
-        RETURN_STRING_AS_STATUS(FloatConverter::Convert(obj, &state, ret));
+        status = FloatConverter::Convert(ctx, obj, &state, &handle, &error);
       } else {
         // We are going to do a cast to the user's requested dtype
         // after this.  We use doubles for this intermediate result so
         // we don't lose precision that might be representable in the
         // final type.
-        RETURN_STRING_AS_STATUS(DoubleConverter::Convert(obj, &state, ret));
+        status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error);
       }
+      break;
 
     case DT_DOUBLE:
-      RETURN_STRING_AS_STATUS(DoubleConverter::Convert(obj, &state, ret));
+      status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error);
+      break;
 
     case DT_HALF:
-      RETURN_STRING_AS_STATUS(NumpyHalfConverter::Convert(obj, &state, ret));
+      status = NumpyHalfConverter::Convert(ctx, obj, &state, &handle, &error);
+      break;
 
     case DT_INT64:
       if (requested_dtype == DT_INVALID) {
-        const char* error = Int32Converter::Convert(obj, &state, ret);
+        status = Int32Converter::Convert(ctx, obj, &state, &handle, &error);
         if (error == ErrorFoundInt64) {
-          error = Int64Converter::Convert(obj, &state, ret);
+          status = Int64Converter::Convert(ctx, obj, &state, &handle, &error);
         }
         if (error == ErrorFoundFloat) {
-          error = FloatConverter::Convert(obj, &state, ret);
+          status = FloatConverter::Convert(ctx, obj, &state, &handle, &error);
         }
         // TODO(josh11b): May also want to fall back to using doubles if
         // error == ErrorOutOfRange?
-        RETURN_STRING_AS_STATUS(error);
       } else {
-        const char* error = Int64Converter::Convert(obj, &state, ret);
+        status = Int64Converter::Convert(ctx, obj, &state, &handle, &error);
         if (error == ErrorFoundFloat) {
-          error = DoubleConverter::Convert(obj, &state, ret);
+          status = DoubleConverter::Convert(ctx, obj, &state, &handle, &error);
         }
-        RETURN_STRING_AS_STATUS(error);
       }
+      break;
 
     case DT_STRING:
-      RETURN_STRING_AS_STATUS(StringConverter::Convert(obj, &state, ret));
+      status = StringConverter::Convert(ctx, obj, &state, &handle, &error);
+      break;
 
     case DT_COMPLEX128:
-      RETURN_STRING_AS_STATUS(Complex128Converter::Convert(obj, &state, ret));
+      status = Complex128Converter::Convert(ctx, obj, &state, &handle, &error);
+      break;
 
     case DT_BOOL:
-      RETURN_STRING_AS_STATUS(BoolConverter::Convert(obj, &state, ret));
+      status = BoolConverter::Convert(ctx, obj, &state, &handle, &error);
+      break;
 
     case DT_INVALID:  // Only occurs for empty tensors.
-      *ret = Tensor(requested_dtype == DT_INVALID ? DT_FLOAT : requested_dtype,
-                    state.inferred_shape);
-      return Status::OK();
+    {
+      tensorflow::TensorHandle* h = nullptr;
+      Tensor tensor(requested_dtype == DT_INVALID ? DT_FLOAT : requested_dtype,
+                    TensorShape(state.inferred_shape));
+      status = tensorflow::TensorHandle::CreateLocalHandle(
+          tensor, /*d=*/nullptr, /*op_device=*/nullptr, ctx->context, &h);
+      if (!status.ok()) {
+        PyErr_SetString(PyExc_ValueError, status.error_message().c_str());
+        return nullptr;
+      }
+      return new TFE_TensorHandle{TensorHandleInterface(h)};
+    }
 
     default:
-      return errors::Unimplemented(""Missing Python -> Tensor conversion for "",
-                                   DataTypeString(state.inferred_dtype));
+      break;
   }
 
-  return Status::OK();
+  if (!status.ok()) {
+    PyErr_SetString(PyExc_ValueError, status.error_message().c_str());
+    return nullptr;
+  }
+
+  return handle;
 }
 
 }  // namespace tensorflow
",0,test
8a33966dbf9c190199dac4ca529bf70bce9c2a86,tensorflow/tensorflow,"Change PySeqToTensor to return TFE_TensorHandle

PiperOrigin-RevId: 289108443
Change-Id: I2aac99acb068b0dae2f8aabf72e323d0d303ebb1",py_seq_tensor.h,"@@ -18,6 +18,7 @@ limitations under the License.
 
 #include <Python.h>
 
+#include ""tensorflow/c/eager/c_api_internal.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/lib/core/status.h""
 
@@ -25,12 +26,16 @@ namespace tensorflow {
 
 // Converts Python object `obj` representing a rectangular array of
 // Python values (a scalar, a sequence of scalars, a sequence of
-// sequences, etc.) into a C++ TensorFlow Tensor and stores it in
-// *ret.  If dtype is not None it should by a Python integer
+// sequences, etc.) into a TFE_TensorHandle.
+// If dtype is not None it should by a Python integer
 // representing the desired dtype of the resulting Tensor.
 // This is used only as a hint, *ret may not have that dtype on
 // success and may require a cast.
-Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret);
+//
+// If an error occurs, this returns nullptr and sets the Python error indicator
+// with PyErr_SetString.
+TFE_TensorHandle* PySeqToTFE_TensorHandle(TFE_Context* ctx, PyObject* obj,
+                                          DataType dtype);
 
 }  // namespace tensorflow
 
",0,test
969489871031946f438d0899f3a0815270863296,tensorflow/tensorflow,"[XLA:Python] Add TopK operation to Python API.

PiperOrigin-RevId: 296327503
Change-Id: I345150c480b48ba97645376674faa6109f6631a7",xla.cc,"@@ -32,6 +32,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/client/lib/math.h""
 #include ""tensorflow/compiler/xla/client/lib/qr.h""
 #include ""tensorflow/compiler/xla/client/lib/self_adjoint_eig.h""
+#include ""tensorflow/compiler/xla/client/lib/sorting.h""
 #include ""tensorflow/compiler/xla/client/lib/svd.h""
 #include ""tensorflow/compiler/xla/client/local_client.h""
 #include ""tensorflow/compiler/xla/client/xla_builder.h""
@@ -454,6 +455,7 @@ void BuildOpsSubmodule(py::module* m) {
       },
       py::arg(""builder""), py::arg(""operands""), py::arg(""dimension"") = -1,
       py::arg(""comparator"") = absl::nullopt);
+  ops.def(""TopK"", &TopK, py::arg(""input""), py::arg(""k""));
   ops.def(""Transpose"", &Transpose);
   ops.def(""TriangularSolve"", &TriangularSolve);
   ops.def(""Tuple"", &Tuple);
",0,train
969489871031946f438d0899f3a0815270863296,tensorflow/tensorflow,"[XLA:Python] Add TopK operation to Python API.

PiperOrigin-RevId: 296327503
Change-Id: I345150c480b48ba97645376674faa6109f6631a7",xla_client.py,"@@ -1725,6 +1725,7 @@ _OTHER_OPS = [
     'Rev',
     'Select',
     'SliceInDim',
+    'TopK',
 ]
 
 
",0,train
63ef3b47e7b82bc92426814aff48ddcd31d36c82,tensorflow/tensorflow,"Add injection sites for customizing page construction.

* For single-page changes, tag an object using `doc_controls.set_custom_page_builder_class(obj, cls)`
* For global changes pass a dict of `{ObjectType:Type[PageInfo]}` to the DocGenerator's `page_builder_classes` argument.
* Switch generate2 to use the new customization pathways.

PiperOrigin-RevId: 420801142
Change-Id: I57d02ffa4dd439ddd63578b76109ca5794d4d8da",generate2.py,"@@ -37,6 +37,8 @@ import tensorflow as tf
 from tensorflow_docs.api_generator import doc_controls
 from tensorflow_docs.api_generator import doc_generator_visitor
 from tensorflow_docs.api_generator import generate_lib
+from tensorflow_docs.api_generator.pretty_docs import base_page
+from tensorflow_docs.api_generator.pretty_docs import module_page
 
 from tensorflow.python.framework import ops
 from tensorflow.python.util import tf_export
@@ -99,38 +101,51 @@ tf.__doc__ = """"""
   """"""
 
 
-def generate_raw_ops_doc():
-  """"""Generates docs for `tf.raw_ops`.""""""
+class RawOpsPageInfo(module_page.ModulePageInfo):
+  """"""Generates a custom page for `tf.raw_ops`.""""""
+  DEFAULT_BUILDER_CLASS = base_page.TemplatePageBuilder
 
-  warning = textwrap.dedent(""""""\n
-    Note: `tf.raw_ops` provides direct/low level access to all TensorFlow ops.
-    See [the RFC](https://github.com/tensorflow/community/blob/master/rfcs/20181225-tf-raw-ops.md)
-    for details. Unless you are library writer, you likely do not need to use
-    these ops directly."""""")
+  def build(self):
+    # Skip the ModulePage implementation, which doesn't use a template.
+    content = base_page.PageInfo.build(self)
 
-  table_header = textwrap.dedent(""""""
+    raw_ops_doc = self.generate_raw_ops_doc()
 
-      | Op Name | Has Gradient |
-      |---------|:------------:|"""""")
+    return ""\n"".join([content, raw_ops_doc])
 
-  parts = [warning, table_header]
+  def generate_raw_ops_doc(self):
+    """"""Generates docs for `tf.raw_ops`.""""""
+    del self
 
-  for op_name in sorted(dir(tf.raw_ops)):
-    try:
-      ops._gradient_registry.lookup(op_name)  # pylint: disable=protected-access
-      has_gradient = ""\N{HEAVY CHECK MARK}\N{VARIATION SELECTOR-16}""
-    except LookupError:
-      has_gradient = ""\N{CROSS MARK}""
+    warning = textwrap.dedent(""""""\n
+      Note: `tf.raw_ops` provides direct/low level access to all TensorFlow ops.
+      See [the RFC](https://github.com/tensorflow/community/blob/master/rfcs/20181225-tf-raw-ops.md)
+      for details. Unless you are library writer, you likely do not need to use
+      these ops directly."""""")
 
-    if not op_name.startswith(""_""):
-      path = pathlib.Path(""/"") / FLAGS.site_path / ""tf/raw_ops"" / op_name
-      path = path.with_suffix("".md"")
-      link = ('<a id={op_name} href=""{path}"">{op_name}</a>').format(
-          op_name=op_name, path=str(path))
-      parts.append(""| {link} | {has_gradient} |"".format(
-          link=link, has_gradient=has_gradient))
+    table_header = textwrap.dedent(""""""
 
-  return ""\n"".join(parts)
+        | Op Name | Has Gradient |
+        |---------|:------------:|"""""")
+
+    parts = [warning, table_header]
+
+    for op_name in sorted(dir(tf.raw_ops)):
+      try:
+        ops._gradient_registry.lookup(op_name)  # pylint: disable=protected-access
+        has_gradient = ""\N{HEAVY CHECK MARK}\N{VARIATION SELECTOR-16}""
+      except LookupError:
+        has_gradient = ""\N{CROSS MARK}""
+
+      if not op_name.startswith(""_""):
+        path = pathlib.Path(""/"") / FLAGS.site_path / ""tf/raw_ops"" / op_name
+        path = path.with_suffix("".md"")
+        link = ('<a id={op_name} href=""{path}"">{op_name}</a>').format(
+            op_name=op_name, path=str(path))
+        parts.append(""| {link} | {has_gradient} |"".format(
+            link=link, has_gradient=has_gradient))
+
+    return ""\n"".join(parts)
 
 
 # The doc generator isn't aware of tf_export.
@@ -167,7 +182,7 @@ def build_docs(output_dir, code_url_prefix, search_hints):
     search_hints: Bool. Include meta-data search hints at the top of each file.
   """"""
   # The custom page will be used for raw_ops.md not the one generated above.
-  doc_controls.set_custom_page_content(tf.raw_ops, generate_raw_ops_doc())
+  doc_controls.set_custom_page_builder_cls(tf.raw_ops, RawOpsPageInfo)
 
   # Hide raw_ops from search.
   for name, obj in tf_inspect.getmembers(tf.raw_ops):
",0,train
b0ce57a4a7be3735c73e33a927117d78792d5cd6,tensorflow/tensorflow,Fix typo in irfft2d.py test,irfft2d.py,"@@ -25,7 +25,7 @@ from tensorflow.lite.testing.zip_test_utils import register_make_test_function
 
 
 @register_make_test_function()
-def make_rfft2d_tests(options):
+def make_irfft2d_tests(options):
   """"""Make a set of tests to do irfft2d.""""""
 
   test_parameters = [{
",0,train
9a7288ccf576a25ab65e61246242dd9bb90345bd,tensorflow/tensorflow,"Add dot configuration as part of module configuration.

PiperOrigin-RevId: 299879284
Change-Id: I64932896316470cda7556a3618b32ac3556b3dac",hlo_module_config.h,"@@ -183,6 +183,12 @@ class HloModuleConfig {
     return &fusion_config_;
   }
 
+  const std::vector<std::vector<int64>>& dot_config() const {
+    return dot_config_;
+  }
+
+  std::vector<std::vector<int64>>* mutable_dot_config() { return &dot_config_; }
+
  private:
   // If you add new members, be sure to update compilation_cache_key.
 
@@ -214,6 +220,8 @@ class HloModuleConfig {
       FusionConfigCollection::kOff;
 
   std::vector<std::vector<bool>> fusion_config_;
+
+  std::vector<std::vector<int64>> dot_config_;
 };
 
 }  // namespace xla
",0,train
29fdee8e85e750d04f6e9d378e85443ba5c7a239,tensorflow/tensorflow,"Fix for error_reporter.

Change-Id: I58745cc97872af74b1ad5b0af3ad778b39f01555",quantization_utils.cc,"@@ -113,7 +113,8 @@ TfLiteStatus GetQuantizationParams(TensorT* tensor, TensorType activations_type,
                                    tensor->quantization->max[0],
                                    quantized_range, quantization_params);
   } else {
-    error_reporter->Report(
+    TF_LITE_REPORT_ERROR(
+        error_reporter,
         ""Unsupported activation type for quantize-activation: %s"",
         activations_type);
     return kTfLiteError;
",0,train
29fdee8e85e750d04f6e9d378e85443ba5c7a239,tensorflow/tensorflow,"Fix for error_reporter.

Change-Id: I58745cc97872af74b1ad5b0af3ad778b39f01555",quantize_model.cc,"@@ -370,9 +370,9 @@ TfLiteStatus ApplyConstraints(ModelT* model,
         std::unique_ptr<TensorT> additional_tensor;
         const string requant_tensor_name = input_tensor->name + ""_requantized"";
         utils::MakeTensorWithQuantParam(
-            requant_tensor_name, input_tensor->shape, 
-            input_tensor->shape_signature, activations_type,
-            output_scale, output_zp, &additional_tensor);
+            requant_tensor_name, input_tensor->shape,
+            input_tensor->shape_signature, activations_type, output_scale,
+            output_zp, &additional_tensor);
         const int32_t additional_tensor_idx = subgraph->tensors.size();
         subgraph->tensors.push_back(std::move(additional_tensor));
 
@@ -869,13 +869,15 @@ TfLiteStatus QuantizeWeightsInputOutput(
 
       if (activations_type == TensorType_INT16 && !property.quantizable &&
           !allow_float) {
-        error_reporter->Report(
-            ""Quantization to 16x8-bit not yet supported for op: %s"",
+        TF_LITE_REPORT_ERROR(
+            error_reporter,
+            ""Quantization to 16x8-bit not yet supported for op: %s"",
             EnumNameBuiltinOperator(op_code));
         return kTfLiteError;
       } else if (!property.quantizable && !allow_float) {
-        error_reporter->Report(""Quantization not yet supported for op: %s"",
-                               EnumNameBuiltinOperator(op_code));
+        TF_LITE_REPORT_ERROR(error_reporter,
+                             ""Quantization not yet supported for op: %s"",
+                             EnumNameBuiltinOperator(op_code));
         return kTfLiteError;
       }
 
",0,train
012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps

Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation.

PiperOrigin-RevId: 260979468",tape.h,"@@ -262,6 +262,12 @@ class ForwardAccumulator {
       const std::function<BackwardFunction*()>& backward_function_getter,
       const std::function<void(BackwardFunction*)>& backward_function_deleter);
 
+  // Returns true if `Accumulate` is active somewhere above on the stack. This
+  // is useful for ordering ForwardAccumulators, where more deeply nested
+  // accumulators should not see computations from less deeply nested
+  // accumulators.
+  bool BusyAccumulating() const { return this->accumulating_; }
+
   // Fetches the current Jacobian-vector product associated with `tensor_id`, or
   // a nullptr if none is available.
   //
",0,train
012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps

Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation.

PiperOrigin-RevId: 260979468",forwardprop_test.py,"@@ -282,9 +282,12 @@ class ForwardpropTest(test.TestCase, parameterized.TestCase):
       f = _forwardgrad(f)
     self.assertAllClose(expected, f(primal))
 
-  def testFunctionGradPureForward(self):
+  @parameterized.named_parameters(
+      [(""Function"", def_function.function),
+       (""NoFunction"", lambda f: f)])
+  def testGradPureForward(self, decorator):
 
-    @def_function.function
+    @decorator
     def f(x):
       return x ** 3.5
 
",0,train
012a1167d2b3db1a79a823dee959e58c162b3843,tensorflow/tensorflow,"Forwardprop: Ensure that inner nested accumulators don't see outer accumulators' jvps

Just for consistency; apparently this was a difference between function-wrapped and non-function-wrapped accumulation.

PiperOrigin-RevId: 260979468",pywrap_tfe_src.cc,"@@ -1905,6 +1905,12 @@ void TapeSetRecordOperation(
       if (MaybeRaiseExceptionFromStatus(status, nullptr)) {
         return;
       }
+      if (accumulator->accumulator->BusyAccumulating()) {
+        // Ensure inner accumulators don't see outer accumulators' jvps. This
+        // mostly happens on its own, with some potentially surprising
+        // exceptions, so the blanket policy is for consistency.
+        break;
+      }
     }
   }
 }
",0,train
f2134cbd2ec4dd98f9f20ac41e4f46cdd0246af2,tensorflow/tensorflow,use get_item_tensor_string for string with rank 0,slices_test.py,"@@ -53,6 +53,12 @@ class SlicesTest(test.TestCase):
     with self.test_session() as sess:
       self.assertEqual(sess.run(t), b""b"")
 
+    initial_list_str = constant_op.constant([""abcd"", ""bcde""])
+    t = slices.get_item(initial_list_str, 1, slices.GetItemOpts(element_dtype=initial_str.dtype))
+
+    with self.test_session() as sess:
+      self.assertEqual(sess.run(t), b""bcde"")
+
 
 if __name__ == '__main__':
   test.main()
",0,train
7cd52d03c423c27e5daf4e981ec44a5c84362d2c,tensorflow/tensorflow,"[XLA] Initialize fields of RematerializationSizes by default

PiperOrigin-RevId: 355954409
Change-Id: I0da0d3ce320c53321778ad66ab9703307bc231c7",hlo_rematerialization.h,"@@ -45,8 +45,8 @@ class HloRematerialization : public HloModulePass {
   // Helper struct that communicates the before / after sizes for the
   // rematerialization process.
   struct RematerializationSizes {
-    int64 before_bytes;
-    int64 after_bytes;
+    int64 before_bytes = -1;
+    int64 after_bytes = -1;
   };
 
   // Mode in which the rematerialization algorithm should be run.
",0,train
4f8410553665507aa09763284e426e81a6084023,tensorflow/tensorflow,test: check statefulness with number of executions,script_ops_test.py,"@@ -40,8 +40,11 @@ class NumpyFunctionTest(test.TestCase):
     self.assertAllEqual(actual_result, expect_result)
 
   def test_stateless_flag(self):
+    call_count = 0
 
     def plus(a, b):
       nonlocal call_count
+      call_count += 1
       return a + b
 
     @def_function.function
@@ -53,21 +56,30 @@ class NumpyFunctionTest(test.TestCase):
       return numpy_function(plus, [a, b], dtypes.int32, stateful=False)
 
     @def_function.function(autograph=False)
-    def tensor_double_plus(a, b, c, d):
-      sum_stateful = tensor_plus_stateful(a, b)
-      assert sum_stateful.op.op_def.is_stateful
+    def tensor_double_plus_stateless(a, b):
+      sum1 = tensor_plus_stateless(a, b)
+      sum2 = tensor_plus_stateless(a, b)
+      return sum1 + sum2
 
-      sum_stateless = tensor_plus_stateless(c, d)
-      assert not sum_stateless.op.op_def.is_stateful
-
-      return sum_stateful, sum_stateless
-
-    tensor_double_plus(
+    # different argument
+    tensor_double_plus_stateless(
       constant_op.constant(1, dtype=dtypes.int32),
       constant_op.constant(2, dtype=dtypes.int32),
+    )
+    assert call_count == 1  # +1 as only the first one was executed
+
+    @def_function.function(autograph=False)
+    def tensor_double_plus_stateful(a, b):
+      sum1 = tensor_plus_stateful(a, b)
+      sum2 = tensor_plus_stateful(a, b)
+      return sum1 + sum2
+
+    tensor_double_plus_stateful(
       constant_op.constant(3, dtype=dtypes.int32),
       constant_op.constant(4, dtype=dtypes.int32),
-    )
+                          )
+    assert call_count == 3  # +2 as it is stateful, both were executed
+
 
 
 if __name__ == ""__main__"":
",0,test
d70a2cf2ab0495926dadd9d190def0d4a8522878,tensorflow/tensorflow,"Print a cycle if detected by DFS.

Example output

Directed cycle:
  fusion.48
  get-tuple-element.32
  fusion.62
  get-tuple-element.67
  fusion.44
  get-tuple-element.65
  fusion.48

PiperOrigin-RevId: 266934452",hlo_instruction.cc,"@@ -2209,11 +2209,52 @@ string PrintName(const string& name, bool print_ids) {
 
 namespace {
 
+using DFSStack = absl::InlinedVector<std::pair<int, HloInstruction*>, 16>;
+
 string PrintNameInternal(const string& name, const HloPrintOptions& options) {
   return StrCat(options.print_percent() ? ""%"" : """",
                 PrintName(name, options.print_ids()));
 }
 
+void PrintCycle(const HloInstruction* child, DFSStack* dfs_stack) {
+  // This set contains HloInstructions from the top of `DFSStack` that might
+  // belong to the cycle, i.e. if  DFSStack :=[back,...,child,...,top], then
+  // `subgraph` := {child,...,top}.
+  absl::flat_hash_set<const HloInstruction*> subgraph;
+  while (!dfs_stack->empty() && dfs_stack->back().second != child) {
+    subgraph.insert(dfs_stack->back().second);
+    dfs_stack->pop_back();
+  }
+  // Start dfs at `child` and find a cycle with all nodes in `subgraph`.
+  absl::flat_hash_set<const HloInstruction*> visited;
+  absl::InlinedVector<const HloInstruction*, 16> dfs;
+  dfs.push_back(child);
+  while (!dfs.empty()) {
+    bool found_next_instr = false;
+    for (const auto& user : dfs.back()->users()) {
+      if (user == child) {
+        dfs.push_back(child);
+        LOG(INFO) << ""\n\nDirected cycle:\n  ""
+                  << absl::StrJoin(
+                         dfs, ""\n  "",
+                         [](std::string* out, const HloInstruction* instr) {
+                           out->append(instr->name());
+                         });
+        return;
+      }
+      if (!subgraph.contains(user) || visited.contains(user)) {
+        continue;
+      }
+      visited.insert(user);
+      dfs.push_back(user);
+      found_next_instr = true;
+    }
+    if (!found_next_instr) {
+      dfs.pop_back();
+    }
+  }
+}
+
 }  // namespace
 
 string HloInstruction::ToString(const HloPrintOptions& options) const {
@@ -2847,8 +2888,6 @@ Status HloInstruction::Visit(DfsHloVisitorBase<HloInstructionPtr>* visitor) {
 template Status HloInstruction::Visit(DfsHloVisitor* visitor);
 template Status HloInstruction::Visit(ConstDfsHloVisitor* visitor);
 
-using DFSStack = absl::InlinedVector<std::pair<int, HloInstruction*>, 16>;
-
 // Push ""child"" onto the dfs_stack if not already visited.  Returns false if a
 // cycle was detected, and true otherwise.
 template <typename Visitor>
@@ -2926,6 +2965,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
     const size_t old_dfs_stack_size = dfs_stack.size();
     for (HloInstruction* child : current_node->operands()) {
       if (!TF_PREDICT_TRUE(PushDFSChild(visitor, &dfs_stack, child))) {
+        PrintCycle(child, &dfs_stack);
         return FailedPrecondition(
             ""A cycle is detected while visiting instruction %s"",
             current_node->ToString());
@@ -2935,6 +2975,7 @@ static Status PostOrderDFS(HloInstruction* root, Visitor* visitor,
     if (!ignore_control_predecessors) {
       for (HloInstruction* child : current_node->control_predecessors()) {
         if (!TF_PREDICT_TRUE(PushDFSChild(visitor, &dfs_stack, child))) {
+          PrintCycle(child, &dfs_stack);
           return FailedPrecondition(
               ""A cycle is detected while visiting instruction %s"",
               current_node->ToString());
",0,train
6f737e0dd60fc02138c6bf0dc34c6a7e64297c73,tensorflow/tensorflow,"Doc improvements to ReductionToOneDevice.

PiperOrigin-RevId: 264908765",cross_device_ops.py,"@@ -404,15 +404,20 @@ class CrossDeviceOps(object):
 class ReductionToOneDevice(CrossDeviceOps):
   """"""Always do reduction to one device first and then do broadcasting.
 
-    Batch reduction is done by reduction on each element one by one.
+  Batch reduction is done by reduction on each element one by one.
+
+  ```
+    mirrored_strategy = tf.distribute.MirroredStrategy(
+      cross_device_ops=tf.distribute.ReductionToOneDevice())
+  ```
   """"""
 
   def __init__(self, reduce_to_device=None, accumulation_fn=None):
-    """"""Initializes the instance of ReductionToOneDevice.
+    """"""Initializes with a device to reduce to and a way to accumulate.
 
     Args:
       reduce_to_device: the intermediate device to reduce to. If None, reduce
-        to the first device in `destinations` of the reduce() method.
+        to the first device in `destinations` of the `reduce()` method.
       accumulation_fn: a function that does accumulation.  If None, then
         `tf.math.add_n` is used.
     """"""
",0,train
650172a574504223ec2bdb328ed7c985389313d7,tensorflow/tensorflow,"Update test case for complex support of squared difference

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",math_ops_test.py,"@@ -217,7 +217,7 @@ class SquaredDifferenceTest(test_util.TensorFlowTestCase):
     for dtype in [np.complex64, np.complex128]:
       x = np.array([[1+3j, 2+2j, 3+1j], [4-1j, 5-2j, 6-3j]], dtype=dtype)
       y = np.array([-3+1j, -2+2j, -1+3j], dtype=dtype)
-      z = (x - y) * (x - y)
+      z = np.conj(x - y) * (x - y)
       with test_util.device(use_gpu=False):
         z_tf = self.evaluate(math_ops.squared_difference(x, y))
         self.assertAllClose(z, z_tf)
",0,test
e479a1690683a64cdabac0ca46ce6265c0b0dbec,tensorflow/tensorflow,"Refactor kernel thunk's launch dimension setting - part 5/8.

Move SetThunkLaunchDimensions() to right after KernelThunk construction. Launch dimension will be passed to KernelThunk's constructor as a parameter.

PiperOrigin-RevId: 386342276
Change-Id: Ie486ddd8f35b1e1377007e29b3b6bf026067684b",ir_emitter_unnested.cc,"@@ -1696,41 +1696,10 @@ Status IrEmitterUnnested::EmitLoopFusion(mlir::Operation* op) {
   MlirEmitterContext context;
   context.SetOperation(fusion);
 
-  std::vector<llvm_ir::IrArray> ir_arrays;
-  Thunk* kernel_thunk;
-  {
-    TF_ASSIGN_OR_RETURN(std::unique_ptr<KernelThunk> kernel_thunk_ptr,
-                        BuildKernelThunk(fusion, GetThunkInfo(op), &ir_arrays));
-    kernel_thunk = kernel_thunk_ptr.get();
-    thunk_sequence_.emplace_back(std::move(kernel_thunk_ptr));
-  }
-
-  auto operand_arrays =
-      absl::MakeSpan(ir_arrays).subspan(0, context.operand_shapes.size());
-  auto output_element_arrays = absl::MakeSpan(ir_arrays).subspan(
-      context.operand_shapes.size(), context.output_shapes.size());
-
   TF_ASSIGN_OR_RETURN(const HloComputation* fused_computation,
                       GetOrCreateSubComputationFromRegion(&fusion.region(),
                                                           /*is_fusion=*/true));
 
-  GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &b_,
-                                          GetNestedComputer());
-  FusedIrEmitter fused_emitter(&elemental_emitter);
-
-  for (int i = 0; i < context.operand_shapes.size(); i++) {
-    auto* builder = &b_;
-    auto ir_array = operand_arrays[i];
-    fused_emitter.BindGenerator(
-        fused_computation->parameter_instruction(i),
-        [builder, ir_array](llvm_ir::IrArray::Index index) {
-          return ir_array.EmitReadArrayElement(index, builder);
-        });
-  }
-  TF_ASSIGN_OR_RETURN(
-      auto element_generator,
-      fused_emitter.GetGenerator(fused_computation->root_instruction()));
-
   int unroll_factor;
   if (!MayPreventVectorization(fusion)) {
     unroll_factor = ComputeMaxUnrollFactor(fusion, hlo_module_config_);
@@ -1782,8 +1751,40 @@ Status IrEmitterUnnested::EmitLoopFusion(mlir::Operation* op) {
                       CalculateLaunchDimensions(
                           element_shape, ir_emitter_context_->gpu_device_info(),
                           launch_config));
+
+  std::vector<llvm_ir::IrArray> ir_arrays;
+  Thunk* kernel_thunk;
+  {
+    TF_ASSIGN_OR_RETURN(std::unique_ptr<KernelThunk> kernel_thunk_ptr,
+                        BuildKernelThunk(fusion, GetThunkInfo(op), &ir_arrays));
+    kernel_thunk = kernel_thunk_ptr.get();
+    thunk_sequence_.emplace_back(std::move(kernel_thunk_ptr));
+  }
   SetThunkLaunchDimensions(launch_dimensions, kernel_thunk,
                            ir_emitter_context_->llvm_module());
+
+  auto operand_arrays =
+      absl::MakeSpan(ir_arrays).subspan(0, context.operand_shapes.size());
+  auto output_element_arrays = absl::MakeSpan(ir_arrays).subspan(
+      context.operand_shapes.size(), context.output_shapes.size());
+
+  GpuElementalIrEmitter elemental_emitter(hlo_module_config_, module_, &b_,
+                                          GetNestedComputer());
+  FusedIrEmitter fused_emitter(&elemental_emitter);
+
+  for (int i = 0; i < context.operand_shapes.size(); i++) {
+    auto* builder = &b_;
+    auto ir_array = operand_arrays[i];
+    fused_emitter.BindGenerator(
+        fused_computation->parameter_instruction(i),
+        [builder, ir_array](llvm_ir::IrArray::Index index) {
+          return ir_array.EmitReadArrayElement(index, builder);
+        });
+  }
+  TF_ASSIGN_OR_RETURN(
+      auto element_generator,
+      fused_emitter.GetGenerator(fused_computation->root_instruction()));
+
   llvm::Type* index_type =
       GetIndexTypeForKernel(fusion, launch_dimensions.launch_bound(), &b_);
 
",0,train
723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes:
- Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists.
- Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when
  creating directories and subdirectories.

Fixes #6974
Change: 145144720",file_system.cc,"@@ -229,7 +229,10 @@ Status FileSystem::RecursivelyCreateDir(const string& dirname) {
   string built_path = remaining_dir.ToString();
   for (const StringPiece sub_dir : sub_dirs) {
     built_path = io::JoinPath(built_path, sub_dir);
-    TF_RETURN_IF_ERROR(CreateDir(io::CreateURI(scheme, host, built_path)));
+    Status status = CreateDir(io::CreateURI(scheme, host, built_path));
+    if (!status.ok() && status.code() != tensorflow::error::ALREADY_EXISTS) {
+      return status;
+    }
   }
   return Status::OK();
 }
",0,train
723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes:
- Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists.
- Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when
  creating directories and subdirectories.

Fixes #6974
Change: 145144720",file_system.h,"@@ -87,7 +87,7 @@ class FileSystem {
   //   '\\' c: matches character c
   //   lo '-' hi: matches character c for lo <= c <= hi
   //
-  // Typical return codes
+  // Typical return codes:
   //  * OK - no errors
   //  * UNIMPLEMENTED - Some underlying functions (like GetChildren) are not
   //                    implemented
@@ -100,10 +100,16 @@ class FileSystem {
 
   virtual Status DeleteFile(const string& fname) = 0;
 
+  // \brief Creates the specified directory.
+  // Typical return codes:
+  //  * OK - successfully created the directory.
+  //  * ALREADY_EXISTS - directory with name dirname already exists.
+  //  * PERMISSION_DENIED - dirname is not writable.
   virtual Status CreateDir(const string& dirname) = 0;
 
   // \brief Creates the specified directory and all the necessary
-  // subdirectories. Typical return codes.
+  // subdirectories.
+  // Typical return codes:
   //  * OK - successfully created the directory and sub directories, even if
   //         they were already created.
   //  * PERMISSION_DENIED - dirname or some subdirectory is not writable.
@@ -116,7 +122,7 @@ class FileSystem {
   // files and directories that weren't deleted (unspecified if the return
   // status is not OK).
   // REQUIRES: undeleted_files, undeleted_dirs to be not null.
-  // Typical return codes
+  // Typical return codes:
   //  * OK - dirname exists and we were able to delete everything underneath.
   //  * NOT_FOUND - dirname doesn't exist
   //  * PERMISSION_DENIED - dirname or some descendant is not writable
",0,train
723c4048790d6f0636f6c1df5f4fb793ef7a4ae6,tensorflow/tensorflow,"FileSystem directory creation fixes:
- Ensure that CreateDir returns error::ALREADY_EXISTS if the dirname exists.
- Ensure that RecursivelyCreateDirectory ignores error::ALREADY_EXISTS when
  creating directories and subdirectories.

Fixes #6974
Change: 145144720",file_system_test.cc,"@@ -44,9 +44,10 @@ class InterPlanetaryFileSystem : public NullFileSystem {
   Status CreateDir(const string& dirname) override {
     string parsed_path;
     ParsePath(dirname, &parsed_path);
-    // If the directory already exists then ignore.
+    // If the directory already exists, throw an error.
     if (celestial_bodies_.find(parsed_path) != celestial_bodies_.end()) {
-      return Status::OK();
+      return Status(tensorflow::error::ALREADY_EXISTS,
+                    ""dirname already exists."");
     }
     std::vector<string> split_path = str_util::Split(parsed_path, '/');
     // If the path is too long then we don't support it.
@@ -248,4 +249,14 @@ TEST(TestFileSystem, MatchMultipleWildcards) {
             ""match-00/abc/00,match-00/abc/01,match-01/abc/00,match-01/abc/04"");
 }
 
+TEST(TestFileSystem, RecursivelyCreateAlreadyExistingDir) {
+  InterPlanetaryFileSystem ipfs;
+  const string dirname = io::JoinPath(kPrefix, ""match-00/abc/00"");
+  TF_EXPECT_OK(ipfs.RecursivelyCreateDir(dirname));
+  // Ensure that CreateDir throws an error, to sanity check that this test
+  // actually tests the behavior of RecursivelyCreateDir.
+  EXPECT_EQ(ipfs.CreateDir(dirname).code(), tensorflow::error::ALREADY_EXISTS);
+  TF_EXPECT_OK(ipfs.RecursivelyCreateDir(dirname));
+}
+
 }  // namespace tensorflow
",0,train
c41f4652b45bf70f20686e612b41574b4b8139d7,tensorflow/tensorflow,"Add an option to enable MLIR bridge for tpu_py_test rule

If enable_mlir_bridge is True, a new test will be generated that runs with the MLIR bridge enabled.
This option is off by default.

PiperOrigin-RevId: 317173675
Change-Id: I332e1ae24cf82fceea20fd0aff2cec7c9b236a24",test_util.py,"@@ -1933,6 +1933,9 @@ class TensorFlowTestCase(googletest.TestCase):
       # disable it here.
       pywrap_tf_session.TF_SetXlaConstantFoldingDisabled(True)
 
+    if is_mlir_bridge_enabled():
+      context.context().enable_mlir_bridge = True
+
     self._threads = []
     self._tempdir = None
     self._cached_session = None
",0,train
d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context.

PiperOrigin-RevId: 419787683
Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",inference_context.cc,"@@ -55,15 +55,6 @@ namespace gpu {
 namespace cl {
 
 namespace {
-bool IsReady(const absl::flat_hash_set<ValueId>& ready_tensors,
-             const GpuNode& node) {
-  for (const ValueId in_id : node.inputs) {
-    if (ready_tensors.find(in_id) == ready_tensors.end()) {
-      return false;
-    }
-  }
-  return true;
-}
 
 std::vector<std::pair<ValueId, TensorDescriptor>> GetCLNodeTensors(
     const CLNode& node) {
@@ -80,15 +71,6 @@ std::vector<std::pair<ValueId, TensorDescriptor>> GetCLNodeTensors(
   return result;
 }
 
-absl::Status MergeGpuNodes(GpuNode* src, GpuNode* dst) {
-  for (int j = 1; j < src->inputs.size(); ++j) {
-    dst->inputs.push_back(src->inputs[j]);
-  }
-  dst->outputs[0] = src->outputs[0];
-  dst->name += "" linked : "" + src->name;
-  return dst->gpu_operation->AddOperation(src->gpu_operation.get());
-}
-
 void AddUsage(ValueId id, int task_index,
               std::map<ValueId, int2>* usage_records) {
   auto it = usage_records->find(id);
@@ -401,53 +383,6 @@ absl::Status ConvertOperations(const GpuInfo& gpu_info,
   return absl::OkStatus();
 }
 
-absl::Status Merge(GpuModel* gpu_model) {
-  absl::flat_hash_set<ValueId> ready_tensors;
-  for (const auto& input : gpu_model->input_ids_and_refs) {
-    ready_tensors.insert(input.first);
-  }
-  auto& nodes = gpu_model->nodes;
-  for (int i = 0; i < nodes.size(); ++i) {
-    auto& node = nodes[i];
-    for (const auto& out_id : node.outputs) {
-      ready_tensors.insert(out_id);
-    }
-    if (node.outputs.size() != 1) {
-      continue;
-    }
-    std::vector<int> next_nodes;
-    int link_index = 0;
-    for (int j = i + 1; j < nodes.size(); ++j) {
-      for (int k = 0; k < nodes[j].inputs.size(); ++k) {
-        if (nodes[j].inputs[k] == node.outputs[0]) {
-          next_nodes.push_back(j);
-          link_index = k;
-        }
-      }
-    }
-    if (next_nodes.size() != 1 || link_index != 0) {
-      continue;
-    }
-    auto& linkable_node = nodes[next_nodes[0]];
-    if (!linkable_node.gpu_operation->IsLinkable() ||
-        linkable_node.outputs.size() != 1 ||
-        !IsReady(ready_tensors, linkable_node)) {
-      continue;
-    }
-    const auto& original_dst_def =
-        node.gpu_operation->GetDefinition().dst_tensors[0];
-    const auto& link_dst_def =
-        linkable_node.gpu_operation->GetDefinition().dst_tensors[0];
-    if (original_dst_def != link_dst_def) {
-      continue;
-    }
-    RETURN_IF_ERROR(MergeGpuNodes(&linkable_node, &node));
-    nodes.erase(nodes.begin() + next_nodes[0]);
-    i -= 1;
-  }
-  return absl::OkStatus();
-}
-
 void CopyExternals(const GraphFloat32& graph, GpuModel* gpu_model) {
   const auto inputs = graph.inputs();
   for (const auto& value : inputs) {
@@ -521,7 +456,7 @@ absl::Status GraphToGpuModel(const CreateGpuModelInfo& create_info,
   CopyExternals(graph, gpu_model);
   RETURN_IF_ERROR(ConvertOperations(gpu_info, graph, create_info,
                                     &tensor_reserver, gpu_model));
-  RETURN_IF_ERROR(Merge(gpu_model));
+  RETURN_IF_ERROR(MergeNodes(gpu_model));
   gpu_model->tensors = std::move(tensor_reserver.reservations_);
 
   for (auto& node : gpu_model->nodes) {
",0,test
d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context.

PiperOrigin-RevId: 419787683
Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",gpu_model.cc,"@@ -0,0 +1,93 @@
+/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""tensorflow/lite/delegates/gpu/common/gpu_model.h""
+
+#include ""absl/container/flat_hash_set.h""
+
+namespace tflite {
+namespace gpu {
+
+namespace {
+bool IsReady(const absl::flat_hash_set<ValueId>& ready_tensors,
+             const GpuNode& node) {
+  for (const ValueId in_id : node.inputs) {
+    if (ready_tensors.find(in_id) == ready_tensors.end()) {
+      return false;
+    }
+  }
+  return true;
+}
+
+absl::Status MergeGpuNodes(GpuNode* src, GpuNode* dst) {
+  for (int j = 1; j < src->inputs.size(); ++j) {
+    dst->inputs.push_back(src->inputs[j]);
+  }
+  dst->outputs[0] = src->outputs[0];
+  dst->name += "" linked : "" + src->name;
+  return dst->gpu_operation->AddOperation(src->gpu_operation.get());
+}
+
+}  // namespace
+
+absl::Status MergeNodes(GpuModel* gpu_model) {
+  absl::flat_hash_set<ValueId> ready_tensors;
+  for (const auto& input : gpu_model->input_ids_and_refs) {
+    ready_tensors.insert(input.first);
+  }
+  auto& nodes = gpu_model->nodes;
+  for (int i = 0; i < nodes.size(); ++i) {
+    auto& node = nodes[i];
+    for (const auto& out_id : node.outputs) {
+      ready_tensors.insert(out_id);
+    }
+    if (node.outputs.size() != 1) {
+      continue;
+    }
+    std::vector<int> next_nodes;
+    int link_index = 0;
+    for (int j = i + 1; j < nodes.size(); ++j) {
+      for (int k = 0; k < nodes[j].inputs.size(); ++k) {
+        if (nodes[j].inputs[k] == node.outputs[0]) {
+          next_nodes.push_back(j);
+          link_index = k;
+        }
+      }
+    }
+    if (next_nodes.size() != 1 || link_index != 0) {
+      continue;
+    }
+    auto& linkable_node = nodes[next_nodes[0]];
+    if (!linkable_node.gpu_operation->IsLinkable() ||
+        linkable_node.outputs.size() != 1 ||
+        !IsReady(ready_tensors, linkable_node)) {
+      continue;
+    }
+    const auto& original_dst_def =
+        node.gpu_operation->GetDefinition().dst_tensors[0];
+    const auto& link_dst_def =
+        linkable_node.gpu_operation->GetDefinition().dst_tensors[0];
+    if (original_dst_def != link_dst_def) {
+      continue;
+    }
+    RETURN_IF_ERROR(MergeGpuNodes(&linkable_node, &node));
+    nodes.erase(nodes.begin() + next_nodes[0]);
+    i -= 1;
+  }
+  return absl::OkStatus();
+}
+
+}  // namespace gpu
+}  // namespace tflite
",0,test
d916f20e1f1897696a19158ac7f5bd8d83e1b857,tensorflow/tensorflow,"Merging of GpuModel moved to api neutral common/gpu_model from opencl inference context.

PiperOrigin-RevId: 419787683
Change-Id: I860fd1866d2a046559fb44d350afb65713a18b2b",gpu_model.h,"@@ -103,6 +103,8 @@ struct GpuModel {
   absl::flat_hash_map<ValueId, TensorDescriptor> const_tensors;
 };
 
+absl::Status MergeNodes(GpuModel* gpu_model);
+
 }  // namespace gpu
 }  // namespace tflite
 
",0,test
90c5838c5d8fc672b020e4baa3d5138f3940cd03,tensorflow/tensorflow,"Add the tensor shape to the Exception string when the image doesn't match.
Change: 150251692",image_ops_impl.py,"@@ -128,9 +128,11 @@ def _Check3DImage(image, require_static=True):
   try:
     image_shape = image.get_shape().with_rank(3)
   except ValueError:
-    raise ValueError(""'image' must be three-dimensional."")
+    raise ValueError(""'image' (shape %s) must be three-dimensional."" %
+                     image.shape)
   if require_static and not image_shape.is_fully_defined():
-    raise ValueError(""'image' must be fully defined."")
+    raise ValueError(""'image' (shape %s) must be fully defined."" %
+                     image_shape)
   if any(x == 0 for x in image_shape):
     raise ValueError(""all dims of 'image.shape' must be > 0: %s"" %
                      image_shape)
",0,train
90c5838c5d8fc672b020e4baa3d5138f3940cd03,tensorflow/tensorflow,"Add the tensor shape to the Exception string when the image doesn't match.
Change: 150251692",image_ops_test.py,"@@ -314,29 +314,29 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
         [1000, 1, 3],
     ]
     test_styles = [
-        'all_random',
-        'rg_same',
-        'rb_same',
-        'gb_same',
-        'rgb_same',
+        ""all_random"",
+        ""rg_same"",
+        ""rb_same"",
+        ""gb_same"",
+        ""rgb_same"",
     ]
     for x_shape in x_shapes:
       for test_style in test_styles:
         x_np = np.random.rand(*x_shape) * 255.
         delta_h = np.random.rand() * 2.0 - 1.0
-        if test_style == 'all_random':
+        if test_style == ""all_random"":
           pass
-        elif test_style == 'rg_same':
+        elif test_style == ""rg_same"":
           x_np[..., 1] = x_np[..., 0]
-        elif test_style == 'rb_same':
+        elif test_style == ""rb_same"":
           x_np[..., 2] = x_np[..., 0]
-        elif test_style == 'gb_same':
+        elif test_style == ""gb_same"":
           x_np[..., 2] = x_np[..., 1]
-        elif test_style == 'rgb_same':
+        elif test_style == ""rgb_same"":
           x_np[..., 1] = x_np[..., 0]
           x_np[..., 2] = x_np[..., 0]
         else:
-          raise AssertionError('Invalid test style: %s' % (test_style))
+          raise AssertionError(""Invalid test style: %s"" % (test_style))
         y_np = self._adjustHueNp(x_np, delta_h)
         y_tf = self._adjustHueTf(x_np, delta_h)
         self.assertAllClose(y_tf, y_np, rtol=2e-5, atol=1e-5)
@@ -350,11 +350,11 @@ class AdjustHueTest(test_util.TensorFlowTestCase):
     x_np = np.random.rand(2, 3) * 255.
     delta_h = np.random.rand() * 2.0 - 1.0
     fused = False
-    with self.assertRaisesRegexp(ValueError, 'Shape must be at least rank 3'):
+    with self.assertRaisesRegexp(ValueError, ""Shape must be at least rank 3""):
       self._adjustHueTf(x_np, delta_h)
     x_np = np.random.rand(4, 2, 4) * 255.
     delta_h = np.random.rand() * 2.0 - 1.0
-    with self.assertRaisesOpError('input must have 3 channels'):
+    with self.assertRaisesOpError(""input must have 3 channels""):
       self._adjustHueTf(x_np, delta_h)
 
 
@@ -368,7 +368,7 @@ class AdjustHueBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session('', graph=ops.Graph(), config=config) as sess:
+    with session.Session("""", graph=ops.Graph(), config=config) as sess:
       with ops.device(device):
         inputs = variables.Variable(
             random_ops.random_uniform(
@@ -385,19 +385,19 @@ class AdjustHueBenchmark(test.Benchmark):
           sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
-    tag = '%s' % (cpu_count) if cpu_count is not None else '_all'
-    print('benchmarkAdjustHue_299_299_3_cpu%s step_time: %.2f us' %
+    tag = ""%s"" % (cpu_count) if cpu_count is not None else ""_all""
+    print(""benchmarkAdjustHue_299_299_3_cpu%s step_time: %.2f us"" %
           (tag, step_time * 1e6))
     self.report_benchmark(
-        name='benchmarkAdjustHue_299_299_3_cpu%s' % (tag),
+        name=""benchmarkAdjustHue_299_299_3_cpu%s"" % (tag),
         iters=benchmark_rounds,
         wall_time=step_time)
 
   def benchmarkAdjustHueCpu1(self):
-    self._benchmarkAdjustHue('/cpu:0', 1)
+    self._benchmarkAdjustHue(""/cpu:0"", 1)
 
   def benchmarkAdjustHueCpuAll(self):
-    self._benchmarkAdjustHue('/cpu:0', None)
+    self._benchmarkAdjustHue(""/cpu:0"", None)
 
   def benchmarkAdjustHueGpu(self):
     self._benchmarkAdjustHue(test.gpu_device_name(), None)
@@ -413,7 +413,7 @@ class AdjustSaturationBenchmark(test.Benchmark):
     if cpu_count is not None:
       config.inter_op_parallelism_threads = 1
       config.intra_op_parallelism_threads = cpu_count
-    with session.Session('', graph=ops.Graph(), config=config) as sess:
+    with session.Session("""", graph=ops.Graph(), config=config) as sess:
       with ops.device(device):
         inputs = variables.Variable(
             random_ops.random_uniform(
@@ -431,19 +431,19 @@ class AdjustSaturationBenchmark(test.Benchmark):
           sess.run(run_op)
     end = time.time()
     step_time = (end - start) / benchmark_rounds
-    tag = '%s' % (cpu_count) if cpu_count is not None else '_all'
-    print('benchmarkAdjustSaturation_599_599_3_cpu%s step_time: %.2f us' %
+    tag = ""%s"" % (cpu_count) if cpu_count is not None else ""_all""
+    print(""benchmarkAdjustSaturation_599_599_3_cpu%s step_time: %.2f us"" %
           (tag, step_time * 1e6))
     self.report_benchmark(
-        name='benchmarkAdjustSaturation_599_599_3_cpu%s' % (tag),
+        name=""benchmarkAdjustSaturation_599_599_3_cpu%s"" % (tag),
         iters=benchmark_rounds,
         wall_time=step_time)
 
   def benchmarkAdjustSaturationCpu1(self):
-    self._benchmarkAdjustSaturation('/cpu:0', 1)
+    self._benchmarkAdjustSaturation(""/cpu:0"", 1)
 
   def benchmarkAdjustSaturationCpuAll(self):
-    self._benchmarkAdjustSaturation('/cpu:0', None)
+    self._benchmarkAdjustSaturation(""/cpu:0"", None)
 
   def benchmarkAdjustSaturationGpu(self):
     self._benchmarkAdjustSaturation(test.gpu_device_name(), None)
@@ -457,7 +457,7 @@ class ResizeBilinearBenchmark(test.Benchmark):
     img = variables.Variable(
         random_ops.random_normal(
             [batch_size, image_size[0], image_size[1], num_channels]),
-        name='img')
+        name=""img"")
 
     deps = []
     for _ in xrange(num_ops):
@@ -472,9 +472,9 @@ class ResizeBilinearBenchmark(test.Benchmark):
       results = self.run_op_benchmark(
           sess,
           benchmark_op,
-          name=('resize_bilinear_%s_%s_%s' %
+          name=(""resize_bilinear_%s_%s_%s"" %
                 (image_size[0], image_size[1], num_channels)))
-      print('%s   : %.2f ms/img' % (results['name'], 1000 * results['wall_time']
+      print(""%s   : %.2f ms/img"" % (results[""name""], 1000 * results[""wall_time""]
                                     / (batch_size * num_ops)))
 
   def benchmarkSimilar3Channel(self):
@@ -504,7 +504,7 @@ class ResizeBicubicBenchmark(test.Benchmark):
     img = variables.Variable(
         random_ops.random_normal(
             [batch_size, image_size[0], image_size[1], num_channels]),
-        name='img')
+        name=""img"")
 
     deps = []
     for _ in xrange(num_ops):
@@ -520,9 +520,9 @@ class ResizeBicubicBenchmark(test.Benchmark):
           sess,
           benchmark_op,
           min_iters=20,
-          name=('resize_bicubic_%s_%s_%s' % (image_size[0], image_size[1],
+          name=(""resize_bicubic_%s_%s_%s"" % (image_size[0], image_size[1],
                                              num_channels)))
-      print('%s   : %.2f ms/img' % (results['name'], 1000 * results['wall_time']
+      print(""%s   : %.2f ms/img"" % (results[""name""], 1000 * results[""wall_time""]
                                     / (batch_size * num_ops)))
 
   def benchmarkSimilar3Channel(self):
@@ -561,7 +561,7 @@ class ResizeAreaBenchmark(test.Benchmark):
     img = variables.Variable(
         random_ops.random_normal([batch_size, image_size[0],
                                   image_size[1], num_channels]),
-        name='img')
+        name=""img"")
 
     deps = []
     for _ in xrange(num_ops):
@@ -574,11 +574,11 @@ class ResizeAreaBenchmark(test.Benchmark):
       sess.run(variables.global_variables_initializer())
       results = self.run_op_benchmark(
           sess, benchmark_op,
-          name=('resize_area_%s_%s_%s' %
+          name=(""resize_area_%s_%s_%s"" %
                 (image_size[0], image_size[1], num_channels)))
-      print('%s   : %.2f ms/img' % (
-          results['name'],
-          1000*results['wall_time'] / (batch_size * num_ops)))
+      print(""%s   : %.2f ms/img"" % (
+          results[""name""],
+          1000*results[""wall_time""] / (batch_size * num_ops)))
 
   def benchmarkSimilar3Channel(self):
     self._benchmarkResize((183, 229), 3)
@@ -632,7 +632,7 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
       self.assertAllEqual(y_tf, y_np)
 
   def _adjust_saturation(self, image, saturation_factor):
-    image = ops.convert_to_tensor(image, name='image')
+    image = ops.convert_to_tensor(image, name=""image"")
     orig_dtype = image.dtype
     flt_image = image_ops.convert_image_dtype(image, dtypes.float32)
     saturation_adjusted_image = gen_image_ops.adjust_saturation(
@@ -697,30 +697,30 @@ class AdjustSaturationTest(test_util.TensorFlowTestCase):
         [1000, 1, 3],
     ]
     test_styles = [
-        'all_random',
-        'rg_same',
-        'rb_same',
-        'gb_same',
-        'rgb_same',
+        ""all_random"",
+        ""rg_same"",
+        ""rb_same"",
+        ""gb_same"",
+        ""rgb_same"",
     ]
     with self.test_session():
       for x_shape in x_shapes:
         for test_style in test_styles:
           x_np = np.random.rand(*x_shape) * 255.
           scale = np.random.rand()
-          if test_style == 'all_random':
+          if test_style == ""all_random"":
             pass
-          elif test_style == 'rg_same':
+          elif test_style == ""rg_same"":
             x_np[..., 1] = x_np[..., 0]
-          elif test_style == 'rb_same':
+          elif test_style == ""rb_same"":
             x_np[..., 2] = x_np[..., 0]
-          elif test_style == 'gb_same':
+          elif test_style == ""gb_same"":
             x_np[..., 2] = x_np[..., 1]
-          elif test_style == 'rgb_same':
+          elif test_style == ""rgb_same"":
             x_np[..., 1] = x_np[..., 0]
             x_np[..., 2] = x_np[..., 0]
           else:
-            raise AssertionError('Invalid test style: %s' % (test_style))
+            raise AssertionError(""Invalid test style: %s"" % (test_style))
           y_baseline = self._adjustSaturationNp(x_np, scale)
           y_fused = self._adjust_saturation(x_np, scale).eval()
           self.assertAllClose(y_fused, y_baseline, rtol=2e-5, atol=1e-5)
@@ -846,9 +846,9 @@ class FlipTransposeRotateTest(test_util.TensorFlowTestCase):
       transformed_unknown_width = op(p_unknown_width)
       self.assertEqual(3, transformed_unknown_width.get_shape().ndims)
 
-      with self.assertRaisesRegexp(ValueError, 'must be three-dimensional'):
+      with self.assertRaisesRegexp(ValueError, ""must be three-dimensional""):
         op(p_wrong_rank)
-      with self.assertRaisesRegexp(ValueError, 'must be > 0'):
+      with self.assertRaisesRegexp(ValueError, ""must be > 0""):
         op(p_zero_dim)
 
   def testRot90GroupOrder(self):
@@ -1130,7 +1130,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
         if err_msg not in str(e):
           raise
       else:
-        raise AssertionError('Exception not raised: %s' % err_msg)
+        raise AssertionError(""Exception not raised: %s"" % err_msg)
 
   def _assertShapeInference(self, pre_shape, height, width, post_shape):
     image = array_ops.placeholder(dtypes.float32, shape=pre_shape)
@@ -1187,7 +1187,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
 
     for x_shape in ([1, 3, 5, 1], [3, 5]):
       self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
-                         target_width, ""'image' must be three-dimensional"")
+                         target_width, ""must be three-dimensional"")
 
   def testZeroLengthInput(self):
     # Input image has 0-length dimension(s).
@@ -1217,7 +1217,7 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
           offset_width,
           target_height,
           target_width,
-          'assertion failed:',
+          ""assertion failed:"",
           use_tensor_inputs_options=[True])
 
   def testBadParams(self):
@@ -1226,12 +1226,12 @@ class CropToBoundingBoxTest(test_util.TensorFlowTestCase):
 
     # Each line is a test configuration:
     #   (offset_height, offset_width, target_height, target_width), err_msg
-    test_config = (([-1, 0, 3, 3], 'offset_height must be >= 0'),
-                   ([0, -1, 3, 3], 'offset_width must be >= 0'),
-                   ([0,  0, 0, 3], 'target_height must be > 0'),
-                   ([0,  0, 3, 0], 'target_width must be > 0'),
-                   ([2,  0, 3, 3], 'height must be >= target + offset'),
-                   ([0,  2, 3, 3], 'width must be >= target + offset'))
+    test_config = (([-1, 0, 3, 3], ""offset_height must be >= 0""),
+                   ([0, -1, 3, 3], ""offset_width must be >= 0""),
+                   ([0, 0, 0, 3], ""target_height must be > 0""),
+                   ([0, 0, 3, 0], ""target_width must be > 0""),
+                   ([2, 0, 3, 3], ""height must be >= target + offset""),
+                   ([0, 2, 3, 3], ""width must be >= target + offset""))
 
     for params, err_msg in test_config:
       self._assertRaises(x, x_shape, *params, err_msg=err_msg)
@@ -1362,7 +1362,7 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
         if err_msg not in str(e):
           raise
       else:
-        raise AssertionError('Exception not raised: %s' % err_msg)
+        raise AssertionError(""Exception not raised: %s"" % err_msg)
 
   def _assertShapeInference(self, pre_shape, height, width, post_shape):
     image = array_ops.placeholder(dtypes.float32, shape=pre_shape)
@@ -1432,7 +1432,7 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
 
     for x_shape in ([1, 3, 5, 1], [3, 5]):
       self._assertRaises(x, x_shape, offset_height, offset_width, target_height,
-                         target_width, ""'image' must be three-dimensional"")
+                         target_width, ""must be three-dimensional"")
 
   def testZeroLengthInput(self):
     # Input image has 0-length dimension(s).
@@ -1474,10 +1474,10 @@ class PadToBoundingBoxTest(test_util.TensorFlowTestCase):
 
     # Each line is a test configuration:
     #   offset_height, offset_width, target_height, target_width, err_msg
-    test_config = ((-1, 0, 4, 4, 'offset_height must be >= 0'),
-                   ( 0,-1, 4, 4, 'offset_width must be >= 0'),
-                   ( 2, 0, 4, 4, 'height must be <= target - offset'),
-                   ( 0, 2, 4, 4, 'width must be <= target - offset'))
+    test_config = ((-1, 0, 4, 4, ""offset_height must be >= 0""),
+                   (0, -1, 4, 4, ""offset_width must be >= 0""),
+                   (2, 0, 4, 4, ""height must be <= target - offset""),
+                   (0, 2, 4, 4, ""width must be <= target - offset""))
 
     for config_item in test_config:
       self._assertRaises(x, x_shape, *config_item)
@@ -1554,7 +1554,7 @@ class SelectDistortedCropBoxTest(test_util.TensorFlowTestCase):
 
     # For reference, here is what the distribution of area ratios look like.
     area_ratio_hist, _ = np.histogram(area_ratios, bins=10, range=area_range)
-    print('area_ratio_hist ', area_ratio_hist)
+    print(""area_ratio_hist "", area_ratio_hist)
 
     # Ensure that fraction_object_covered is satisfied.
     # TODO(wicke, shlens, dga): Restore this test so that it is no longer flaky.
@@ -2048,7 +2048,7 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
         if err_msg not in str(e):
           raise
       else:
-        raise AssertionError('Exception not raised: %s' % err_msg)
+        raise AssertionError(""Exception not raised: %s"" % err_msg)
 
   def _assertShapeInference(self, pre_shape, height, width, post_shape):
     image = array_ops.placeholder(dtypes.float32, shape=pre_shape)
@@ -2222,7 +2222,7 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
 
     for x_shape in ([1, 3, 5, 1], [3, 5]):
       self._assertRaises(x, x_shape, target_height, target_width,
-                         ""'image' must be three-dimensional"")
+                         ""must be three-dimensional"")
 
   def testZeroLengthInput(self):
     # Input image has 0-length dimension(s).
@@ -2256,12 +2256,12 @@ class ResizeImageWithCropOrPadTest(test_util.TensorFlowTestCase):
     # target_height <= 0
     target_height, target_width = [0, 5]
     self._assertRaises(x, x_shape, target_height, target_width,
-                       'target_height must be > 0')
+                       ""target_height must be > 0"")
 
     # target_width <= 0
     target_height, target_width = [5, 0]
     self._assertRaises(x, x_shape, target_height, target_width,
-                       'target_width must be > 0')
+                       ""target_width must be > 0"")
 
 
 def _SimpleColorRamp():
@@ -2286,8 +2286,8 @@ class JpegTest(test_util.TensorFlowTestCase):
 
   def testExisting(self):
     # Read a real jpeg and verify shape
-    path = ('tensorflow/core/lib/jpeg/testdata/'
-            'jpeg_merge_test1.jpg')
+    path = (""tensorflow/core/lib/jpeg/testdata/""
+            ""jpeg_merge_test1.jpg"")
     with self.test_session(use_gpu=True) as sess:
       jpeg0 = io_ops.read_file(path)
       image0 = image_ops.decode_jpeg(jpeg0)
@@ -2299,9 +2299,9 @@ class JpegTest(test_util.TensorFlowTestCase):
 
   def testCmyk(self):
     # Confirm that CMYK reads in as RGB
-    base = 'tensorflow/core/lib/jpeg/testdata'
-    rgb_path = os.path.join(base, 'jpeg_merge_test1.jpg')
-    cmyk_path = os.path.join(base, 'jpeg_merge_test1_cmyk.jpg')
+    base = ""tensorflow/core/lib/jpeg/testdata""
+    rgb_path = os.path.join(base, ""jpeg_merge_test1.jpg"")
+    cmyk_path = os.path.join(base, ""jpeg_merge_test1_cmyk.jpg"")
     shape = 256, 128, 3
     for channels in 3, 0:
       with self.test_session(use_gpu=True) as sess:
@@ -2320,9 +2320,9 @@ class JpegTest(test_util.TensorFlowTestCase):
       # Encode it, then decode it, then encode it
       image0 = constant_op.constant(_SimpleColorRamp())
       jpeg0 = image_ops.encode_jpeg(image0)
-      image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_ACCURATE')
+      image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_ACCURATE"")
       image2 = image_ops.decode_jpeg(
-          image_ops.encode_jpeg(image1), dct_method='INTEGER_ACCURATE')
+          image_ops.encode_jpeg(image1), dct_method=""INTEGER_ACCURATE"")
       jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
 
       # The decoded-encoded image should be similar to the input
@@ -2340,9 +2340,9 @@ class JpegTest(test_util.TensorFlowTestCase):
       # Encode it, then decode it, then encode it
       image0 = constant_op.constant(_SimpleColorRamp())
       jpeg0 = image_ops.encode_jpeg(image0)
-      image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_FAST')
+      image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_FAST"")
       image2 = image_ops.decode_jpeg(
-          image_ops.encode_jpeg(image1), dct_method='INTEGER_FAST')
+          image_ops.encode_jpeg(image1), dct_method=""INTEGER_FAST"")
       jpeg0, image0, image1, image2 = sess.run([jpeg0, image0, image1, image2])
 
       # The decoded-encoded image should be similar to the input, but
@@ -2364,7 +2364,7 @@ class JpegTest(test_util.TensorFlowTestCase):
       # default.  They should be the same.
       image0 = constant_op.constant(_SimpleColorRamp())
       jpeg0 = image_ops.encode_jpeg(image0)
-      image1 = image_ops.decode_jpeg(jpeg0, dct_method='INTEGER_FAST')
+      image1 = image_ops.decode_jpeg(jpeg0, dct_method=""INTEGER_FAST"")
       image2 = image_ops.decode_jpeg(jpeg0)
       image1, image2 = sess.run([image1, image2])
 
@@ -2373,7 +2373,7 @@ class JpegTest(test_util.TensorFlowTestCase):
 
   def testShape(self):
     with self.test_session(use_gpu=True) as sess:
-      jpeg = constant_op.constant('nonsense')
+      jpeg = constant_op.constant(""nonsense"")
       for channels in 0, 1, 3:
         image = image_ops.decode_jpeg(jpeg, channels=channels)
         self.assertEqual(image.get_shape().as_list(),
@@ -2384,8 +2384,8 @@ class PngTest(test_util.TensorFlowTestCase):
 
   def testExisting(self):
     # Read some real PNGs, converting to different channel numbers
-    prefix = 'tensorflow/core/lib/png/testdata/'
-    inputs = (1, 'lena_gray.png'), (4, 'lena_rgba.png')
+    prefix = ""tensorflow/core/lib/png/testdata/""
+    inputs = (1, ""lena_gray.png""), (4, ""lena_rgba.png"")
     for channels_in, filename in inputs:
       for channels in 0, 1, 3, 4:
         with self.test_session(use_gpu=True) as sess:
@@ -2451,7 +2451,7 @@ class PngTest(test_util.TensorFlowTestCase):
 
   def testShape(self):
     with self.test_session(use_gpu=True):
-      png = constant_op.constant('nonsense')
+      png = constant_op.constant(""nonsense"")
       for channels in 0, 1, 3:
         image = image_ops.decode_png(png, channels=channels)
         self.assertEqual(image.get_shape().as_list(),
@@ -2462,8 +2462,8 @@ class GifTest(test_util.TensorFlowTestCase):
 
   def testValid(self):
     # Read some real GIFs
-    prefix = 'tensorflow/core/lib/gif/testdata/'
-    filename = 'scan.gif'
+    prefix = ""tensorflow/core/lib/gif/testdata/""
+    filename = ""scan.gif""
     WIDTH = 20
     HEIGHT = 40
     STRIDE = 5
@@ -2492,8 +2492,8 @@ class GifTest(test_util.TensorFlowTestCase):
 
   def testInValid(self):
     # Read some real GIFs
-    prefix = 'tensorflow/core/lib/gif/testdata/'
-    filename = 'optimized.gif'
+    prefix = ""tensorflow/core/lib/gif/testdata/""
+    filename = ""optimized.gif""
 
     with self.test_session(use_gpu=True) as sess:
       gif0 = io_ops.read_file(prefix + filename)
@@ -2503,7 +2503,7 @@ class GifTest(test_util.TensorFlowTestCase):
 
   def testShape(self):
     with self.test_session(use_gpu=True) as sess:
-      gif = constant_op.constant('nonsense')
+      gif = constant_op.constant(""nonsense"")
       image = image_ops.decode_gif(gif)
       self.assertEqual(image.get_shape().as_list(), [None, None, None, 3])
 
@@ -2526,7 +2526,7 @@ class ConvertImageTest(test_util.TensorFlowTestCase):
       image = constant_op.constant([1], dtype=dtypes.uint8)
       image_ops.convert_image_dtype(image, dtypes.uint8)
       y = image_ops.convert_image_dtype(image, dtypes.uint8)
-      self.assertEquals(y.op.type, 'Identity')
+      self.assertEquals(y.op.type, ""Identity"")
       self.assertEquals(y.op.inputs[0], image)
 
   def testConvertBetweenInteger(self):
@@ -2751,5 +2751,5 @@ class TotalVariationTest(test_util.TensorFlowTestCase):
     self._test(multi, tot_var * np.array([1.0, 1.1, 1.2]))
 
 
-if __name__ == '__main__':
+if __name__ == ""__main__"":
   googletest.main()
",0,train
37e7693c78ef7f73192d95b439d3c3be0bee5271,tensorflow/tensorflow,"Re-enable contrib/quantize test when unfused mean and variance updates are used.
Fix missed change from ""executing_eagerly"" to ""executing_eagerly_outside_functions"" that was accidentally dropped in cl/300392015.

PiperOrigin-RevId: 300407526
Change-Id: Iaa88ac039a440b4e1081bc210a647de61cfad675",normalization.py,"@@ -20,7 +20,6 @@ from __future__ import print_function
 
 from tensorflow.python.compat import compat
 from tensorflow.python.distribute import distribution_strategy_context
-from tensorflow.python.eager import context
 from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
@@ -544,8 +543,9 @@ class BatchNormalizationBase(Layer):
     # TODO(rmlarsen): Support using fused avg updates for non-eager execution
     # after fixing graph pattern matching and enabling fused_batch_norm to
     # take exponential_avg_factor as a tensor input.
-    use_fused_avg_updates = (compat.forward_compatible(2020, 3, 6) and
-                             context.executing_eagerly())
+    use_fused_avg_updates = (
+        compat.forward_compatible(2020, 3, 6) and
+        ops.executing_eagerly_outside_functions())
     if use_fused_avg_updates:
       exponential_avg_factor = 1.0 - self.momentum
     else:
",0,train
580c7502f68dd84cf74c34b9d454a37def81d286,tensorflow/tensorflow,"internal change.
Change: 149658231",learn_runner.py,"@@ -24,6 +24,28 @@ from tensorflow.contrib.learn.python.learn.experiment import Experiment
 from tensorflow.python.platform import tf_logging as logging
 
 
+# TODO(xiejw): Refactor the learn_runner to make code reusable.
+def _execute_schedule(experiment, schedule):
+  """"""Execute the method named `schedule` of `experiment`.""""""
+  if not hasattr(experiment, schedule):
+    logging.error('Schedule references non-existent task %s', schedule)
+    valid_tasks = [x for x in dir(experiment)
+                   if not x.startswith('_')
+                   and callable(getattr(experiment, x))]
+    logging.error('Allowed values for this experiment are: %s', valid_tasks)
+    raise ValueError('Schedule references non-existent task %s' % schedule)
+
+  task = getattr(experiment, schedule)
+  if not callable(task):
+    logging.error('Schedule references non-callable member %s', schedule)
+    valid_tasks = [x for x in dir(experiment)
+                   if not x.startswith('_')
+                   and callable(getattr(experiment, x))]
+    logging.error('Allowed values for this experiment are: %s', valid_tasks)
+    raise TypeError('Schedule references non-callable member %s' % schedule)
+  return task()
+
+
 def run(experiment_fn, output_dir, schedule=None):
   """"""Make and run an experiment.
 
@@ -86,25 +108,7 @@ def run(experiment_fn, output_dir, schedule=None):
   config = experiment.estimator.config
   schedule = schedule or _get_default_schedule(config)
 
-  # Execute the schedule
-  if not hasattr(experiment, schedule):
-    logging.error('Schedule references non-existent task %s', schedule)
-    valid_tasks = [x for x in dir(experiment)
-                   if not x.startswith('_')
-                   and callable(getattr(experiment, x))]
-    logging.error('Allowed values for this experiment are: %s', valid_tasks)
-    raise ValueError('Schedule references non-existent task %s' % schedule)
-
-  task = getattr(experiment, schedule)
-  if not callable(task):
-    logging.error('Schedule references non-callable member %s', schedule)
-    valid_tasks = [x for x in dir(experiment)
-                   if not x.startswith('_')
-                   and callable(getattr(experiment, x))]
-    logging.error('Allowed values for this experiment are: %s', valid_tasks)
-    raise TypeError('Schedule references non-callable member %s' % schedule)
-
-  return task()
+  return _execute_schedule(experiment, schedule)
 
 
 @experimental
",0,train
580c7502f68dd84cf74c34b9d454a37def81d286,tensorflow/tensorflow,"internal change.
Change: 149658231",tuner.py,"@@ -24,7 +24,6 @@ import abc
 from tensorflow.contrib.framework.python.framework import experimental
 
 
-@experimental
 class Tuner(object):
   """"""Tuner class is the interface for Experiment hyper-parameters tuning.
 
@@ -45,6 +44,7 @@ class Tuner(object):
 
   __metaclass__ = abc.ABCMeta
 
+  @experimental
   @abc.abstractmethod
   def next_trial(self):
     """"""Switch to the next trial.
@@ -59,6 +59,7 @@ class Tuner(object):
     """"""
     raise NotImplementedError(""Calling an abstract method."")
 
+  @experimental
   @abc.abstractmethod
   def run_experiment(self, experiment_fn):
     """"""Creates an Experiment by calling `experiment_fn` and executes it.
",0,train
398e65b283cdd213c3f2474bb0fedf3d3c10d848,tensorflow/tensorflow,"[TF:MLIR] Replace the use of saved model unused function removal pass with mark
function visibility pass followed by symbol DCE pass.

Fix the saved model unused function test.

PiperOrigin-RevId: 293184416
Change-Id: Ifa8ca9da834ac384643c952edfd73b6d7fd00864",delete_unused_funcs.cc,"@@ -1,99 +0,0 @@
-/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the ""License"");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an ""AS IS"" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-// This pass uses tf_saved_model dialect linkage information to delete
-// unused func's.
-
-#include ""llvm/ADT/DenseMap.h""
-#include ""llvm/ADT/STLExtras.h""
-#include ""mlir/IR/Module.h""  // TF:llvm-project
-#include ""mlir/Pass/Pass.h""  // TF:llvm-project
-#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_saved_model.h""
-
-namespace mlir {
-namespace tf_saved_model {
-
-namespace {
-struct DeleteUnusedFuncsPass : public ModulePass<DeleteUnusedFuncsPass> {
-  void runOnModule() override;
-};
-}  // namespace
-
-void DeleteUnusedFuncsPass::runOnModule() {
-  // If the model doesn't have tf_saved_model semantics, we can't do anything.
-  if (!HasTfSavedModelSemantics(getModule())) {
-    return;
-  }
-
-  // TODO(silvasean): Use more generic MLIR functionality when available.
-  // This is just a basic call graph reachability pass (which in the case of TF
-  // functional control flow also implies handling tf.If/tf.While).
-  // The only thing specific to tf_saved_model is the set of roots.
-
-  auto module = getModule();
-  SymbolTable symbol_table(module);
-
-  // Calculate func reachability with a DFS on the symbol reference graph.
-  SmallPtrSet<FuncOp, 8> dfs_visited_set;
-  SmallVector<FuncOp, 16> dfs_stack;
-
-  // Initialize the roots of the DFS search.
-  for (auto func : module.getOps<FuncOp>()) {
-    if (IsExported(func)) {
-      dfs_stack.push_back(func);
-    }
-  }
-
-  // Do the DFS.
-  while (!dfs_stack.empty()) {
-    FuncOp func = dfs_stack.pop_back_val();
-    if (!dfs_visited_set.insert(func).second) {
-      // If we already visited this node, skip it.
-      continue;
-    }
-
-    SmallPtrSet<FuncOp, 8> callees;
-    auto uses = SymbolTable::getSymbolUses(func);
-    for (auto use : *uses) {
-      auto func = symbol_table.lookup<FuncOp>(
-          use.getSymbolRef().cast<FlatSymbolRefAttr>().getValue());
-      if (func) {
-        callees.insert(func);
-      }
-    }
-
-    for (auto callee : callees) {
-      dfs_stack.push_back(callee);
-    }
-  }
-
-  // Erase all unreachable func's.
-  for (auto func : llvm::make_early_inc_range(module.getOps<FuncOp>())) {
-    if (dfs_visited_set.find(func) == dfs_visited_set.end()) {
-      func.erase();
-    }
-  }
-}
-
-std::unique_ptr<OpPassBase<ModuleOp>> CreateDeleteUnusedFuncsPass() {
-  return std::make_unique<DeleteUnusedFuncsPass>();
-}
-
-static PassRegistration<DeleteUnusedFuncsPass> pass(
-    ""tf-saved-model-delete-unused-funcs"",
-    ""Use tf_saved_model linkage information to delete unused func's."");
-
-}  // namespace tf_saved_model
-}  // namespace mlir
",0,train
398e65b283cdd213c3f2474bb0fedf3d3c10d848,tensorflow/tensorflow,"[TF:MLIR] Replace the use of saved model unused function removal pass with mark
function visibility pass followed by symbol DCE pass.

Fix the saved model unused function test.

PiperOrigin-RevId: 293184416
Change-Id: Ifa8ca9da834ac384643c952edfd73b6d7fd00864",passes.h,"@@ -182,10 +182,6 @@ void CreateTPUBridge(OpPassManager& pm);
 
 namespace tf_saved_model {
 
-// Creates a pass that uses tf_saved_model dialect linkage information
-// to delete unused func's.
-std::unique_ptr<OpPassBase<ModuleOp>> CreateDeleteUnusedFuncsPass();
-
 // Creates a pass that optimizes tf_saved_model.global_tensor ops.
 std::unique_ptr<OpPassBase<ModuleOp>> CreateOptimizeGlobalTensorsPass();
 
",0,train
df2fbb89588065fca2c6e5fcfba7d8c2b4378591,tensorflow/tensorflow,"[XLA:Python] Plumb xla_gpu_enable_fast_min_max into the XLA:Python client.

Disable it by default to get correct NaN semantics for min/max.

Will fix https://github.com/google/jax/issues/1072 when deployed in jaxlib.

PiperOrigin-RevId: 260567980",xla.cc,"@@ -425,7 +425,10 @@ PYBIND11_MODULE(xla_extension, m) {
                     &DebugOptions::set_xla_cpu_fast_math_honor_nans)
       .def_property(""xla_cpu_fast_math_honor_division"",
                     &DebugOptions::xla_cpu_fast_math_honor_division,
-                    &DebugOptions::set_xla_cpu_fast_math_honor_division);
+                    &DebugOptions::set_xla_cpu_fast_math_honor_division)
+      .def_property(""xla_gpu_enable_fast_min_max"",
+                    &DebugOptions::xla_gpu_enable_fast_min_max,
+                    &DebugOptions::set_xla_gpu_enable_fast_min_max);
 
   py::class_<ExecutableBuildOptions>(m, ""ExecutableBuildOptions"")
       .def(py::init<>())
",0,train
df2fbb89588065fca2c6e5fcfba7d8c2b4378591,tensorflow/tensorflow,"[XLA:Python] Plumb xla_gpu_enable_fast_min_max into the XLA:Python client.

Disable it by default to get correct NaN semantics for min/max.

Will fix https://github.com/google/jax/issues/1072 when deployed in jaxlib.

PiperOrigin-RevId: 260567980",xla_client.py,"@@ -109,6 +109,7 @@ class LocalBackend(Backend):
     options.debug_options.xla_cpu_fast_math_honor_infs = True
     options.debug_options.xla_cpu_fast_math_honor_nans = True
     options.debug_options.xla_cpu_fast_math_honor_division = True
+    options.debug_options.xla_gpu_enable_fast_min_max = False
     return _xla.LocalExecutable.Compile(c_computation,
                                         compile_options.argument_layouts,
                                         options, self.client,
",0,train
c9cd1784bf287543d89593ca1432170cdbf694de,tensorflow/tensorflow,"Use a header to declare Register_AUDIO_MICROFRONTEND, instead of having to forward-declare.

PiperOrigin-RevId: 275381415
Change-Id: Ib0abc4e0a8813532362ac70a95b9e68c344c4ca9",audio_microfrontend.h,"@@ -0,0 +1,29 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_
+#define TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_
+
+#include ""tensorflow/lite/context.h""
+
+namespace tflite {
+namespace ops {
+namespace custom {
+TfLiteRegistration* Register_AUDIO_MICROFRONTEND();
+}  // namespace custom
+}  // namespace ops
+}  // namespace tflite
+
+#endif  // TENSORFLOW_LITE_EXPERIMENTAL_MICROFRONTEND_AUDIO_MICROFRONTEND_H_
",0,train
c9cd1784bf287543d89593ca1432170cdbf694de,tensorflow/tensorflow,"Use a header to declare Register_AUDIO_MICROFRONTEND, instead of having to forward-declare.

PiperOrigin-RevId: 275381415
Change-Id: Ib0abc4e0a8813532362ac70a95b9e68c344c4ca9",audio_microfrontend_test.cc,"@@ -14,6 +14,8 @@ limitations under the License.
 ==============================================================================*/
 // Unit test for TFLite Micro Frontend op.
 
+#include ""tensorflow/lite/experimental/microfrontend/audio_microfrontend.h""
+
 #include <memory>
 #include <vector>
 
@@ -27,9 +29,6 @@ limitations under the License.
 namespace tflite {
 namespace ops {
 namespace custom {
-
-TfLiteRegistration* Register_AUDIO_MICROFRONTEND();
-
 namespace {
 
 using ::testing::ElementsAreArray;
",0,train
9f3d53da8262cba49716ec85781fb88d80626b81,tensorflow/tensorflow,"Less strict error bound in conv_ops_test

PiperOrigin-RevId: 243868038",conv_ops_test.cc,"@@ -794,7 +794,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     if (image_width == filter_size && image_height == filter_size) {
       test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
     } else {
-      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6);
+      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
     }
   }
 
@@ -844,7 +844,7 @@ class FusedConv2DOpTest : public OpsTestBase {
     if (image_width == filter_size && image_height == filter_size) {
       test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-4);
     } else {
-      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-6);
+      test::ExpectClose(conv_2d, fused_conv_2d, /*atol=*/1e-5);
     }
   }
 
",0,train
002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context.

PiperOrigin-RevId: 401161076
Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",inference_context.cc,"@@ -330,8 +330,9 @@ absl::Status InferenceContext::ReserveGraphTensors(
     }
     RETURN_IF_ERROR(SelectBestStorageType(gpu_info, shape, storage_type,
                                           data_type, layout, &storage_type));
-    tensor_reserver_.Add(
-        t->id, {shape, TensorDescriptor{data_type, storage_type, layout}});
+    TensorDescriptor tensor_desc{data_type, storage_type, layout};
+    tensor_desc.shape = BHWDC(shape.b, shape.h, shape.w, 1, shape.c);
+    tensor_reserver_.Add(t->id, tensor_desc);
     max_id = std::max(max_id, t->id);
   }
   tensor_reserver_.SetNext(max_id + 1);
@@ -344,7 +345,7 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
   std::map<ValueId, TensorDescriptor> tensor_descriptors;
   const auto values = graph.values();
   for (auto value : values) {
-    tensor_descriptors[value->id] = tensor_reserver_.Get(value->id).descriptor;
+    tensor_descriptors[value->id] = tensor_reserver_.Get(value->id);
   }
   std::set<NodeId> consumed_nodes;
   std::vector<Node*> graph_nodes = graph.nodes();
@@ -365,7 +366,7 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
           absl::any_cast<ConstTensorAttributes>(node.operation.attributes);
       auto outputs = graph.FindOutputs(node.id);
       const_tensors_descs_[outputs[0]->id] =
-          tensor_reserver_.Get(outputs[0]->id).descriptor;
+          tensor_reserver_.Get(outputs[0]->id);
       const_tensors_descs_[outputs[0]->id].UploadData(attr.tensor);
       continue;
     }
@@ -405,12 +406,10 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
       OperationDef op_def;
       op_def.precision = precision_;
       for (int j = 0; j < inputs.size(); ++j) {
-        op_def.src_tensors.push_back(
-            tensor_reserver_.Get(inputs[j]->id).descriptor);
+        op_def.src_tensors.push_back(tensor_reserver_.Get(inputs[j]->id));
       }
       for (int j = 0; j < outputs.size(); ++j) {
-        op_def.dst_tensors.push_back(
-            tensor_reserver_.Get(outputs[j]->id).descriptor);
+        op_def.dst_tensors.push_back(tensor_reserver_.Get(outputs[j]->id));
       }
       RETURN_IF_ERROR(GPUOperationFromNode(gpu_info, op_def, hints, inputs,
                                            outputs, node, &gpu_subgraph));
@@ -418,7 +417,9 @@ absl::Status InferenceContext::ConvertOperations(const GpuInfo& gpu_info,
     absl::flat_hash_map<int, ValueId> mapping_to_global_ids;
     for (int j = 0; j < gpu_subgraph.new_tensors.size(); ++j) {
       const auto& t = gpu_subgraph.new_tensors[j];
-      auto global_id = tensor_reserver_.Add({t.first, t.second});
+      TensorDescriptor td = t.second;
+      td.shape = BHWDC(t.first.b, t.first.h, t.first.w, 1, t.first.c);
+      auto global_id = tensor_reserver_.Add(td);
       mapping_to_global_ids[j] = global_id;
     }
     for (auto& gpu_op : gpu_subgraph.operations) {
@@ -525,8 +526,7 @@ InferenceContext::TensorMemoryType InferenceContext::GetTensorMemoryType(
     return TensorMemoryType::kConst;
   } else if (variable_ids_and_refs_.find(id) != variable_ids_and_refs_.end()) {
     return TensorMemoryType::kVariable;
-  } else if (IsBufferBased(gpu_info,
-                           tensor_reserver_.Get(id).descriptor.storage_type)) {
+  } else if (IsBufferBased(gpu_info, tensor_reserver_.Get(id).storage_type)) {
     return TensorMemoryType::kBuffer;
   } else {
     return TensorMemoryType::kStrongShape;
@@ -560,7 +560,7 @@ absl::Status InferenceContext::AllocateMemoryForVariableTensors(
         ref_value_to_tensor_index.end()) {
       const auto& t = tensor_reserver_.Get(value_and_ref_value.first);
       const auto& shape = t.shape;
-      const auto& descriptor = t.descriptor;
+      const auto& descriptor = t;
 
       RETURN_IF_ERROR(
           CreateTensor(*context, shape, descriptor,
@@ -583,7 +583,7 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(const GpuInfo& gpu_info,
   for (auto& usage : buffer_usages) {
     const auto& t = tensor_reserver_.Get(usage.first);
     const auto& shape = t.shape;
-    const auto& descriptor = t.descriptor;
+    const auto& descriptor = t;
     const size_t element_size =
         descriptor.data_type == DataType::FLOAT32 ? 4 : 2;
     size_t buffer_size;
@@ -664,7 +664,8 @@ absl::Status InferenceContext::AllocateMemoryForBuffers(const GpuInfo& gpu_info,
         continue;
       const int tensor_index = graph_ids_to_shared_buffer_tensors_[t.first];
       if (created_tensors[tensor_index]) continue;
-      const auto& shape = tensor_reserver_.Get(t.first).shape;
+      const auto& shape_5d = tensor_reserver_.Get(t.first).shape;
+      const auto shape = BHWC(shape_5d.b, shape_5d.h, shape_5d.w, shape_5d.c);
       const int buffer_index = use_offset_assignment
                                    ? tensor_index
                                    : buffer_assignment.object_ids[tensor_index];
@@ -698,7 +699,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
       },
       &usages);
 
-  std::vector<TensorUsageRecord<DummyTensor>> usage_records;
+  std::vector<TensorUsageRecord<TensorDescriptor>> usage_records;
   std::map<ValueId, ValueId> remap_from_graph_ids;
   for (auto& usage : usages) {
     remap_from_graph_ids[usage.first] = usage_records.size();
@@ -707,7 +708,7 @@ absl::Status InferenceContext::AllocateMemoryForStrongShapes(
                              static_cast<TaskId>(usage.second.y)});
   }
 
-  ObjectsAssignment<DummyTensor> assignment;
+  ObjectsAssignment<TensorDescriptor> assignment;
   RETURN_IF_ERROR(AssignObjectsToTensors(
       usage_records, MemoryStrategy::EQUALITY, &assignment));
 
",0,test
002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context.

PiperOrigin-RevId: 401161076
Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",inference_context.h,"@@ -170,56 +170,32 @@ class InferenceContext {
   //  anywhere.
   std::vector<CLNode> nodes_;
 
-  struct DummyTensor {
-    BHWC shape;
-    TensorDescriptor descriptor;
-
-    bool operator==(const DummyTensor& b) const {
-      return shape == b.shape && descriptor == b.descriptor;
-    }
-  };
-
   class TensorReserver {
    public:
     TensorReserver() : next_(0) {}
-    ValueId Add(const DummyTensor& dummy) {
+    ValueId Add(const TensorDescriptor& dummy) {
       reservations_[next_] = dummy;
       return next_++;
     }
-    void Add(ValueId id, const DummyTensor& dummy) {
+    void Add(ValueId id, const TensorDescriptor& dummy) {
       reservations_[id] = dummy;
     }
     void SetNext(ValueId id) { next_ = id; }
-    DummyTensor Get(ValueId id) { return reservations_[id]; }
+    TensorDescriptor Get(ValueId id) { return reservations_[id]; }
 
     std::vector<std::pair<ValueId, TensorDescriptor>> GetTensorDescs() const {
-      std::vector<std::pair<ValueId, TensorDescriptor>> result;
-      for (auto& v : reservations_) {
-        TensorDescriptor desc = v.second.descriptor;
-        desc.shape.b = v.second.shape.b;
-        desc.shape.h = v.second.shape.h;
-        desc.shape.w = v.second.shape.w;
-        desc.shape.d = 1;
-        desc.shape.c = v.second.shape.c;
-        result.push_back({v.first, desc});
-      }
-      return result;
+      return std::vector<std::pair<ValueId, TensorDescriptor>>(
+          reservations_.begin(), reservations_.end());
     }
 
     void Add(const std::vector<std::pair<ValueId, TensorDescriptor>>& tensors) {
       for (auto& v : tensors) {
-        DummyTensor dummy;
-        dummy.descriptor = v.second;
-        dummy.shape.b = v.second.shape.b;
-        dummy.shape.h = v.second.shape.h;
-        dummy.shape.w = v.second.shape.w;
-        dummy.shape.c = v.second.shape.c;
-        Add(v.first, dummy);
+        Add(v.first, v.second);
       }
     }
 
    private:
-    absl::flat_hash_map<ValueId, DummyTensor> reservations_;
+    absl::flat_hash_map<ValueId, TensorDescriptor> reservations_;
     ValueId next_;
   };
   TensorReserver tensor_reserver_;
",0,test
002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context.

PiperOrigin-RevId: 401161076
Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",tensor.cc,"@@ -615,7 +615,17 @@ absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
                                              const TensorDescriptor& descriptor,
                                              int row_bytes_alignment,
                                              Tensor* result) {
-  const int width = shape.b * shape.w;
+  BHWDC shape5d(shape.b, shape.h, shape.w, 1, shape.c);
+  return CreateSharedImage2DBufferTensor(context, memory, shape5d, descriptor,
+                                         row_bytes_alignment, result);
+}
+
+absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
+                                             cl_mem memory, const BHWDC& shape,
+                                             const TensorDescriptor& descriptor,
+                                             int row_bytes_alignment,
+                                             Tensor* result) {
+  const int width = shape.b * shape.w * shape.d;
   const int height =
       descriptor.storage_type == TensorStorageType::SINGLE_TEXTURE_2D
           ? shape.h
",0,test
002677f22fb686e34c464143f8ce2a71fbd03190,tensorflow/tensorflow,"Using TensorDescriptor instead of DummyTensor in OpenCL inference context.

PiperOrigin-RevId: 401161076
Change-Id: I9750034906311eb9ac234e6fade6d09b2385b41c",tensor.h,"@@ -158,6 +158,12 @@ absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
                                              int row_bytes_alignment,
                                              Tensor* result);
 
+absl::Status CreateSharedImage2DBufferTensor(const CLContext& context,
+                                             cl_mem memory, const BHWDC& shape,
+                                             const TensorDescriptor& descriptor,
+                                             int row_bytes_alignment,
+                                             Tensor* result);
+
 template <DataType T>
 absl::Status Tensor::WriteData(CLCommandQueue* queue,
                                const tflite::gpu::Tensor<BHWC, T>& src) {
",0,test
28340a4b12e286fe14bb7ac08aebe325c3e150b4,tensorflow/tensorflow,"Fix cmake for MacOS (#17005)

This change addresses cmake build issues for MacOS.
Also fixes #14712",hexagon_controller.c,"@@ -19,7 +19,7 @@ limitations under the License.
 
 #include ""hexagon_controller.h""
 
-#include <malloc.h>
+#include <stdlib.h>
 #include <stdio.h>
 
 #include ""adspmsgd.h""
",0,train
613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining

PiperOrigin-RevId: 331814867
Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",inline_function_utils.cc,"@@ -587,6 +587,10 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g,
   //
   // If 'x' is a node in fbody->graph and its copy in 'g' is 'y', we
   // remember 'y' in node_map[x->id()].
+  absl::flat_hash_set<string> fn_nodes;
+  for (Node* n : fbody->graph->op_nodes()) {
+    fn_nodes.insert(n->name());
+  }
   std::vector<Node*> node_map(fbody->graph->num_node_ids());
   for (Node* n : fbody->graph->op_nodes()) {
     NodeDef ndef = n->def();
@@ -605,6 +609,8 @@ Status InlineFunctionBody(const FunctionLibraryDefinition& flib_def, Graph* g,
     const string prefix = strings::StrCat(caller->name(), ""/"");
     TF_RETURN_IF_ERROR(AddPrefixAndSuffixToNode(prefix, /*suffix=*/"""", &ndef,
                                                 options.uniquify_frame_names));
+    TF_RETURN_IF_ERROR(
+        MaybeAddPrefixToColocationConstraints(fn_nodes, prefix, &ndef));
 
     Status added_node;
     Node* clone = g->AddNode(ndef, &added_node);
",0,train
613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining

PiperOrigin-RevId: 331814867
Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util.cc,"@@ -795,6 +795,8 @@ bool IsValidControlInputName(StringPiece sp) {
   }
 }
 
+const StringPiece kColocationGroupPrefixStringPiece(kColocationGroupPrefix);
+
 }  // namespace
 
 Status ValidateOpInput(const string& input_name, bool* is_control_input) {
@@ -924,17 +926,27 @@ Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
     attr.set_s(frame_name);
   }
 
-  // Update colocation constraints.
-  constexpr char kClassAttr[] = ""_class"";
-  auto class_attr = node_def->mutable_attr()->find(kClassAttr);
-  if (class_attr != node_def->mutable_attr()->end()) {
-    AttrValue new_value;
-    new_value.mutable_list()->add_s(
-        strings::StrCat(prefix, class_attr->second.s()));
-    node_def->mutable_attr()->erase(kClassAttr);
-    node_def->mutable_attr()->insert({kClassAttr, new_value});
-  }
+  return Status::OK();
+}
 
+Status MaybeAddPrefixToColocationConstraints(
+    const absl::flat_hash_set<string>& match, StringPiece prefix,
+    NodeDef* node_def) {
+  auto attr = node_def->mutable_attr()->find(kColocationAttrName);
+  if (attr == node_def->mutable_attr()->end()) {
+    return Status::OK();
+  }
+  auto constraints_list = attr->second.mutable_list();
+  auto constraints_size = constraints_list->s_size();
+  for (size_t i = 0; i < constraints_size; ++i) {
+    StringPiece original(constraints_list->s(i));
+    if (absl::ConsumePrefix(&original, kColocationGroupPrefixStringPiece)) {
+      if (match.contains(original)) {
+        (*constraints_list->mutable_s(i)) =
+            strings::StrCat(kColocationGroupPrefix, prefix, original);
+      }
+    }
+  }
   return Status::OK();
 }
 
",0,train
613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining

PiperOrigin-RevId: 331814867
Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util.h,"@@ -19,6 +19,7 @@ limitations under the License.
 #include <string>
 #include <vector>
 
+#include ""absl/container/flat_hash_set.h""
 #include ""tensorflow/core/framework/attr_value_util.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -391,6 +392,13 @@ Status AttachDef(const Status& status, const NodeDef& node_def,
 Status AddPrefixAndSuffixToNode(StringPiece prefix, StringPiece suffix,
                                 NodeDef* node_def,
                                 bool uniquify_frame_name = true);
+
+// Appends the given prefix to the colocation group name if the name exists
+// in `to_match`.
+Status MaybeAddPrefixToColocationConstraints(
+    const absl::flat_hash_set<string>& match, StringPiece prefix,
+    NodeDef* node_def);
+
 }  // namespace tensorflow
 
 #endif  // TENSORFLOW_CORE_FRAMEWORK_NODE_DEF_UTIL_H_
",0,train
613dad93cebadd573da162cad261318bcebe1416,tensorflow/tensorflow,"Fix colocation in function inlining

PiperOrigin-RevId: 331814867
Change-Id: Idf8692894dd83ebd9de195b5b676ef681d20d11c",node_def_util_test.cc,"@@ -615,6 +615,39 @@ TEST(AddPrefixAndSuffixToNode, Enter) {
   EXPECT_EQ(""prefix/test_frame/suffix"", frame_name);
 }
 
+TEST(MaybeAddPrefixToColocationConstraints, Basic) {
+  NodeDef node_def;
+  node_def.set_name(""Identity"");
+  node_def.set_op(""Identity"");
+  AddNodeAttr(kColocationAttrName,
+              {strings::StrCat(kColocationGroupPrefix, ""Node1""),
+               strings::StrCat(kColocationGroupPrefix, ""Node2""),
+               strings::StrCat(kColocationGroupPrefix, ""Node3"")},
+              &node_def);
+
+  absl::flat_hash_set<string> match;
+  match.insert(""Node1"");
+  match.insert(""Node3"");
+  TF_ASSERT_OK(MaybeAddPrefixToColocationConstraints(match, ""fn/"", &node_def));
+  std::vector<string> coloc_constraints;
+  TF_ASSERT_OK(GetNodeAttr(node_def, kColocationAttrName, &coloc_constraints));
+  EXPECT_EQ(
+      coloc_constraints,
+      std::vector<string>({""loc:@fn/Node1"", ""loc:@Node2"", ""loc:@fn/Node3""}));
+}
+
+TEST(MaybeAddPrefixToColocationConstraints, NoConstraints) {
+  NodeDef node_def;
+  node_def.set_name(""Identity"");
+  node_def.set_op(""Identity"");
+
+  absl::flat_hash_set<string> match;
+  match.insert(""Node1"");
+  match.insert(""Node3"");
+  TF_ASSERT_OK(MaybeAddPrefixToColocationConstraints(match, ""fn/"", &node_def));
+  EXPECT_FALSE(HasNodeAttr(node_def, kColocationAttrName));
+}
+
 TEST(FormatNodeForErrorTest, Node) {
   Graph g(OpRegistry::Global());
   Node* node;
",0,train
0557f9ef182290b28bb30076f9e4c52f67c6cc55,tensorflow/tensorflow,"Apply clang-tidy fixes for llvm-header-guard in test_passes.h (NFC)

PiperOrigin-RevId: 434206359",test_passes.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_
-#define TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_
+#ifndef MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H
+#define MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H
 
 #include ""mlir/Pass/Pass.h""
 
@@ -31,4 +31,4 @@ std::unique_ptr<OperationPass<FuncOp>> createTestGmlStLoopTilingPass();
 }  // namespace gml_st
 }  // namespace mlir
 
-#endif  // TENSORFLOW_COMPILER_MLIR_HLO_INCLUDE_MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H_
+#endif  // MLIR_HLO_DIALECT_GML_ST_TRANSFORMS_TEST_PASSES_H
",0,test
8714a150aae0cc62703e5b7070747443296903d3,tensorflow/tensorflow,"Fix bug to enable function conversion with main graph disable flag.

Setting the `minimum_segment_size` to -1 will disable the main graph conversion, but it currently also disables the function conversions. This change disables only the main graph from conversion, and runs function conversions.

PiperOrigin-RevId: 392517438
Change-Id: I32eb70f4016bd111391cef72d3fb81d34180b118",trt_optimization_pass.cc,"@@ -353,7 +353,9 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster,
   VLOG(1) << ""Called TRTOptimization Pass "" << name_
           << "" on a grappler item with id="" << item.id;
   TF_ASSIGN_OR_RETURN(bool do_function_conversion, ShouldConvertFunction(item));
-  if (minimum_segment_size_ == -1 ||
+  // Optimizing the main graph(identified with `item.id == ""tf_graph""`) with
+  // `minimim_segment_size == -1` indicates skipping main graph conversion.
+  if ((minimum_segment_size_ == -1 && item.id == ""tf_graph"") ||
       (item.id != ""tf_graph"" && !do_function_conversion)) {
     VLOG(1) << ""Not optimizing this grappler item: "" << item.id;
     *optimized_graph = item.graph;
@@ -410,6 +412,7 @@ Status TRTOptimizationPass::Optimize(grappler::Cluster* cluster,
         tensorflow::down_cast<const grappler::GrapplerFunctionItem&>(item);
     TF_RETURN_IF_ERROR(
         UpdateFunctionSpecificConversionParams(cp, func_item.func_attr()));
+    assert(cp.minimum_segment_size > 0);
   }
 
   auto status = ConvertAfterShapes(cp);
",0,train
2ef925512189149d9374f20a02389382f75a15ce,tensorflow/tensorflow,"lite: Release cond and body subgraph of WHILE op

It saves more runtime memory with very little latency overhead.

PiperOrigin-RevId: 394379825
Change-Id: I32e9d81ccc727687f7f7e4bdaa2de6d612de7fa8",while.cc,"@@ -111,6 +111,7 @@ struct OpData {
   int body_subgraph_index;
   bool cond_has_dynamic_output_tensors;
   bool body_has_dynamic_output_tensors;
+  bool subgraphs_allocated;
 };
 
 void* Init(TfLiteContext* context, const char* buffer, size_t length) {
@@ -120,6 +121,7 @@ void* Init(TfLiteContext* context, const char* buffer, size_t length) {
   op_data->body_subgraph_index = params->body_subgraph_index;
   op_data->cond_has_dynamic_output_tensors = false;
   op_data->body_has_dynamic_output_tensors = false;
+  op_data->subgraphs_allocated = false;
   return op_data;
 }
 
@@ -175,6 +177,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                    context, this_subgraph, TfLiteIntArrayView(node->inputs),
                    body_subgraph, body_subgraph->inputs(), true));
   TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors());
+  op_data->subgraphs_allocated = true;
   if (body_subgraph->HasDynamicTensors()) {
     op_data->body_has_dynamic_output_tensors = true;
   } else {
@@ -214,7 +217,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  const OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
+  OpData* op_data = reinterpret_cast<OpData*>(node->user_data);
   Subgraph* this_subgraph = reinterpret_cast<Subgraph*>(context->impl_);
   auto* subgraphs = this_subgraph->GetSubgraphs();
   Subgraph* cond_subgraph = (*subgraphs)[op_data->cond_subgraph_index].get();
@@ -256,6 +259,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // isn't optimized yet and a lot of redundant copies are made.
   // TODO(b/120234921): Optimize and avoid copying tensors between subgraphs.
 
+  if (op_data->subgraphs_allocated == false) {
+    TF_LITE_ENSURE_OK(context, cond_subgraph->AllocateTensors());
+    TF_LITE_ENSURE_OK(context, body_subgraph->AllocateTensors());
+  }
+
   if (op_data->body_has_dynamic_output_tensors) {
     // If body subgraph has dynamic outputs, the input of condition subgraph may
     // be changed in the last invocation and may need resizing.
@@ -329,6 +337,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       context,
       CopyTensorsData(context, cond_subgraph, cond_subgraph->inputs(),
                       this_subgraph, TfLiteIntArrayView(node->outputs)));
+
+  TF_LITE_ENSURE_OK(context, cond_subgraph->ReleaseNonPersistentMemory());
+  TF_LITE_ENSURE_OK(context, body_subgraph->ReleaseNonPersistentMemory());
+  op_data->subgraphs_allocated = false;
+
   return kTfLiteOk;
 }
 
",0,train
512f92db4e27a2871d94ffccaf9d01e7389b497c,tensorflow/tensorflow,"Set mlir-cpu-runner JIT codegen opt level correctly

- the JIT codegen was being run at the default -O0 level; instead,
  propagate the opt level from the cmd line.

Signed-off-by: Uday Bondhugula <uday@polymagelabs.com>

Closes #123

COPYBARA_INTEGRATE_REVIEW=https://github.com/tensorflow/mlir/pull/123 from bondhugula:jit-runner 3b055e47f94c9a48bf487f6400787478738cda02
PiperOrigin-RevId: 267778586",ExecutionEngine.h,"@@ -72,13 +72,15 @@ public:
 
   /// Creates an execution engine for the given module.  If `transformer` is
   /// provided, it will be called on the LLVM module during JIT-compilation and
-  /// can be used, e.g., for reporting or optimization.
-  /// If `sharedLibPaths` are provided, the underlying JIT-compilation will open
-  /// and link the shared libraries for symbol resolution.
-  /// If `objectCache` is provided, JIT compiler will use it to store the object
-  /// generated for the given module.
+  /// can be used, e.g., for reporting or optimization. `jitCodeGenOptLevel`,
+  /// when provided, is used as the optimization level for target code
+  /// generation. If `sharedLibPaths` are provided, the underlying
+  /// JIT-compilation will open and link the shared libraries for symbol
+  /// resolution. If `objectCache` is provided, JIT compiler will use it to
+  /// store the object generated for the given module.
   static llvm::Expected<std::unique_ptr<ExecutionEngine>> create(
       ModuleOp m, std::function<llvm::Error(llvm::Module *)> transformer = {},
+      Optional<llvm::CodeGenOpt::Level> jitCodeGenOptLevel = llvm::None,
       ArrayRef<StringRef> sharedLibPaths = {}, bool enableObjectCache = false);
 
   /// Looks up a packed-argument function with the given name and returns a
",0,test
f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,stateful_random_ops_cpu_gpu.h,"@@ -82,7 +82,7 @@ struct RngSkip_Philox;
 
 using CPUDevice = Eigen::ThreadPoolDevice;
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 using GPUDevice = Eigen::GpuDevice;
 
@@ -100,7 +100,7 @@ struct RngSkip_Philox<GPUDevice> {
   void operator()(const GPUDevice& device, int64 delta, Tensor* state_tensor);
 };
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 
 }  // end namespace tensorflow
 
",0,train
f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,stateful_random_ops_gpu.cu.cc,"@@ -13,13 +13,18 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#if GOOGLE_CUDA
+#if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
+
+#if TENSORFLOW_USE_ROCM
+#include ""rocm/include/hip/hip_runtime.h""
+#endif
 
 #define EIGEN_USE_GPU
 
 #include ""tensorflow/core/kernels/random_op_gpu.h""
 #include ""tensorflow/core/kernels/stateful_random_ops_cpu_gpu.h""
 #include ""tensorflow/core/util/gpu_launch_config.h""
+#include ""tensorflow/core/util/gpu_kernel_helper.h""
 
 namespace tensorflow {
 
@@ -74,7 +79,11 @@ void UpdateVariableAndFill_Philox<GPUDevice, Distribution>::operator()(
       GetGpuLaunchConfig(work_element_count, d, FillKernel<Distribution>, 0, 0);
 
   int zero = 0;
+#if GOOGLE_CUDA
   cudaMemcpyToSymbol(thread_counter, &zero, sizeof(int));
+#else // TENSORFLOW_USE_ROCM
+  hipMemcpyToSymbol(HIP_SYMBOL(thread_counter), &zero, sizeof(int));
+#endif
   TF_CHECK_OK(GpuLaunchKernel(
       FillKernel<Distribution>, cfg.block_count, cfg.thread_per_block, 0,
       d.stream(), dist, state_size, output_size, state_data, output_data));
@@ -88,8 +97,8 @@ __global__ void SkipKernel(int64 delta, StateElementType* state_data) {
 
 void RngSkip_Philox<GPUDevice>::operator()(const GPUDevice& d, int64 delta,
                                            Tensor* state_tensor) {
-  SkipKernel<<<1, 1, 0, d.stream()>>>(
-      delta, state_tensor->flat<StateElementType>().data());
+  TF_CHECK_OK(GpuLaunchKernel(SkipKernel, 1, 1, 0, d.stream(),
+      delta, state_tensor->flat<StateElementType>().data()));
 }
 
 // Explicit instantiation of the GPU distributions functors.
@@ -140,4 +149,4 @@ template struct UpdateVariableAndFill_Philox<
 
 }  // end namespace tensorflow
 
-#endif  // GOOGLE_CUDA
+#endif  // GOOGLE_CUDA || TENSORFLOW_USE_ROCM
",0,train
f90484b9a4302ccae5168e3a06bd539071661fee,tensorflow/tensorflow,[ROCm] Adding ROCm support for the stateful_random ops,philox_random.h,"@@ -25,7 +25,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/types.h""
 
 // Function qualifiers that need to work on both CPU and GPU.
-#if defined(__CUDACC__)
+#if defined(__CUDACC__) || defined(__HIPCC__)
 // For nvcc.
 #define PHILOX_DEVICE_FUNC __host__ __device__
 #define PHILOX_INLINE __inline__
",0,train
cb6047d9f30754c8339721e0f21c2e17f32cdf3a,tensorflow/tensorflow,"[XLA:GPU] Re-enable tests that are passing at head.

All of these were disabled due to zero-sized shapes.

PiperOrigin-RevId: 202132109",reshape_test.cc,"@@ -125,10 +125,7 @@ XLA_TEST_P(ReshapeTest, ScalarToSingleElementArray) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) {
+XLA_TEST_P(ReshapeTest, Trivial0x3) {
   XlaBuilder builder(TestName());
   Array2D<float> input_array(0, 3);
   auto input_literal = Literal::CreateR2FromArray2D(input_array);
@@ -141,10 +138,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3)) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-05-15
-// with an incorrect result rank.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
+XLA_TEST_P(ReshapeTest, Trivial0x3WithParameter) {
   XlaBuilder builder(TestName());
 
   std::unique_ptr<Literal> param0_literal =
@@ -158,10 +152,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial0x3WithParameter)) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Trivial3x0)) {
+XLA_TEST_P(ReshapeTest, Trivial3x0) {
   XlaBuilder builder(TestName());
   Array2D<float> input_array(3, 0);
   auto input_literal = Literal::CreateR2FromArray2D(input_array);
@@ -200,12 +191,8 @@ XLA_TEST_P(ReshapeTest, Trivial3x1) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-//
 // Splits an empty vector into an empty matrix.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(R1ToR2_0_To_2x0)) {
+XLA_TEST_P(ReshapeTest, R1ToR2_0_To_2x0) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateR1<float>({});
   XlaOp parameter;
@@ -234,12 +221,8 @@ XLA_TEST_P(ReshapeTest, R1ToR2_6_To_2x3) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-//
 // Transposes a 2x0 array to a 0x2 array.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Reshape0x2To2x0)) {
+XLA_TEST_P(ReshapeTest, Reshape0x2To2x0) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 2));
   XlaOp parameter;
@@ -286,12 +269,8 @@ XLA_TEST_P(ReshapeTest, TransposeAsReshape) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-//
 // Transposes a 0x4 array with XlaBuilder::Transpose.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(Transpose0x4)) {
+XLA_TEST_P(ReshapeTest, Transpose0x4) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 4));
   XlaOp parameter;
@@ -319,13 +298,9 @@ XLA_TEST_P(ReshapeTest, Transpose4x3) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-//
 // Reshapes an empty 2-dimensional array with dimensions that are not just a
 // rearrangement of the originals (split), but no reordering (no shuffle).
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffleZeroElements) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateFromArray(Array2D<float>(6, 0));
   XlaOp parameter;
@@ -338,10 +313,7 @@ XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitNoShuffleZeroElements)) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeR4ToR2ZeroElements)) {
+XLA_TEST_P(ReshapeTest, ReshapeR4ToR2ZeroElements) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateFromArray(Array4D<float>(2, 3, 4, 0));
   XlaOp parameter;
@@ -372,11 +344,7 @@ XLA_TEST_P(ReshapeTest, ReshapeSplitNoShuffle) {
                            zero_error_spec_);
 }
 
-// TODO(b/29185393): Make this work with the GPU backend. The GPU backend
-// does not handle zero-sized shapes correctly. Failed last on 2017-11-30
-// with an incorrect result rank.
-//
-XLA_TEST_P(ReshapeTest, DISABLED_ON_GPU(ReshapeSplitAndShuffleZeroElements)) {
+XLA_TEST_P(ReshapeTest, ReshapeSplitAndShuffleZeroElements) {
   XlaBuilder builder(TestName());
   auto input_literal = Literal::CreateFromArray(Array2D<float>(0, 6));
   XlaOp parameter;
",0,test
cb6047d9f30754c8339721e0f21c2e17f32cdf3a,tensorflow/tensorflow,"[XLA:GPU] Re-enable tests that are passing at head.

All of these were disabled due to zero-sized shapes.

PiperOrigin-RevId: 202132109",while_test.cc,"@@ -184,8 +184,7 @@ TEST_F(WhileTest, WhileWithPredicateResult) {
 // while (result.sum() < 15.5f) {
 //   result = result + vector<float>(0);
 // }
-// TODO(b/29185393): does not terminate on CPU.
-TEST_F(WhileTest, DISABLED_WhileWithEmptyVectorResult) {
+TEST_F(WhileTest, DISABLED_ON_INTERPRETER(WhileWithEmptyVectorResult)) {
   Shape result_shape = ShapeUtil::MakeShape(F32, {0});
 
   // Create a computation for the reduction.
",0,test
396b58416e927a6cfbc7fba85265119e0c769168,tensorflow/tensorflow,Fix small typo in pooling_ops_test (#1953),pooling_ops_test.py,"@@ -870,9 +870,9 @@ class PoolingTest(tf.test.TestCase):
   def testShapeFunctionEdgeCases(self):
     # All shapes unknown.
     for pool_func in [tf.nn.max_pool, tf.nn.avg_pool]:
-      p = tf.nn.max_pool(tf.placeholder(tf.float32),
-                         ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
-                         padding=""SAME"")
+      p = pool_func(tf.placeholder(tf.float32),
+                    ksize=[1, 1, 1, 1], strides=[1, 1, 1, 1],
+                    padding=""SAME"")
       self.assertEqual([None, None, None, None], p.get_shape().as_list())
     p, am = tf.nn.max_pool_with_argmax(
         tf.placeholder(tf.float32),
",0,train
b1c1547bcf3a38e9275e1c0a3c4ddee6cbf47ab7,tensorflow/tensorflow,Update example to use tf.function,check_ops.py,"@@ -2121,25 +2121,16 @@ def ensure_shape(x, shape, name=None):
 
   For example:
 
-  >>> # tf.placeholder() is not compatible with eager execution
-  ...
-  >>> tf.compat.v1.disable_eager_execution()
-  >>> x = tf.compat.v1.placeholder(tf.int32)
-  >>> print(x.shape)
-  TensorShape(None)
-  >>> y = x * 2
-  >>> print(y.shape)
-  TensorShape(None)
-  >>> y = tf.ensure_shape(y, (None, 3, 3))
-  >>> print(y.shape)
-  TensorShape([Dimension(None), Dimension(3), Dimension(3)])
-  >>> with tf.compat.v1.Session() as sess:
-  >>>   sess.run(y, feed_dict={x: [1, 2, 3]})
+  >>> @tf.function(input_signature=[tf.TensorSpec(dtype=tf.float32, shape=None)])
+  ... def f(tensor):
+  ...   return tf.ensure_shape(tensor, [3, 3])
+  >>>
+  >>> f(tf.zeros([3, 3])) # Passes
+  >>> f([1, 2, 3]) # fails
   Traceback (most recent call last):
-      ...
-  InvalidArgumentError: Shape of tensor mul [3] is not compatible with
-   expected shape [?,3,3].
-
+  ...
+  InvalidArgumentError:  Shape of tensor x [3] is not compatible with expected shape [3,3].
+  
   The above example raises `tf.errors.InvalidArgumentError`,
   because the shape (3,) is not compatible with the shape (None, 3, 3)
 
",0,train
1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes.

PiperOrigin-RevId: 289914023
Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",c_api_client.c,"@@ -23,12 +23,12 @@ limitations under the License.
 #include <stdio.h>
 #include <stdlib.h>
 
-#include ""c_api.h""
+#include ""libtpu.h""
 
 void* LoadAndInitializeDriver(const char* shared_lib,
                               struct TpuDriverFn* driver_fn) {
   void* handle;
-  handle = dlopen(""./c_api.so"", RTLD_NOW);
+  handle = dlopen(""libtpu.so"", RTLD_NOW);
   if (!handle) {
     fprintf(stderr, ""Error: %s\n"", dlerror());
     exit(EXIT_FAILURE);
",0,train
1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes.

PiperOrigin-RevId: 289914023
Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",libtpu.h,"@@ -13,8 +13,8 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_
-#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_
+#ifndef TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_
+#define TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_
 
 #include <stdint.h>
 
@@ -255,4 +255,4 @@ struct TpuDriverFn {
   PrototypeTpuDriver_Version* TpuDriver_Version;                    // NOLINT
 };
 
-#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_C_API_H_
+#endif  // TENSORFLOW_COMPILER_XLA_PYTHON_TPU_DRIVER_CLIENT_LIBTPU_H_
",0,train
1408e0342948d10ddc6e3ec9996777a9cbd5ac86,tensorflow/tensorflow,"Tpu driver changes.

PiperOrigin-RevId: 289914023
Change-Id: Ie4a98a2c2b79f1647bbaac6da7040f350f352099",external_tpu_driver.cc,"@@ -17,7 +17,7 @@
 
 #include ""absl/strings/str_format.h""
 #include ""absl/time/time.h""
-#include ""tensorflow/compiler/xla/python/tpu_driver/client/c_api.h""
+#include ""tensorflow/compiler/xla/python/tpu_driver/client/libtpu.h""
 #include ""tensorflow/compiler/xla/python/tpu_driver/tpu_driver.h""
 #include ""tensorflow/compiler/xla/python/tpu_driver/tpu_driver.pb.h""
 #include ""tensorflow/compiler/xla/statusor.h""
",0,train
3a63696e3b417603830131f989865a6f5b141482,tensorflow/tensorflow,Update math_ops.py,math_ops.py,"@@ -4229,7 +4229,7 @@ def polyval(coeffs, x, name=None):
      p(x) = coeffs[n-1] + x * (coeffs[n-2] + ... + x * (coeffs[1] +
             x * coeffs[0]))
             
-Usage Example:
+  Usage Example:
   >>> coefficients = [1.0, 2.5, -4.2]
   >>> x = 5.0
   >>> y = tf.math.polyval(coefficients, x)
",0,train
8af8ce384998e327687fbea3e4675e47b9d864e1,tensorflow/tensorflow,"Add more Resize Bilinear tests

PiperOrigin-RevId: 251897573",image_ops_test.py,"@@ -561,6 +561,13 @@ class ResizeBilinearTest(parameterized.TestCase, xla_test.XLATestCase):
       (""86x86To456x456"", 86, 86, 456, 456),
       (""100x100To456x456"", 100, 100, 456, 456),
       (""64x64To224x224"", 64, 64, 224, 224),
+      (""128x128To224x224"", 128, 128, 224, 224),
+      (""256x256To224x224"", 256, 256, 224, 224),
+      (""512x512To224x224"", 512, 512, 224, 224),
+      (""64x64To299x299"", 64, 64, 299, 299),
+      (""128x128To299x299"", 128, 128, 299, 299),
+      (""256x256To299x299"", 256, 256, 299, 299),
+      (""512x512To299x299"", 512, 512, 299, 299),
       (""224x224To224x224"", 224, 224, 224, 224),
       # This test is disabled because it is very slow. It is slow because
       # 383 is prime, 383 and 2047 are coprime, and 2048 is large.
",0,test
6be738d758c5c60a9f8a04d48b24aff43c352efc,tensorflow/tensorflow,"Annotate data race on signgam.
Change: 129369856",cwise_op_lgamma.cc,"@@ -16,8 +16,17 @@ limitations under the License.
 #include ""tensorflow/core/kernels/cwise_ops_common.h""
 
 namespace tensorflow {
-REGISTER3(UnaryOp, CPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double);
+
+template <typename Device, typename Functor>
+class LgammaOp : public UnaryOp<Device, Functor> {
+ public:
+  explicit LgammaOp(OpKernelConstruction* ctx) : UnaryOp<Device, Functor>(ctx) {
+    TF_ANNOTATE_BENIGN_RACE(&signgam, ""signgam output from lgamma is unused"");
+  }
+};
+
+REGISTER3(LgammaOp, CPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double);
 #if GOOGLE_CUDA
-REGISTER3(UnaryOp, GPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double);
+REGISTER3(LgammaOp, GPU, ""Lgamma"", functor::lgamma, float, Eigen::half, double);
 #endif
 }  // namespace tensorflow
",0,train
6be738d758c5c60a9f8a04d48b24aff43c352efc,tensorflow/tensorflow,"Annotate data race on signgam.
Change: 129369856",dynamic_annotations.h,"@@ -19,9 +19,14 @@ limitations under the License.
 // IWYU pragma: private, include ""third_party/tensorflow/core/platform/mem.h""
 // IWYU pragma: friend third_party/tensorflow/core/platform/mem.h
 
-// Do nothing for this platform
+// Do nothing for this platform.
+
 #define TF_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, bytes) \
   do {                                                \
   } while (0)
 
+#define TF_ANNOTATE_BENIGN_RACE(ptr, description) \
+  do {                                            \
+  } while (0)
+
 #endif  // TENSORFLOW_CORE_PLATFORM_DEFAULT_DYNAMIC_ANNOTATIONS_H_
",0,train
aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16).

Invoke new dot-product ASM path in normal per-channel flow.

PiperOrigin-RevId: 295755806
Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",depthwiseconv_uint8_3x3_filter.h,"@@ -13405,6 +13405,20 @@ inline void DepthwiseConvDotProduct3x3(
       thread_dim);
 }
 
+template <DepthwiseConvImplementation implementation>
+inline void DepthwiseConvDotProduct3x3PerChannel(
+    const DepthwiseParams& params, const RuntimeShape& input_shape,
+    const int8* input_data, const RuntimeShape& filter_shape,
+    const int8* filter_data, const RuntimeShape& bias_shape,
+    const int32* bias_data, const RuntimeShape& output_shape, int8* output_data,
+    int thread_start, int thread_end, int thread_dim) {
+  DepthwiseConvDotProduct3x3Impl<
+      implementation, depthwise_conv::QuantizationType::kPerChannelInt8>(
+      params, input_shape, input_data, filter_shape, filter_data, bias_shape,
+      bias_data, output_shape, output_data, thread_start, thread_end,
+      thread_dim);
+}
+
 #undef vst1_lane_8x4
 #undef vst1q_lane_8x4
 #undef vld1q_lane_s8x8
",0,test
aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16).

Invoke new dot-product ASM path in normal per-channel flow.

PiperOrigin-RevId: 295755806
Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",depthwise_conv.h,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""tensorflow/lite/kernels/cpu_backend_threadpool.h""
 #include ""tensorflow/lite/kernels/internal/optimized/cpu_check.h""
 #include ""tensorflow/lite/kernels/internal/optimized/depthwiseconv_3x3_filter_common.h""
+#include ""tensorflow/lite/kernels/internal/optimized/depthwiseconv_uint8_3x3_filter.h""
 #include ""tensorflow/lite/kernels/internal/optimized/integer_ops/depthwise_conv_3x3_filter.h""
 #include ""tensorflow/lite/kernels/internal/optimized/optimized_ops.h""
 #include ""tensorflow/lite/kernels/internal/reference/depthwiseconv_uint8.h""
@@ -1789,7 +1790,8 @@ inline void DepthwiseConvWithRounding(
     const int8* input_data, const RuntimeShape& filter_shape,
     const int8* filter_data, const RuntimeShape& bias_shape,
     const int32* bias_data, const RuntimeShape& output_shape, int8* output_data,
-    int thread_start, int thread_end, int thread_dim) {
+    int thread_start, int thread_end, int thread_dim,
+    const CpuBackendContext& cpu_backend_context) {
   ruy::profiler::ScopeLabel label(""DepthwiseConvInt8/8bit"");
   const int depth_multiplier = params.depth_multiplier;
   const int dilation_width_factor = params.dilation_width_factor;
@@ -1807,6 +1809,36 @@ inline void DepthwiseConvWithRounding(
 // Enable for arm64 except for the Nvidia Linux 4 Tegra (L4T) running on
 // Jetson TX-2. This compiler does not support the offsetof() macro.
 #if defined(__aarch64__) && !defined(GOOGLE_L4T)
+#if defined(__ANDROID__) && defined(__clang__)
+  ruy::Context* ruy_context = cpu_backend_context.ruy_context();
+  const auto ruy_paths = ruy_context != nullptr
+                             ? ruy_context->GetRuntimeEnabledPaths()
+                             : ruy::Path::kNone;
+  const bool has_dot_product_instructions =
+      (ruy_paths & ruy::Path::kNeonDotprod) != ruy::Path::kNone;
+
+  // Dispatch to dot-product 3x3 kernels when supported.
+  if (has_dot_product_instructions) {
+    using optimized_ops::depthwise_conv::DotProduct3x3KernelType;
+    DotProduct3x3KernelType kernel_type =
+        optimized_ops::depthwise_conv::CategorizeDotProductKernel<
+            optimized_ops::depthwise_conv::QuantizationType::kPerChannelInt8>(
+            input_shape, filter_shape, output_shape, params);
+    if (kernel_type != DotProduct3x3KernelType::kNone) {
+      ruy::profiler::ScopeLabel specialized_label(
+          ""DepthwiseConvInt8/8bit/3x3XDotProduct"");
+      optimized_ops::depthwise_conv::DepthwiseConvDotProduct3x3PerChannel<
+          DepthwiseConvImplementation::kUseNeon3x3DotProduct>(
+          params, input_shape, input_data, filter_shape, filter_data,
+          bias_shape, bias_data, output_shape, output_data, thread_start,
+          thread_end, thread_dim);
+      return;
+    }
+  }
+
+#endif
+  // Dispatch to non-dot-product 3x3 kernels when supported.
+
   const int stride_width = params.stride_width;
   const int stride_height = params.stride_height;
   const int pad_width = params.padding_values.width;
@@ -1842,11 +1874,12 @@ inline void DepthwiseConvImpl(
     const int8* input_data, const RuntimeShape& filter_shape,
     const int8* filter_data, const RuntimeShape& bias_shape,
     const int32* bias_data, const RuntimeShape& output_shape, int8* output_data,
-    int thread_start, int thread_end, int thread_dim) {
+    int thread_start, int thread_end, int thread_dim,
+    const CpuBackendContext& cpu_backend_context) {
   return DepthwiseConvWithRounding<DepthwiseConvOutputRounding::kAwayFromZero>(
       params, output_multiplier, output_shift, input_shape, input_data,
       filter_shape, filter_data, bias_shape, bias_data, output_shape,
-      output_data, thread_start, thread_end, thread_dim);
+      output_data, thread_start, thread_end, thread_dim, cpu_backend_context);
 }
 
 template <typename T, typename TS>
@@ -1859,7 +1892,8 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task {
                           const T* filter_data, const RuntimeShape& bias_shape,
                           const TS* bias_data, const RuntimeShape& output_shape,
                           T* output_data, int thread_start, int thread_end,
-                          int thread_dim)
+                          int thread_dim,
+                          const CpuBackendContext& cpu_backend_context_x)
       : params_(params),
         output_multiplier_(output_multiplier),
         output_shift_(output_shift),
@@ -1873,13 +1907,14 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task {
         output_data_(output_data),
         thread_start_(thread_start),
         thread_end_(thread_end),
-        thread_dim_(thread_dim) {}
+        thread_dim_(thread_dim),
+        cpu_backend_context(cpu_backend_context_x) {}
 
   void Run() override {
     DepthwiseConvImpl(params_, output_multiplier_, output_shift_, input_shape_,
                       input_data_, filter_shape_, filter_data_, bias_shape_,
                       bias_data_, output_shape_, output_data_, thread_start_,
-                      thread_end_, thread_dim_);
+                      thread_end_, thread_dim_, cpu_backend_context);
   }
 
  private:
@@ -1897,6 +1932,7 @@ struct DepthwiseConvWorkerTask : cpu_backend_threadpool::Task {
   int thread_start_;
   int thread_end_;
   int thread_dim_;
+  const CpuBackendContext& cpu_backend_context;
 };
 
 inline int HowManyConvThreads(const RuntimeShape& output_shape,
@@ -1947,7 +1983,8 @@ inline void DepthwiseConvPerChannel(
     DepthwiseConvImpl(params, output_multiplier, output_shift, input_shape,
                       input_data, filter_shape, filter_data, bias_shape,
                       bias_data, output_shape, output_data, /*thread_start=*/0,
-                      /*thread_end=*/output_rows, /*thread_dim=*/1);
+                      /*thread_end=*/output_rows, /*thread_dim=*/1,
+                      *cpu_backend_context);
   } else {
     std::vector<DepthwiseConvWorkerTask<int8, int32>> tasks;
     // TODO(b/131746020) don't create new heap allocations every time.
@@ -1960,7 +1997,7 @@ inline void DepthwiseConvPerChannel(
       tasks.emplace_back(params, output_multiplier, output_shift, input_shape,
                          input_data, filter_shape, filter_data, bias_shape,
                          bias_data, output_shape, output_data, thread_start,
-                         thread_end, thread_dim);
+                         thread_end, thread_dim, *cpu_backend_context);
       thread_start = thread_end;
     }
     cpu_backend_threadpool::Execute(tasks.size(), tasks.data(),
",0,test
aa5956dc18f65027bc28c8be132505cf9859d328,tensorflow/tensorflow,"Depthwise convolution 3x3 per-channel int8 for dot-product ARM (16).

Invoke new dot-product ASM path in normal per-channel flow.

PiperOrigin-RevId: 295755806
Change-Id: Ief16e2acd78d2bbb9c5ced91f7a0312681d833fe",legacy_optimized_ops.h,"@@ -512,10 +512,11 @@ struct LegacyPerChannelDepthwiseConvWorkerTask : public gemmlowp::Task {
         thread_dim_(thread_dim) {}
 
   void Run() override {
+    CpuBackendContext backend_context;
     optimized_integer_ops::DepthwiseConvImpl(
         params_, output_multiplier_, output_shift_, input_shape_, input_data_,
         filter_shape_, filter_data_, bias_shape_, bias_data_, output_shape_,
-        output_data_, thread_start_, thread_end_, thread_dim_);
+        output_data_, thread_start_, thread_end_, thread_dim_, backend_context);
   }
 
  private:
@@ -568,11 +569,12 @@ inline void DepthwiseConvPerChannel(
   thread_count = std::max(1, std::min(thread_count, max_threads));
 
   if (thread_count == 1) {
+    CpuBackendContext backend_context;
     optimized_integer_ops::DepthwiseConvImpl(
         params, output_multiplier, output_shift, input_shape, input_data,
         filter_shape, filter_data, bias_shape, bias_data, output_shape,
         output_data, /*thread_start=*/0,
-        /*thread_end=*/output_rows, /*thread_dim=*/1);
+        /*thread_end=*/output_rows, /*thread_dim=*/1, backend_context);
   } else {
     std::vector<gemmlowp::Task*> tasks(thread_count);
     int thread_start = 0;
",0,test
1be692cf1161539fdfa77257cd969a549da8cc97,tensorflow/tensorflow,"Fix assert_called error on Python3

by replacing it with assertTrue(....called)",training_test.py,"@@ -626,7 +626,7 @@ class _TrainingExecutorTrainingTest(object):
 
     self._run_task(training._TrainingExecutor(mock_est, mock_train_spec,
                                               mock_eval_spec))
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
     mock_server.assert_not_called()
 
   def test_fail_with_empty_task_type(self):
@@ -836,7 +836,7 @@ class TrainingExecutorRunMasterTest(test.TestCase):
     executor.run_master()
 
     mock_server.assert_not_called()
-    mock_est.train.assert_called()
+    self.assertTrue(mock_est.train.called)
 
   def test_fail_with_empty_task_type(self):
     mock_est = test.mock.Mock(spec=estimator_lib.Estimator)
",0,train
87c2f2dc3b263f90b79c4d31b6d9dbc410d8145d,tensorflow/tensorflow,"Allowing a slice to move through a reverse (i.e., slice(reverse) is reverse(slice)).

PiperOrigin-RevId: 322473168
Change-Id: Ia8c8563f121cfb3aac52464336a03642c7ae6b2a",algebraic_simplifier.cc,"@@ -509,6 +509,9 @@ class AlgebraicSimplifierVisitor : public DfsHloRewriteVisitor {
   // Tries to convert slice(reshape(X)) into reshape(slice(X))
   StatusOr<bool> TryToReorderSliceAndReshape(HloInstruction* slice);
 
+  // Tries to convert slice(reverse(X)) into reverse(slice(X))
+  StatusOr<bool> TryToReorderSliceAndReverse(HloInstruction* slice);
+
   // Tries to simplify `(and (< a N) (< a K))` in cases where `N <= K` into
   // `(< a N)`. This is crucial for being able to figure out the loop trip
   // count.
@@ -3574,6 +3577,52 @@ StatusOr<bool> AlgebraicSimplifierVisitor::TryToReorderSliceAndReshape(
   return false;
 }
 
+// Allowing a slice to move through a reverse with any necessary updates to the
+// slice config.
+StatusOr<bool> AlgebraicSimplifierVisitor::TryToReorderSliceAndReverse(
+    HloInstruction* slice) {
+  VLOG(2) << ""Entered TryToReorderSliceAndReverse for slice:""
+          << slice->ToString();
+  if (Match(slice, m::Slice(m::Reverse()))) {
+    HloInstruction* reverse = slice->mutable_operand(0);
+    HloInstruction* reverse_operand = reverse->mutable_operand(0);
+    std::vector<int64> new_starts = slice->slice_starts();
+    std::vector<int64> new_limits = slice->slice_limits();
+    std::vector<int64> new_strides = slice->slice_strides();
+    for (auto rdim : reverse->dimensions()) {
+      int64 start = slice->slice_starts(rdim);
+      int64 limit = slice->slice_limits(rdim);
+      int64 stride = slice->slice_strides(rdim);
+      // find_nth allows us to compute the appropriate index to begin
+      // with during reverse even in the presence of non-unit strides
+      int64 find_nth = (limit - start - 1) / stride;
+      find_nth = start + find_nth * stride;
+      limit = find_nth + 1;
+      new_starts[rdim] =
+          (reverse->shape().dimensions(rdim) - start) - (limit - start);
+      new_limits[rdim] = reverse->shape().dimensions(rdim) - start;
+      VLOG(2) << ""Analyzing dim:"" << rdim << "" (start,limit):"" << start << "",""
+              << limit << "" and new (start, limit):"" << new_starts[rdim] << "",""
+              << new_limits[rdim];
+    }
+    // New slice formed from the reverse_operand, but strides and shape of the
+    // slice output remains the same. New slice's starts and limits are updated
+    // for ONLY the reversed dimensions as indicated above.
+    HloInstruction* new_slice = computation_->AddInstruction(
+        HloInstruction::CreateSlice(slice->shape(), reverse_operand, new_starts,
+                                    new_limits, new_strides));
+    simplifier_->UpdateLayout(new_slice->mutable_shape());
+    TF_RETURN_IF_ERROR(ReplaceWithNewInstruction(
+        slice, HloInstruction::CreateReverse(new_slice->shape(), new_slice,
+                                             reverse->dimensions())));
+    // We do not delete the old reverse, since there might be another
+    // consumer of that reverse (i.e., full reverse output). DCE should take
+    // care of any deletion that is necessary if there was no use of reverse.
+    return true;
+  }
+  return false;
+}
+
 Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) {
   // Delete no-op slices, i.e. where shape = operand shape.
   if (ReplaceInstructionIfSameShape(slice, slice->mutable_operand(0))) {
@@ -3728,6 +3777,15 @@ Status AlgebraicSimplifierVisitor::HandleSlice(HloInstruction* slice) {
   if (replaced) {
     return Status::OK();
   }
+
+  bool reversed = false;
+  if (Match(slice, m::Slice(m::Reverse(m::Op())))) {
+    TF_ASSIGN_OR_RETURN(reversed, TryToReorderSliceAndReverse(slice));
+  }
+  if (reversed) {
+    return Status::OK();
+  }
+
   return Status::OK();
 }
 
",0,train
87c2f2dc3b263f90b79c4d31b6d9dbc410d8145d,tensorflow/tensorflow,"Allowing a slice to move through a reverse (i.e., slice(reverse) is reverse(slice)).

PiperOrigin-RevId: 322473168
Change-Id: Ia8c8563f121cfb3aac52464336a03642c7ae6b2a",algebraic_simplifier_test.cc,"@@ -2014,6 +2014,80 @@ TEST_F(AlgebraicSimplifierTest, RemoveUnaryConcatenate) {
   EXPECT_THAT(computation->root_instruction(), param0);
 }
 
+TEST_F(AlgebraicSimplifierTest, SliceReverse) {
+  const char* const hlo_string = R""(
+HloModule module
+
+ENTRY test {
+  param = f32[6,7,32] parameter(0)
+  constant = f32[] constant(0)
+  pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0
+  rev = f32[8,7,32] reverse(pad), dimensions={0,2}
+  slice = f32[1,7,32] slice(rev), slice={[2:3:1], [0:7:1], [0:32:1]}
+  ROOT tuple = (f32[1,7,32]) tuple(slice)
+})"";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+  AlgebraicSimplifier simplifier(default_options_);
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  HloComputation* computation = module->entry_computation();
+  EXPECT_THAT(computation->root_instruction(),
+              GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad())))));
+  const HloInstruction* slice =
+      computation->root_instruction()->operand(0)->operand(0);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 7, 32})));
+  // slice start,limit of 0th and 2nd dimensions are changed
+  // while 1st dimension's slice start, limit remains the same since
+  // it is not reversed.
+  EXPECT_EQ(slice->slice_starts(0), 5);
+  EXPECT_EQ(slice->slice_limits(0), 6);
+  EXPECT_EQ(slice->slice_starts(1), 0);
+  EXPECT_EQ(slice->slice_limits(1), 7);
+  EXPECT_EQ(slice->slice_starts(2), 0);
+  EXPECT_EQ(slice->slice_limits(2), 32);
+  EXPECT_EQ(slice->slice_strides(0), 1);
+  EXPECT_EQ(slice->slice_strides(1), 1);
+  EXPECT_EQ(slice->slice_strides(2), 1);
+}
+
+TEST_F(AlgebraicSimplifierTest, SliceReverseNonUnitEvenOddStrides) {
+  const char* const hlo_string = R""(
+HloModule module
+
+ENTRY test {
+  param = f32[6,7,32] parameter(0)
+  constant = f32[] constant(0)
+  pad = f32[8,7,32] pad(param, constant), padding=1_1x0_0x0_0
+  rev = f32[8,7,32] reverse(pad), dimensions={0,1,2}
+  slice = f32[1,2,7] slice(rev), slice={[2:3:2], [0:7:4], [0:32:5]}
+  ROOT tuple = (f32[1,2,7]) tuple(slice)
+})"";
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+
+  AlgebraicSimplifier simplifier(default_options_);
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  HloComputation* computation = module->entry_computation();
+  EXPECT_THAT(computation->root_instruction(),
+              GmockMatch(m::Tuple(m::Reverse(m::Slice(m::Pad())))));
+  const HloInstruction* slice =
+      computation->root_instruction()->operand(0)->operand(0);
+  EXPECT_TRUE(
+      ShapeUtil::Equal(slice->shape(), ShapeUtil::MakeShape(F32, {1, 2, 7})));
+  // slice start,limit of all dimensions are changed
+  EXPECT_EQ(slice->slice_starts(0), 5);
+  EXPECT_EQ(slice->slice_limits(0), 6);
+  EXPECT_EQ(slice->slice_starts(1), 2);
+  EXPECT_EQ(slice->slice_limits(1), 7);
+  EXPECT_EQ(slice->slice_starts(2), 1);
+  EXPECT_EQ(slice->slice_limits(2), 32);
+  EXPECT_EQ(slice->slice_strides(0), 2);
+  EXPECT_EQ(slice->slice_strides(1), 4);
+  EXPECT_EQ(slice->slice_strides(2), 5);
+}
+
 // Test that empty operands of concatenates are removed.
 TEST_F(AlgebraicSimplifierTest, RemoveEmptyConcatenateOperands) {
   auto m = CreateNewVerifiedModule();
",0,train
9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment.

PiperOrigin-RevId: 260802228",buffer_assignment.cc,"@@ -1222,8 +1222,13 @@ Status BufferAssigner::AssignPresetBuffers(
       preset_allocations;
   for (auto& color_and_size : preset_assignments_->sizes()) {
     LogicalBuffer::Color color(color_and_size.first);
-    preset_allocations.emplace(
+    auto inserted = preset_allocations.emplace(
         color, assignment->NewEmptyAllocation(color_and_size.second, color));
+    BufferAllocation* inserted_allocation = inserted.first->second;
+    VLOG(3) << ""Created preset buffer allocation ""
+            << inserted_allocation->index()
+            << "", color: "" << inserted_allocation->color()
+            << "", size: "" << inserted_allocation->size();
   }
 
   const HloAliasAnalysis& alias_analysis = assignment->alias_analysis();
@@ -1234,8 +1239,12 @@ Status BufferAssigner::AssignPresetBuffers(
         alias_analysis.GetUniqueBufferAt(position.instruction, position.index);
     VLOG(3) << ""Preset allocation for buffer: "" << buffer;
     const HeapSimulator::Chunk& chunk = position_and_chunk.second;
-    preset_allocations[buffer.color()]->AddAssignment(buffer.GetUniqueValue(),
-                                                      chunk.offset, chunk.size);
+    auto preset_allocations_iter = preset_allocations.find(buffer.color());
+    CHECK(preset_allocations_iter != preset_allocations.end())
+        << ""No preset buffer allocation for color "" << buffer.color()
+        << "" found."";
+    preset_allocations_iter->second->AddAssignment(buffer.GetUniqueValue(),
+                                                   chunk.offset, chunk.size);
     // Ensure that there is at most one preset allocation for each buffer.
     CHECK_EQ(assigned_buffers->count(&buffer), 0);
     assigned_buffers->emplace(&buffer);
",0,train
9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment.

PiperOrigin-RevId: 260802228",memory_space_assignment.cc,"@@ -402,7 +402,7 @@ Status MemorySpaceAssignment::Process() {
     }
   }
 
-  if (preset_assignments_->chunks().empty()) {
+  if (!preset_assignments_->chunks().empty()) {
     preset_assignments_->add_size(alternate_memory_space_,
                                   alternate_memory_size);
   }
@@ -413,6 +413,10 @@ Status MemorySpaceAssignment::Process() {
       VLOG(3) << "" ["" << pair.second.offset << "", "" << pair.second.size
               << ""] : "" << pair.first.ToString();
     }
+    VLOG(3) << ""Exported alternate memory sizes:"";
+    for (auto& pair : preset_assignments_->sizes()) {
+      VLOG(3) << ""  space: "" << pair.first << "", size: "" << pair.second;
+    }
   }
   return Status::OK();
 }
@@ -427,7 +431,9 @@ void MemorySpaceAssignment::ScheduleAsynchronousCopy(
 Status MemorySpaceAssignment::FixSchedule() {
   CHECK(module_->has_schedule());
   HloSchedule& schedule = module_->schedule();
-  for (const HloComputation* computation : module_->computations()) {
+  for (const HloComputation* computation :
+       module_->MakeNonfusionComputations()) {
+    CHECK(schedule.is_computation_scheduled(computation));
     const HloInstructionSequence& sequence = schedule.sequence(computation);
     HloInstructionSequence new_sequence;
 
",0,train
9c57bb91998a34fdbc7b5e6dfe5a90bb8d5362aa,tensorflow/tensorflow,"Bug fixes in memory space assignment and buffer assignment.

PiperOrigin-RevId: 260802228",memory_space_assignment_test.cc,"@@ -31,7 +31,7 @@ class MemorySpaceAssignmentTest : public HloTestBase {
   const int64 kDefaultMemorySpace = 0;
   const int64 kAlternateMemorySpace = 1;
 
-  void AssignMemorySpace(HloModule* module) {
+  std::unique_ptr<PresetAssignments> AssignMemorySpace(HloModule* module) {
     auto size_fn = [](const BufferValue& buffer) {
       return ShapeUtil::ByteSizeOf(buffer.shape(), /*pointer_size=*/8);
     };
@@ -49,13 +49,14 @@ class MemorySpaceAssignmentTest : public HloTestBase {
       return true;
     };
 
-    ASSERT_IS_OK(MemorySpaceAssignment::Run(
-                     module, kAlternateMemorySpace, /*max_size_in_bytes=*/128,
-                     /*min_prefetch_interval=*/2,
-                     /*max_prefetch_interval=*/10,
-                     /*alternate_memory_space_alignment_in_bytes=*/8, size_fn,
-                     is_allowed_in_alternate_mem)
-                     .status());
+    return std::move(MemorySpaceAssignment::Run(
+                         module, kAlternateMemorySpace,
+                         /*max_size_in_bytes=*/128,
+                         /*min_prefetch_interval=*/2,
+                         /*max_prefetch_interval=*/10,
+                         /*alternate_memory_space_alignment_in_bytes=*/8,
+                         size_fn, is_allowed_in_alternate_mem)
+                         .ValueOrDie());
   }
 };
 
@@ -103,7 +104,7 @@ TEST_F(MemorySpaceAssignmentTest, Simple) {
   schedule.set_sequence(computation, {p0, p1, add, sub, mul});
   TF_CHECK_OK(module->set_schedule(schedule));
 
-  AssignMemorySpace(module.get());
+  auto preset_assignments = AssignMemorySpace(module.get());
 
   // Inputs and outputs are currently placed in the default memory. Everything
   // else should be in the alternate memory.
@@ -116,6 +117,10 @@ TEST_F(MemorySpaceAssignmentTest, Simple) {
   EXPECT_THAT(mul, op::ShapeWithLayout(shape));
   EXPECT_THAT(add, op::ShapeWithLayout(shape_in_alternate_mem));
   EXPECT_THAT(sub, op::ShapeWithLayout(shape_in_alternate_mem));
+
+  // Make sure the preset assignments is sane.
+  EXPECT_THAT(preset_assignments->chunks().size(), 2);
+  EXPECT_THAT(preset_assignments->sizes().size(), 1);
 }
 
 TEST_F(MemorySpaceAssignmentTest, NegateChain) {
",0,train
f4150f34c3c56abd61d24b5dd226585e006c9488,tensorflow/tensorflow,"Internal Change

PiperOrigin-RevId: 251500717",gru_ops_test.cc,"@@ -23,7 +23,7 @@ namespace tensorflow {
 
 class GruOpsTest : public ::testing::Test {
  public:
-  static void SetUpTestCase() {
+  static void SetUpTestSuite() {
     TF_Status* status = TF_NewStatus();
     auto* lib = TF_LoadLibrary(
         ""tensorflow/contrib/rnn/python/ops/_gru_ops.so"", status);
",0,train
f4150f34c3c56abd61d24b5dd226585e006c9488,tensorflow/tensorflow,"Internal Change

PiperOrigin-RevId: 251500717",lstm_ops_test.cc,"@@ -25,7 +25,7 @@ namespace tensorflow {
 
 class LSTMOpsTest : public ::testing::Test {
  public:
-  static void SetUpTestCase() {
+  static void SetUpTestSuite() {
     TF_Status* status = TF_NewStatus();
     auto* lib = TF_LoadLibrary(
         ""tensorflow/contrib/rnn/python/ops/_lstm_ops.so"", status);
",0,train
c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640)

* adding ps_strategy to run_config to enable different placement strategy in estimator

* 1. Moved estimator._device_fn to RunConfig as @property
2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used
3. Added some basic unit tests, may need further tests.

* 1. Removing ps_strategy.
2. Modified estimator to take the overridden device_fn from the config if set.
3. Removed ps_strategy related unit tests.

* Adding manual initialization of _device_fn in legacy RunConfig class

* Updated estimator golden API through
1. bazel build //tensorflow/tools/api/tests:api_compatibility_test
2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True

* fixing code styles",run_config.py,"@@ -299,6 +299,7 @@ class RunConfig(ClusterConfig, core_run_config.RunConfig):
     # so instead of breaking compatibility with that assumption, we
     # just manually initialize this field:
     self._train_distribute = None
+    self._device_fn = None
 
     gpu_options = config_pb2.GPUOptions(
         per_process_gpu_memory_fraction=gpu_memory_fraction)
",0,train
c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640)

* adding ps_strategy to run_config to enable different placement strategy in estimator

* 1. Moved estimator._device_fn to RunConfig as @property
2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used
3. Added some basic unit tests, may need further tests.

* 1. Removing ps_strategy.
2. Modified estimator to take the overridden device_fn from the config if set.
3. Removed ps_strategy related unit tests.

* Adding manual initialization of _device_fn in legacy RunConfig class

* Updated estimator golden API through
1. bazel build //tensorflow/tools/api/tests:api_compatibility_test
2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True

* fixing code styles",estimator.py,"@@ -216,7 +216,8 @@ class Estimator(object):
     else:
       self._session_config = self._config.session_config
 
-    self._device_fn = _get_replica_device_setter(self._config)
+    self._device_fn = self._config.device_fn or \
+                      _get_replica_device_setter(self._config)
 
     if model_fn is None:
       raise ValueError('model_fn must be provided to Estimator.')
",0,train
c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640)

* adding ps_strategy to run_config to enable different placement strategy in estimator

* 1. Moved estimator._device_fn to RunConfig as @property
2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used
3. Added some basic unit tests, may need further tests.

* 1. Removing ps_strategy.
2. Modified estimator to take the overridden device_fn from the config if set.
3. Removed ps_strategy related unit tests.

* Adding manual initialization of _device_fn in legacy RunConfig class

* Updated estimator golden API through
1. bazel build //tensorflow/tools/api/tests:api_compatibility_test
2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True

* fixing code styles",run_config.py,"@@ -27,11 +27,13 @@ import six
 from tensorflow.core.protobuf import config_pb2
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training import server_lib
+from tensorflow.python.estimator import util
 from tensorflow.python.util import compat_internal
 from tensorflow.python.util.tf_export import tf_export
 
 
 _USE_DEFAULT = object()
+_VALID_DEVICE_FN_ARGS = set(['op'])
 
 # A list of the property names in RunConfig that the user is allowed to change.
 _DEFAULT_REPLACEABLE_LIST = [
@@ -44,7 +46,8 @@ _DEFAULT_REPLACEABLE_LIST = [
     'keep_checkpoint_max',
     'keep_checkpoint_every_n_hours',
     'log_step_count_steps',
-    'train_distribute'
+    'train_distribute',
+    'device_fn'
 ]
 
 _SAVE_CKPT_ERR = (
@@ -279,6 +282,11 @@ def _validate_properties(run_config):
   _validate('tf_random_seed', lambda seed: isinstance(seed, six.integer_types),
             message='tf_random_seed must be integer.')
 
+  _validate('device_fn', lambda device_fn: six.callable(device_fn) and
+            set(util.fn_args(device_fn)) == _VALID_DEVICE_FN_ARGS,
+            message='device_fn must be callable with exactly'
+                    ' one argument ""op"".')
+
 
 class TaskType(object):
   MASTER = 'master'
@@ -302,7 +310,8 @@ class RunConfig(object):
                keep_checkpoint_max=5,
                keep_checkpoint_every_n_hours=10000,
                log_step_count_steps=100,
-               train_distribute=None):
+               train_distribute=None,
+               device_fn=None):
     """"""Constructs a RunConfig.
 
     All distributed training related properties `cluster_spec`, `is_chief`,
@@ -430,6 +439,10 @@ class RunConfig(object):
         `tf.contrib.distribute.DistributionStrategy`. If specified,
         then Estimator will distribute the user's model during training,
         according to the policy specified by that strategy.
+      device_fn: A callable invoked for every `Operation` that takes the
+        `Operation` and returns the device string. If `None`, defaults to
+        the device function returned by `tf.train.replica_device_setter`
+        with round-robin strategy.
 
     Raises:
       ValueError: If both `save_checkpoints_steps` and `save_checkpoints_secs`
@@ -466,7 +479,8 @@ class RunConfig(object):
         keep_checkpoint_max=keep_checkpoint_max,
         keep_checkpoint_every_n_hours=keep_checkpoint_every_n_hours,
         log_step_count_steps=log_step_count_steps,
-        train_distribute=train_distribute)
+        train_distribute=train_distribute,
+        device_fn=device_fn)
 
     self._init_distributed_setting_from_environment_var(tf_config)
 
@@ -568,6 +582,16 @@ class RunConfig(object):
   def cluster_spec(self):
     return self._cluster_spec
 
+  @property
+  def device_fn(self):
+    """"""Returns the device_fn.
+
+    If device_fn is not `None`, it overrides the default
+    device function used in `Estimator`.
+    Otherwise the default one is used.
+    """"""
+    return self._device_fn
+
   @property
   def evaluation_master(self):
     return self._evaluation_master
@@ -697,7 +721,8 @@ class RunConfig(object):
       - `keep_checkpoint_max`,
       - `keep_checkpoint_every_n_hours`,
       - `log_step_count_steps`,
-      - `train_distribute`.
+      - `train_distribute`,
+      - `device_fn`.
 
     In addition, either `save_checkpoints_steps` or `save_checkpoints_secs`
     can be set (should not be both).
",0,train
c6fdeaca7dd32c6bec3ff2df14889c3f2c129f14,tensorflow/tensorflow,"adding ps_strategy to run_config to enable different placement strate… (#15640)

* adding ps_strategy to run_config to enable different placement strategy in estimator

* 1. Moved estimator._device_fn to RunConfig as @property
2. Made RunConfig.device_fn to return custom device function if one is specified, otherwise the result from `tf.train.replica_device_setter` call is used
3. Added some basic unit tests, may need further tests.

* 1. Removing ps_strategy.
2. Modified estimator to take the overridden device_fn from the config if set.
3. Removed ps_strategy related unit tests.

* Adding manual initialization of _device_fn in legacy RunConfig class

* Updated estimator golden API through
1. bazel build //tensorflow/tools/api/tests:api_compatibility_test
2. bazel-bin/tensorflow/tools/api/tests/api_compatibility_test --update_goldens True

* fixing code styles",run_config_test.py,"@@ -42,6 +42,7 @@ _SESSION_CONFIG_ERR = 'session_config must be instance of ConfigProto'
 _KEEP_CKPT_MAX_ERR = 'keep_checkpoint_max should be >= 0'
 _KEEP_CKPT_HOURS_ERR = 'keep_checkpoint_every_n_hours should be > 0'
 _TF_RANDOM_SEED_ERR = 'tf_random_seed must be integer'
+_DEVICE_FN_ERR = 'device_fn must be callable with exactly one argument ""op"".'
 _ONE_CHIEF_ERR = 'The ""cluster"" in TF_CONFIG must have only one ""chief"" node.'
 _ONE_MASTER_ERR = 'The ""cluster"" in TF_CONFIG must have only one ""master"" node.'
 _INVALID_TASK_TYPE_FOR_EVAL_MASTER = (
@@ -83,6 +84,7 @@ class RunConfigTest(test.TestCase):
     self.assertEqual(5, config.keep_checkpoint_max)
     self.assertEqual(10000, config.keep_checkpoint_every_n_hours)
     self.assertIsNone(config.service)
+    self.assertIsNone(config.device_fn)
 
   def test_model_dir(self):
     empty_config = run_config_lib.RunConfig()
@@ -93,6 +95,7 @@ class RunConfigTest(test.TestCase):
 
   def test_replace_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    device_fn = lambda op: ""/cpu:0""
 
     config = run_config_lib.RunConfig().replace(
         tf_random_seed=11,
@@ -100,13 +103,15 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_secs=14,
         session_config=session_config,
         keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17)
+        keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
 
   def test_replace_none_value(self):
     config = run_config_lib.RunConfig().replace(
@@ -117,7 +122,8 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_steps=None,
         session_config=None,
         keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None)
+        keep_checkpoint_every_n_hours=None,
+        device_fn=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -126,6 +132,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
 
   def test_replace_with_disallowallowed_properties(self):
     config = run_config_lib.RunConfig()
@@ -166,9 +173,12 @@ class RunConfigTest(test.TestCase):
       config.replace(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       config.replace(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      config.replace(device_fn=lambda x, y: 0)
 
   def test_init_with_allowed_properties(self):
     session_config = config_pb2.ConfigProto(allow_soft_placement=True)
+    device_fn = lambda op: ""/cpu:0""
 
     config = run_config_lib.RunConfig(
         tf_random_seed=11,
@@ -176,13 +186,15 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_secs=14,
         session_config=session_config,
         keep_checkpoint_max=16,
-        keep_checkpoint_every_n_hours=17)
+        keep_checkpoint_every_n_hours=17,
+        device_fn=device_fn)
     self.assertEqual(11, config.tf_random_seed)
     self.assertEqual(12, config.save_summary_steps)
     self.assertEqual(14, config.save_checkpoints_secs)
     self.assertEqual(session_config, config.session_config)
     self.assertEqual(16, config.keep_checkpoint_max)
     self.assertEqual(17, config.keep_checkpoint_every_n_hours)
+    self.assertEqual(device_fn, config.device_fn)
 
   def test_init_none_value(self):
     config = run_config_lib.RunConfig(
@@ -193,7 +205,8 @@ class RunConfigTest(test.TestCase):
         save_checkpoints_steps=None,
         session_config=None,
         keep_checkpoint_max=None,
-        keep_checkpoint_every_n_hours=None)
+        keep_checkpoint_every_n_hours=None,
+        device_fn=None)
     self.assertIsNone(config.tf_random_seed)
     self.assertIsNone(config.model_dir)
     self.assertIsNone(config.save_summary_steps)
@@ -202,6 +215,7 @@ class RunConfigTest(test.TestCase):
     self.assertIsNone(config.session_config)
     self.assertIsNone(config.keep_checkpoint_max)
     self.assertIsNone(config.keep_checkpoint_every_n_hours)
+    self.assertIsNone(config.device_fn)
 
   def test_init_invalid_values(self):
     with self.assertRaisesRegexp(ValueError, _MODEL_DIR_ERR):
@@ -220,6 +234,8 @@ class RunConfigTest(test.TestCase):
       run_config_lib.RunConfig(keep_checkpoint_every_n_hours=0)
     with self.assertRaisesRegexp(ValueError, _TF_RANDOM_SEED_ERR):
       run_config_lib.RunConfig(tf_random_seed=1.0)
+    with self.assertRaisesRegexp(ValueError, _DEVICE_FN_ERR):
+      run_config_lib.RunConfig(device_fn=lambda x: ""/cpu:0"")
 
 
 class RunConfigDistributedSettingTest(test.TestCase):
",0,train
f1406b4d064b56d1fc51b8ba88b91b8ddbed8b48,tensorflow/tensorflow,Switch from SimplePhilox to SingleSampleAdapter,sampling_dataset_op.cc,"@@ -138,8 +138,8 @@ class SamplingDatasetOp::Dataset : public DatasetBase {
     void ResetRngs() EXCLUSIVE_LOCKS_REQUIRED(mu_) {
       // Reset the generators based on the current iterator seeds.
       parent_generator_ = random::PhiloxRandom(seed_, seed2_);
-      generator_ = random::SimplePhilox(&parent_generator_);
-
+      generator_ =
+          random::SingleSampleAdapter<random::PhiloxRandom>(&parent_generator_);
       generator_.Skip(num_random_samples_);
     }
 
@@ -188,13 +188,17 @@ class SamplingDatasetOp::Dataset : public DatasetBase {
     float Random() {
       mutex_lock l(mu_);
       num_random_samples_++;
-      auto out = generator_.RandFloat();
-      return out;
+      uint32 random_uint = generator_();
+
+      // PhiloxRandom returns 32-bit unsigned ints. Convert to float in [0,1)
+      // using the same method that the RandomUniform op uses.
+      return random::Uint32ToFloat(random_uint);
     }
 
     // random util
     random::PhiloxRandom parent_generator_ GUARDED_BY(mu_);
-    random::SimplePhilox generator_ GUARDED_BY(mu_);
+    random::SingleSampleAdapter<random::PhiloxRandom> generator_
+        GUARDED_BY(mu_);
     int64 num_random_samples_ GUARDED_BY(mu_) = 0;
   };
 
",0,test
f1406b4d064b56d1fc51b8ba88b91b8ddbed8b48,tensorflow/tensorflow,Switch from SimplePhilox to SingleSampleAdapter,simple_philox.h,"@@ -66,9 +66,6 @@ class SimplePhilox {
   // range [0,2^max_log-1] with bias towards smaller numbers.
   uint32 Skewed(int max_log);
 
-  // Skip ahead `num_skips` entries in the stream of random numbers
-  void Skip(uint64 num_skips) { single_.Skip(num_skips); }
-
  private:
   SingleSampleAdapter<PhiloxRandom> single_;
 };
",0,test
5febf24c804c692b7444b12cbea8c63fe6d06f31,tensorflow/tensorflow,"[XLA:GPU] [NFC] Refactor the code to allocate buffers for a given BufferAssignment

PiperOrigin-RevId: 314453117
Change-Id: I4be382808f1026d6136967de8954627f408db15b",gpu_executable.cc,"@@ -320,49 +320,64 @@ GpuExecutable::ResolveConstantGlobals(se::Stream* stream) {
   return &module_globals_.emplace(executor, std::move(globals)).first->second;
 }
 
+StatusOr<se::DeviceMemoryBase> GpuExecutable::BufferForAllocation(
+    absl::Span<ExecutionInput const> arguments,
+    const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
+    const BufferAllocation& allocation,
+    se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal,
+    int64 arg_idx) {
+  if (allocation.is_thread_local()) {
+    return se::DeviceMemoryBase{};
+  } else if (allocation.is_entry_computation_parameter()) {
+    auto param_no = allocation.parameter_number();
+    se::DeviceMemoryBase registered_buffer =
+        arguments[param_no]
+            .Buffer(allocation.param_shape_index())
+            .AsDeviceMemoryBase();
+    if (registered_buffer.is_null() && registered_buffer.size() > 0) {
+      return FailedPrecondition(
+          ""Cannot run XLA computation because pointer to (sub-)buffer at ""
+          ""index %s of parameter %d was null.  All pointers to ""
+          ""(sub-)buffers must not be null, unless the (sub-)buffer has ""
+          ""zero elements."",
+          allocation.param_shape_index().ToString(), param_no);
+    }
+    return registered_buffer;
+  } else if (allocation.is_constant()) {
+    return FindOrDie(*globals, arg_idx);
+  } else {
+    // Allocate each allocation that might escape, or is the temp buffer.
+    CHECK(allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer());
+    const int64 buffer_size = allocation.size();
+    se::DeviceMemoryBase buffer_address;
+    if (buffer_size > 0) {
+      TF_ASSIGN_OR_RETURN(
+          se::OwningDeviceMemory buffer,
+          memory_allocator->Allocate(device_ordinal, buffer_size));
+      buffer_address = buffer.Release();
+    }
+    return buffer_address;
+  }
+}
+
 StatusOr<BufferAllocations> GpuExecutable::GenerateBufferAllocations(
     absl::Span<ExecutionInput const> arguments,
     const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
     se::DeviceMemoryAllocator* const memory_allocator,
     se::StreamExecutor* executor) {
-  absl::flat_hash_map<BufferAllocation::Index, se::DeviceMemoryBase>
-      registered_buffers;
   tensorflow::profiler::TraceMe hlo_module_activity(
       [&] { return std::string(""Build buffer allocations""); },
       tensorflow::profiler::TraceMeLevel::kInfo);
 
   const int64 num_buffers = assignment_->Allocations().size();
-  std::vector<se::DeviceMemoryBase> buffers(num_buffers);
-  for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
-    const BufferAllocation& allocation = assignment_->GetAllocation(i);
-    if (allocation.is_entry_computation_parameter()) {
-      auto param_no = allocation.parameter_number();
-      se::DeviceMemoryBase buffer = arguments[param_no]
-                                        .Buffer(allocation.param_shape_index())
-                                        .AsDeviceMemoryBase();
-
-      // All top-level buffers and sub-buffers must have an explicit, non-null
-      // pointer, except for zero-sized buffers, which may be null.
-      if (buffer.is_null() && buffer.size() > 0) {
-        return FailedPrecondition(
-            ""Cannot run XLA computation because pointer to (sub-)buffer at ""
-            ""index %s of parameter %d was null.  All pointers to ""
-            ""(sub-)buffers must not be null, unless the (sub-)buffer has ""
-            ""zero elements."",
-            allocation.param_shape_index().ToString(), param_no);
-      }
-
-      InsertOrDie(&registered_buffers, i, buffer);
-    }
-
-    if (allocation.is_constant()) {
-      InsertOrDie(&registered_buffers, i, FindOrDie(*globals, i));
-    }
-  }
-
-  int device_ordinal = executor->device_ordinal();
-  for (BufferAllocation::Index i = 0; i < num_buffers; ++i) {
+  std::vector<se::DeviceMemoryBase> buffers;
+  buffers.reserve(num_buffers);
+  for (int64 i = 0; i < num_buffers; ++i) {
     const BufferAllocation& allocation = assignment_->GetAllocation(i);
+    TF_ASSIGN_OR_RETURN(
+        se::DeviceMemoryBase buffer,
+        BufferForAllocation(arguments, globals, allocation, memory_allocator,
+                            executor->device_ordinal(), i));
     const int64 expected_alignment = [&] {
       if (allocation.is_entry_computation_parameter()) {
         return kEntryParameterAlignBytes;
@@ -372,50 +387,17 @@ StatusOr<BufferAllocations> GpuExecutable::GenerateBufferAllocations(
         return kXlaAllocatedBufferAlignBytes;
       }
     }();
-
-    // If buffer #i's address is already registered (e.g. external arguments or
-    // result buffers), use that registered buffer.
-    if (se::DeviceMemoryBase* address =
-            tensorflow::gtl::FindOrNull(registered_buffers, i)) {
-      if (reinterpret_cast<uintptr_t>(address->opaque()) % expected_alignment !=
-          0) {
-        return InternalError(
-            ""Address of registered buffer %d must be a multiple of %x, but ""
-            ""was %p"",
-            i, kEntryParameterAlignBytes, address->opaque());
-      }
-      CHECK_LT(i, buffers.size());
-      buffers[i] = *address;
-      continue;
-    }
-
-    // Allocate each allocation that might escape, or is the temp buffer.
-    if (allocation.maybe_live_out() || allocation.IsPreallocatedTempBuffer()) {
-      const int64 buffer_size = allocation.size();
-      se::DeviceMemoryBase buffer_address;
-      if (buffer_size > 0) {
-        TF_ASSIGN_OR_RETURN(
-            se::OwningDeviceMemory buffer,
-            memory_allocator->Allocate(device_ordinal, buffer_size));
-        if (reinterpret_cast<uintptr_t>(buffer->opaque()) %
-                expected_alignment !=
+    if (!buffer.is_null() &&
+        reinterpret_cast<uintptr_t>(buffer.opaque()) % expected_alignment !=
             0) {
-          return InternalError(
-              ""Address returned by memory_allocator->Allocate must be a ""
-              ""multiple of 0x%x, but was %p"",
-              kXlaAllocatedBufferAlignBytes, buffer->opaque());
-        }
-        // We do manual memory management within BufferAllocations.  Be sure not
-        // to do a TF_RETURN_IF_ERROR between this line and the
-        // buffer_allocations.SetBuffer(buffer_address) call below!
-        buffer_address = buffer.Release();
-      }
-
-      CHECK_LT(i, buffers.size());
-      buffers[i] = buffer_address;
+      return InternalError(
+          ""Address of buffer %d must be a multiple of %x, but ""
+          ""was %p"",
+          i, expected_alignment, buffer.opaque());
     }
+    buffers.push_back(buffer);
   }
-  return {{buffers, device_ordinal, memory_allocator}};
+  return {{buffers, executor->device_ordinal(), memory_allocator}};
 }
 
 StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
@@ -457,13 +439,11 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
 
   HloInstruction* root = hlo_module_->entry_computation()->root_instruction();
   auto device_ordinal = executor->device_ordinal();
-  ScopedShapedBuffer shaped_buffer(root->shape(), root->shape(),
-                                   memory_allocator, device_ordinal);
+  ExecutionOutput result(root->shape(), root->shape(), memory_allocator,
+                         device_ordinal);
 
-  // Copy DeviceMemoryBase values which contain the array(s) of the result into
-  // the respective location in ShapedBuffer.
   std::set<se::DeviceMemoryBase> buffers_in_result;
-  for (auto& p : shaped_buffer.buffers()) {
+  for (auto& p : result.MutableResult()->buffers()) {
     const ShapeIndex& index = p.first;
     se::DeviceMemoryBase& device_memory = p.second;
     const auto& sources = GetRootValueSet().element(index);
@@ -522,7 +502,7 @@ StatusOr<ExecutionOutput> GpuExecutable::ExecuteAsyncOnStream(
       }
     }
   }
-  return ExecutionOutput(std::move(shaped_buffer), std::move(buffers_to_free));
+  return result;
 }
 
 const InstructionValueSet& GpuExecutable::GetRootValueSet() const {
",0,test
5febf24c804c692b7444b12cbea8c63fe6d06f31,tensorflow/tensorflow,"[XLA:GPU] [NFC] Refactor the code to allocate buffers for a given BufferAssignment

PiperOrigin-RevId: 314453117
Change-Id: I4be382808f1026d6136967de8954627f408db15b",gpu_executable.h,"@@ -129,10 +129,17 @@ class GpuExecutable : public Executable {
       se::DeviceMemoryAllocator* const memory_allocator,
       se::StreamExecutor* executor);
 
-  // The LLVM IR, in string format, of the unoptimized module generated for this
-  // GpuExecutable. We save a string instead of an llvm::Module* because leaving
-  // llvm::Module* in a singleton can cause the heap checker to emit false
-  // positives.
+  StatusOr<se::DeviceMemoryBase> BufferForAllocation(
+      absl::Span<ExecutionInput const> arguments,
+      const GpuExecutable::BufferAllocToDeviceMemoryMap* globals,
+      const BufferAllocation& allocation,
+      se::DeviceMemoryAllocator* const memory_allocator, int device_ordinal,
+      int64 arg_idx);
+
+  // The LLVM IR, in string format, of the unoptimized module generated for
+  // this GpuExecutable. We save a string instead of an llvm::Module* because
+  // leaving llvm::Module* in a singleton can cause the heap checker to emit
+  // false positives.
   //
   // This string should be modified only before ExecuteOnStream.
   string ir_module_string_;
",0,test
8e195d991452af81b467192e538fe8f459d7c9c0,tensorflow/tensorflow,"Update mnist.py

clean and clear",mnist.py,"@@ -261,17 +261,13 @@ def read_data_sets(train_dir,
   train_images = train_images[validation_size:]
   train_labels = train_labels[validation_size:]
 
-  train = DataSet(
-      train_images, train_labels, dtype=dtype, reshape=reshape, seed=seed)
-  validation = DataSet(
-      validation_images,
-      validation_labels,
-      dtype=dtype,
-      reshape=reshape,
-      seed=seed)
-  test = DataSet(
-      test_images, test_labels, dtype=dtype, reshape=reshape, seed=seed)
-
+  
+  options = dict(dtype=dtype, reshape=reshape, seed=seed)
+  
+  train = DataSet(train_images, train_labels, **options)
+  validation = DataSet(validation_images, validation_labels, **options)
+  test = DataSet(test_images, test_labels, **options)
+  
   return base.Datasets(train=train, validation=validation, test=test)
 
 
",0,train
133867e35f75360d5df83cfe03df70115a670264,tensorflow/tensorflow,"Add a tf_executor.graph pruning pass

In a tf_executor.graph block, only the operations contributing to the fetch
results need to be preserved regardless of side-effects. This ""dead-code
elimination"" pass is made trivial by this property.

PiperOrigin-RevId: 263066534",graph_pruning.cc,"@@ -0,0 +1,87 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#include ""llvm/ADT/STLExtras.h""
+#include ""llvm/ADT/SmallVector.h""
+#include ""llvm/ADT/iterator_range.h""
+#include ""mlir/IR/Block.h""  // TF:local_config_mlir
+#include ""mlir/IR/Builders.h""  // TF:local_config_mlir
+#include ""mlir/IR/Location.h""  // TF:local_config_mlir
+#include ""mlir/IR/Operation.h""  // TF:local_config_mlir
+#include ""mlir/Pass/Pass.h""  // TF:local_config_mlir
+#include ""mlir/Pass/PassRegistry.h""  // TF:local_config_mlir
+#include ""tensorflow/compiler/mlir/tensorflow/ir/tf_executor.h""
+#include ""tensorflow/compiler/mlir/tensorflow/transforms/passes.h""
+
+namespace mlir {
+namespace tf_executor {
+
+// Prunes a TF graph eliminating dead nodes.
+void prune_graph(GraphOp graph) {
+  // A graph has a single block which forms a DAG: nodes that aren't reachable
+  // from the `fetch` operands can be eliminated.
+
+  // Delete unreachable node from the graph. We traverse it in reverse order so
+  // that we just have to check that a node does not have any users to delete
+  // it.
+  for (Operation &op : llvm::make_early_inc_range(
+           llvm::drop_begin(llvm::reverse(graph.GetBody()), 1))) {
+    // NextIteration.Sink operation are handled specially: they are live if the
+    // source is live, and removed when the source is processed.
+    if (auto sinkOp = dyn_cast<NextIterationSinkOp>(op)) continue;
+
+    // For NextIteration.Source, we just check that the source does not have any
+    // other user than the sink.
+    if (auto sourceOp = dyn_cast<NextIterationSourceOp>(op)) {
+      Operation *sink = sourceOp.GetSink().getOperation();
+      if (llvm::any_of(sourceOp.getResults(), [sink](Value *result) {
+            return llvm::any_of(result->getUsers(), [sink](Operation *user) {
+              return user != sink;
+            });
+          }))
+        continue;
+
+      // No other users than the sink, erase the pair!
+      sink->erase();
+      sourceOp.erase();
+      continue;
+    }
+
+    // General case.
+    if (op.use_empty()) op.erase();
+  }
+}
+
+namespace {
+
+// This transformation pass prunes a TF graph eliminating dead-nodes.
+struct GraphPruning : public FunctionPass<GraphPruning> {
+  void runOnFunction() override {
+    getFunction().walk<tf_executor::GraphOp>(
+        [](tf_executor::GraphOp graph) { prune_graph(graph); });
+  }
+};
+
+}  // namespace
+
+FunctionPassBase *CreateTFExecutorGraphPruningPass() {
+  return new GraphPruning();
+}
+
+static PassRegistration<GraphPruning> pass(
+    ""tf-executor-graph-pruning"", ""Prune a TensorFlow Graph from dead nodes."");
+
+}  // namespace tf_executor
+}  // namespace mlir
",0,train
133867e35f75360d5df83cfe03df70115a670264,tensorflow/tensorflow,"Add a tf_executor.graph pruning pass

In a tf_executor.graph block, only the operations contributing to the fetch
results need to be preserved regardless of side-effects. This ""dead-code
elimination"" pass is made trivial by this property.

PiperOrigin-RevId: 263066534",passes.h,"@@ -37,9 +37,17 @@ std::unique_ptr<FunctionPassBase> CreateRaiseTFControlFlowPass();
 }  // namespace TFControlFlow
 
 namespace tf_executor {
+class GraphOp;
+
 // Create a pass to merge IslandOps from TFExecutor dialect.
 std::unique_ptr<FunctionPassBase> CreateTFExecutorIslandCoarseningPass();
 
+// Create a pass to prune tf_executor.graph from dead nodes.
+FunctionPassBase* CreateTFExecutorGraphPruningPass();
+
+// Prune a tf_executor.graph operation from dead nodes.
+void prune_graph(GraphOp graph);
+
 }  // namespace tf_executor
 
 namespace TFDevice {
",0,train
7f12947e4f31cdf9a0cca291a653980fa204d686,tensorflow/tensorflow,"Don't provide a padding op for strings: the code never did what could be
reasonably expected (i.e. pad with spaces), and doesn't compile anymore.
Change: 115936929",pad_op.cc,"@@ -136,7 +136,7 @@ class PadOp : public OpKernel {
                               .HostMemory(""paddings""),   \
                           PadOp<CPUDevice, type>)
 
-TF_CALL_ALL_TYPES(REGISTER_KERNEL);
+TF_CALL_POD_TYPES(REGISTER_KERNEL);
 #undef REGISTER_KERNEL
 
 #if GOOGLE_CUDA
",0,test
1bbca7cfb6f01eb3536577102e3e0088c57c6a31,tensorflow/tensorflow,"tf.max will return -inf if the input is empty, which results in an exception in tf.range() below.

PiperOrigin-RevId: 235773577",array_ops.py,"@@ -3091,6 +3091,7 @@ def sequence_mask(lengths, maxlen=None, dtype=dtypes.bool, name=None):
 
     if maxlen is None:
       maxlen = gen_math_ops._max(lengths, _all_dimensions(lengths))
+      maxlen = gen_math_ops.maximum(constant(0, maxlen.dtype), maxlen)
     else:
       maxlen = ops.convert_to_tensor(maxlen)
     if maxlen.get_shape().ndims is not None and maxlen.get_shape().ndims != 0:
",0,train
5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index.
- This is a common pattern generated by SPMD partitioning for 1D sharding.

PiperOrigin-RevId: 370084429
Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",algebraic_simplifier.cc,"@@ -2683,6 +2683,26 @@ Status AlgebraicSimplifierVisitor::HandleClamp(HloInstruction* clamp) {
     return Status::OK();
   }
 
+  // Eliminate redundant clamping of replica-id or partition-id.
+  if ((Match(to_clamp, m::PartitionId()) || Match(to_clamp, m::ReplicaId())) &&
+      Match(clamp_lower_bound, m::ConstantScalar(0U)) &&
+      Match(clamp_upper_bound, m::ConstantScalar())) {
+    int64 upper_bound = Cast<HloConstantInstruction>(clamp_upper_bound)
+                            ->literal()
+                            .GetFirstElement<uint32_t>();
+    const HloModuleConfig& config = clamp->GetModule()->config();
+    int64 runtime_bound = Match(to_clamp, m::PartitionId())
+                              ? config.num_partitions()
+                              : config.replica_count();
+
+    // If num_partitions or replica_count is 1, infer it as unknown.
+    // pid/rid < runtime_bound => The clamp(0, pid/rid, upper_bound) is
+    // redundant if the runtime_bound <= upper_bound + 1;
+    if (runtime_bound != 1 && runtime_bound <= upper_bound + 1) {
+      return ReplaceInstruction(clamp, to_clamp);
+    }
+  }
+
   return Status::OK();
 }
 
@@ -4416,6 +4436,50 @@ Status AlgebraicSimplifierVisitor::HandleDynamicSlice(
         HloInstruction::CreateSlice(dynamic_slice->shape(), operand,
                                     slice_starts, slice_limits, slice_strides));
   }
+
+  // Convert the dynamic slice of an iota to just a reference to the index
+  // (possibly clamped). Index is always a scalar integer. Output should be a
+  // rank 1 array of size 1 with element type matching that of the scalar index
+  // (except the signedness).
+  const PrimitiveType element_type = dynamic_slice->shape().element_type();
+  if (operand->opcode() == HloOpcode::kIota && operand->shape().rank() == 1 &&
+      dynamic_slice->shape().rank() == 1 &&
+      dynamic_slice->shape().dimensions(0) == 1 &&
+      (element_type == S32 || element_type == U32)) {
+    // This dynamic_slice will have a single start_index operand (since its
+    // operand is rank 1).
+    HloInstruction* index = dynamic_slice->mutable_operand(1);
+    const PrimitiveType index_type = index->shape().element_type();
+
+    auto create_constant = [&](int64 value) {
+      if (index_type == S32) {
+        return MakeScalarLike<int32_t>(index, value);
+      } else {
+        return MakeScalarLike<uint32_t>(index, value);
+      }
+    };
+
+    if (index_type == S32 || index_type == U32) {
+      // Clamp the index to the range of the iota.
+      int64 iota_size = operand->shape().dimensions(0);
+      HloInstruction* low = create_constant(0);
+      HloInstruction* high = create_constant(iota_size - 1);
+      HloInstruction* clamped =
+          computation_->AddInstruction(HloInstruction::CreateTernary(
+              index->shape(), HloOpcode::kClamp, low, index, high));
+      Shape reshape_shape = ShapeUtil::MakeShape(index_type, {1});
+      HloInstruction* result = computation_->AddInstruction(
+          HloInstruction::CreateReshape(reshape_shape, clamped));
+
+      if (index_type != element_type) {
+        result = computation_->AddInstruction(
+            HloInstruction::CreateConvert(dynamic_slice->shape(), result));
+      }
+
+      return ReplaceInstruction(dynamic_slice, result);
+    }
+  }
+
   return Status::OK();
 }
 
",0,train
5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index.
- This is a common pattern generated by SPMD partitioning for 1D sharding.

PiperOrigin-RevId: 370084429
Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",algebraic_simplifier_test.cc,"@@ -7373,5 +7373,53 @@ ENTRY f {
   EXPECT_EQ(pad->padding_config().dimensions(0).edge_padding_high(), 0);
 }
 
+// Test folding of dynamic_slice(iota, index) -> clamp(index, 0, size-1)
+TEST_F(AlgebraicSimplifierTest, DynamicSliceOfIota) {
+  const char* hlo_string = R""(
+HloModule module
+
+ENTRY f {
+  %cst = s32[2]{0} constant({0, 1})
+  %index = u32[] parameter(0)
+  ROOT %dynamic-slice = s32[1]{0} dynamic-slice(s32[2]{0} %cst, u32[] %index),
+                                  dynamic_slice_sizes={1}
+}
+)"";
+
+  TF_ASSERT_OK_AND_ASSIGN(auto module,
+                          ParseAndReturnVerifiedModule(hlo_string));
+  AlgebraicSimplifier simplifier(default_options_);
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  VLOG(2) << ""After rewrite \n"" << module->ToString();
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              GmockMatch(m::Convert(m::Reshape(
+                  m::Clamp(m::Constant(), m::Parameter(0), m::Constant())))));
+}
+
+// Test folding of clamp(pid, 0, limit) -> pid
+TEST_F(AlgebraicSimplifierTest, ClampOfPartitionId) {
+  const char* hlo_string = R""(
+HloModule module
+
+ENTRY f {
+  %pid = u32[] partition-id()
+  %low = u32[] constant(0)
+  %high = u32[] constant(5)
+  ROOT %c = u32[] clamp(%low, %pid, %high)
+}
+)"";
+
+  TF_ASSERT_OK_AND_ASSIGN(
+      auto module, ParseAndReturnVerifiedModule(hlo_string, /*replica_count=*/1,
+                                                /*num_partitions=*/6));
+  AlgebraicSimplifier simplifier(default_options_);
+  ASSERT_TRUE(simplifier.Run(module.get()).ValueOrDie());
+  VLOG(2) << ""After rewrite \n"" << module->ToString();
+
+  EXPECT_THAT(module->entry_computation()->root_instruction(),
+              GmockMatch(m::PartitionId()));
+}
+
 }  // namespace
 }  // namespace xla
",0,train
5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index.
- This is a common pattern generated by SPMD partitioning for 1D sharding.

PiperOrigin-RevId: 370084429
Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",hlo_matchers.h,"@@ -209,6 +209,7 @@ HLO_MATCHER(AllToAll);
 HLO_MATCHER(And);
 HLO_MATCHER(BatchNormGrad);
 HLO_MATCHER(Bitcast);
+HLO_MATCHER(BitcastConvert);
 HLO_MATCHER(Broadcast);
 HLO_MATCHER(Call);
 HLO_MATCHER(Ceil);
",0,train
5a29a86050ae0c093dbf98e9321bdbd766f13096,tensorflow/tensorflow,"[XLA] Simplify dynamic-slice(iota, index) => index.
- This is a common pattern generated by SPMD partitioning for 1D sharding.

PiperOrigin-RevId: 370084429
Change-Id: Ie924f1356b5ce7f9efeb389af31fafdd97eb05b1",pattern_matcher.h,"@@ -1982,6 +1982,8 @@ XLA_NULLOP_PATTERN(Constant)
 XLA_NULLOP_PATTERN(Parameter)
 XLA_NULLOP_PATTERN(Iota)
 XLA_NULLOP_PATTERN(Rng)
+XLA_NULLOP_PATTERN(PartitionId)
+XLA_NULLOP_PATTERN(ReplicaId)
 #undef XLA_NULLOP_PATTERN
 
 // Helpers for unary instructions.
",0,train
659c981a3556c6424237eacd0bf4cdc86f228f16,tensorflow/tensorflow,"Fix error when trying to fit a model with a nested model that has been compiled with metrics.

PiperOrigin-RevId: 254472839",training.py,"@@ -60,6 +60,7 @@ from tensorflow.python.ops import math_ops
 from tensorflow.python.ops.losses import util as tf_losses_utils
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.training.tracking import base as trackable
+from tensorflow.python.training.tracking import layer_utils as trackable_layer_utils
 from tensorflow.python.util import nest
 from tensorflow.python.util import serialization
 from tensorflow.python.util.tf_export import keras_export
@@ -381,7 +382,9 @@ class Model(network.Network):
     metrics = []
     if self._is_compiled:
       metrics += self._compile_metric_functions
-    return metrics + super(Model, self).metrics
+    metrics.extend(self._metrics)
+    metrics.extend(_get_metrics_from_layers(self._layers))
+    return metrics
 
   @property
   def metrics_names(self):
@@ -3113,3 +3116,27 @@ def _convert_scipy_sparse_tensor(value, expected_input):
       return sparse_tensor.SparseTensor(indices, data, shape)
   else:
     return value
+
+
+def _get_metrics_from_layers(layers):
+  """"""Returns list of metrics from the given layers.
+
+  This will not include the `compile` metrics of a model layer.
+
+  Arguments:
+    layers: List of layers.
+
+  Returns:
+    List of metrics.
+  """"""
+  metrics = []
+  layers = trackable_layer_utils.filter_empty_layer_containers(layers)
+  for layer in layers:
+    if isinstance(layer, Model):
+      # We cannot call 'metrics' on the model because we do not want to
+      # include the metrics that were added in compile API of a nested model.
+      metrics.extend(layer._metrics)  # pylint: disable=protected-access
+      metrics.extend(_get_metrics_from_layers(layer.layers))
+    else:
+      metrics.extend(layer.metrics)
+  return metrics
",0,train
659c981a3556c6424237eacd0bf4cdc86f228f16,tensorflow/tensorflow,"Fix error when trying to fit a model with a nested model that has been compiled with metrics.

PiperOrigin-RevId: 254472839",training_test.py,"@@ -3123,6 +3123,51 @@ class TestTrainingWithMetrics(keras_parameterized.TestCase):
     for key in ['loss', 'mae_1', 'mae_2', 'mae_3']:
       self.assertAllClose(history.history[key], expected_val, 1e-3)
 
+  @keras_parameterized.run_all_keras_modes
+  def test_model_with_nested_compiled_model(self):
+
+    class LayerWithAddMetric(keras.layers.Layer):
+
+      def __init__(self):
+        super(LayerWithAddMetric, self).__init__()
+        self.dense = keras.layers.Dense(1, kernel_initializer='ones')
+
+      def call(self, inputs):
+        outputs = self.dense(inputs)
+        self.add_metric(
+            math_ops.reduce_sum(outputs), name='mean', aggregation='mean')
+        return outputs
+
+    x = keras.layers.Input(shape=(1,))
+    y = LayerWithAddMetric()(x)
+
+    inner_model = keras.models.Model(x, y)
+    inner_model.add_metric(
+        math_ops.reduce_sum(y), name='mean1', aggregation='mean')
+
+    inner_model.compile(
+        'sgd',
+        loss='mse',
+        metrics=[metrics_module.Accuracy('acc')],
+        run_eagerly=testing_utils.should_run_eagerly())
+
+    self.assertEqual([m.name for m in inner_model.metrics],
+                     ['acc', 'mean', 'mean1'])
+
+    x = keras.layers.Input(shape=[1])
+    y = inner_model(x)
+    outer_model = keras.Model(x, y)
+    outer_model.add_metric(
+        math_ops.reduce_sum(y), name='mean2', aggregation='mean')
+
+    outer_model.compile(
+        'sgd',
+        loss='mse',
+        metrics=[metrics_module.Accuracy('acc2')],
+        run_eagerly=testing_utils.should_run_eagerly())
+    self.assertEqual([m.name for m in outer_model.metrics],
+                     ['acc2', 'mean', 'mean1', 'mean2'])
+
 
 class BareUpdateLayer(keras.layers.Layer):
 
",0,train
6f968a3a59b2d11ac74e0c0d9921dc3d660e765c,tensorflow/tensorflow,"Remove all the SavedModel-related code in base_layer.

It is not applicable for the v1 tf.layers case.

PiperOrigin-RevId: 299962368
Change-Id: I7c6f4fb76ff3e5aa83d4fc0db817ed4e1b73a827",legacy_base_layer.py,"@@ -61,7 +61,6 @@ from tensorflow.python.frozen_keras.engine import node as node_module
 from tensorflow.python.frozen_keras.utils import generic_utils
 from tensorflow.python.frozen_keras.utils import layer_utils
 from tensorflow.python.frozen_keras.utils import tf_utils
-from tensorflow.python.keras.saving.saved_model import layer_serialization
 # A module that only depends on `keras.layers` import these from here.
 from tensorflow.python.keras.utils.generic_utils import to_snake_case  # pylint: disable=unused-import
 from tensorflow.python.keras.utils.tf_utils import is_tensor_or_tensor_list  # pylint: disable=unused-import
@@ -2575,26 +2574,6 @@ class LegacyBaseLayer(module.Module):
 
   # SavedModel properties. Please see keras/saving/saved_model for details.
 
-  @property
-  def _trackable_saved_model_saver(self):
-    return layer_serialization.LayerSavedModelSaver(self)
-
-  @property
-  def _object_identifier(self):
-    return self._trackable_saved_model_saver.object_identifier
-
-  @property
-  def _tracking_metadata(self):
-    return self._trackable_saved_model_saver.tracking_metadata
-
-  def _list_extra_dependencies_for_serialization(self, serialization_cache):
-    return (self._trackable_saved_model_saver
-            .list_extra_dependencies_for_serialization(serialization_cache))
-
-  def _list_functions_for_serialization(self, serialization_cache):
-    return (self._trackable_saved_model_saver
-            .list_functions_for_serialization(serialization_cache))
-
   def __getstate__(self):
     # Override to support `copy.deepcopy` and pickling.
     # Thread-local objects cannot be copied in Python 3, so pop these.
",0,train
3c98b456afb144832294df944aa01b80e6004a0f,tensorflow/tensorflow,"Switch `FastParseSingleExample()` to accept an `absl::string_view`.

PiperOrigin-RevId: 261806721",example_proto_fast_parsing.cc,"@@ -1273,8 +1273,8 @@ Status FastParseExample(const Config& config,
   return Status::OK();
 }
 
-Status FastParseSingleExample(const Config& config, const string& serialized,
-                              Result* result) {
+Status FastParseSingleExample(const Config& config,
+                              absl::string_view serialized, Result* result) {
   DCHECK(result != nullptr);
   // Check config so we can safely CHECK(false) in switches on config.*.dtype
   for (auto& c : config.sparse) {
",0,train
3c98b456afb144832294df944aa01b80e6004a0f,tensorflow/tensorflow,"Switch `FastParseSingleExample()` to accept an `absl::string_view`.

PiperOrigin-RevId: 261806721",example_proto_fast_parsing.h,"@@ -107,7 +107,7 @@ Status FastParseExample(const FastParseExampleConfig& config,
 typedef FastParseExampleConfig FastParseSingleExampleConfig;
 
 Status FastParseSingleExample(const FastParseSingleExampleConfig& config,
-                              const string& serialized, Result* result);
+                              absl::string_view serialized, Result* result);
 
 // Parses a batch of serialized SequenceExample protos and converts them into
 // result according to given config.
",0,train
6b0ab72dfcd8dd608b2a056a156be960b1abe878,tensorflow/tensorflow,"Integrate LLVM at https://github.com/llvm/llvm-project/commit/c2171457e281

PiperOrigin-RevId: 306950518
Change-Id: I6da501ea9c226bfeb35f8854b6a12dc7a42938df",xla_legalize_to_linalg.cc,"@@ -134,7 +134,7 @@ class PointwiseToLinalgConverter : public OpConversionPattern<OpTy> {
         rewriter.getI64IntegerAttr(bodyResultTypes.size()),  // args_out
         rewriter.getArrayAttr(indexingMaps),
         GetNParallelLoopsAttrs(nloops, &rewriter),
-        /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr);
+        /*doc=*/nullptr, /*library_call=*/nullptr);
 
     // Add a block to the region.
     auto* region = &linalgOp.region();
@@ -218,7 +218,7 @@ class DataMovementOpConverter : public OpConversionPattern<OpTy> {
         loc, isLHLO ? ArrayRef<Type>{} : resultType, args,
         rewriter.getI64IntegerAttr(1), rewriter.getI64IntegerAttr(1),
         indexingMapsAttr, GetNParallelLoopsAttrs(nloops, &rewriter),
-        /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr);
+        /*doc=*/nullptr, /*library_call=*/nullptr);
 
     auto* region = &linalgOp.region();
     auto* block = rewriter.createBlock(region, region->end());
@@ -400,7 +400,7 @@ class IotaConverter : public OpConversionPattern<xla_lhlo::IotaOp> {
         rewriter.getI64IntegerAttr(1),  // args_out
         rewriter.getArrayAttr(indexingMaps),
         GetNParallelLoopsAttrs(nloops, &rewriter),
-        /*doc=*/nullptr, /*fun=*/nullptr, /*library_call=*/nullptr);
+        /*doc=*/nullptr, /*library_call=*/nullptr);
 
     // Add a block to the region.
     auto* region = &linalgOp.region();
",0,train
c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting

PiperOrigin-RevId: 218836217",Identifier.h,"@@ -52,14 +52,10 @@ public:
   const char *c_str() const { return pointer; }
 
   /// Return a pointer to the start of the string data.
-  const char *data() const {
-    return pointer;
-  }
+  const char *data() const { return pointer; }
 
   /// Return the number of bytes in this string.
-  unsigned size() const {
-    return ::strlen(pointer);
-  }
+  unsigned size() const { return ::strlen(pointer); }
 
   /// Return true if this identifier is the specified string.
   bool is(StringRef string) const { return strref().equals(string); }
",0,train
c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting

PiperOrigin-RevId: 218836217",OpDefinition.h,"@@ -311,7 +311,7 @@ protected:
   }
 };
 
-/// This class provides the API for ops that are known to have exactly one
+/// This class provides the API for ops that are known to have no
 /// SSA operand.
 template <typename ConcreteType>
 class ZeroOperands : public TraitBase<ConcreteType, ZeroOperands> {
@@ -473,7 +473,7 @@ public:
   }
 };
 
-/// This class provides return value APIs for ops that are known to have a
+/// This class provides return value APIs for ops that are known to have
 /// zero results.
 template <typename ConcreteType>
 class ZeroResult : public TraitBase<ConcreteType, ZeroResult> {
",0,train
c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting

PiperOrigin-RevId: 218836217",OperationSupport.h,"@@ -134,7 +134,7 @@ private:
   const OperationProperties opProperties;
 };
 
-/// NamedAttribute is a used for operation attribute lists, it holds an
+/// NamedAttribute is used for operation attribute lists, it holds an
 /// identifier for the name and a value for the attribute.  The attribute
 /// pointer should always be non-null.
 using NamedAttribute = std::pair<Identifier, Attribute>;
",0,train
c487a28e5e84835ef5cd36daa09ddfad8e99c5cb,tensorflow/tensorflow,"Fix comment typos and formatting

PiperOrigin-RevId: 218836217",StmtVisitor.h,"@@ -212,7 +212,7 @@ public:
   // and try visiting the subtype.  All of this should be inlined perfectly,
   // because there are no virtual functions to get in the way.
 
-  // When visiting a specific stmt directly during a walk, these  methods get
+  // When visiting a specific stmt directly during a walk, these methods get
   // called. These are typically O(1) complexity and shouldn't be recursively
   // processing their descendants in some way. When using RetTy, all of these
   // need to be overridden.
",0,train
0d6939bd371b3558278720f06a03083c28c1b0b7,tensorflow/tensorflow,"[mhlo] Import tuple-return from mhlo::mapOp's reducer-block to flattened return-val.

During import (from HLO to MHLO) we flatten the tuple return-type of
region-blocks. MHLO mapOp::verifier ensures that the flattened return-type is
compatible with the op-specification.

PiperOrigin-RevId: 433874864",hlo_function_importer.cc,"@@ -1190,8 +1190,9 @@ StatusOr<mlir::Operation*> HloFunctionImporter::ImportInstructionImpl(
       auto op = func_builder->create<mlir::mhlo::MapOp>(
           loc, result_type, operands,
           ConvertDimensions(instruction->dimensions()));
-      TF_RETURN_IF_ERROR(
-          ImportAsRegion(*instruction->to_apply(), &op.computation()));
+      TF_RETURN_IF_ERROR(ImportAsRegion(*instruction->to_apply(),
+                                        &op.computation(),
+                                        /*flatten_region_arg_tuple=*/true));
       return op.getOperation();
     }
     case HloOpcode::kConvolution: {
",0,train
3bdccb536b4cc96e66ff0452e11c21bfff44376e,tensorflow/tensorflow,"Extracting hand-coded real number support out of generated C++ Conj op and into the only file (math_grad.cc) where it was used. This allows the current set of C++ ops to be fully auto-generated.

PiperOrigin-RevId: 368748247
Change-Id: I5634d14022f2456bbd09194774c41916c2115f3d",math_grad.cc,"@@ -22,7 +22,6 @@ limitations under the License.
 
 using std::vector;
 using tensorflow::ops::AddV2;
-using tensorflow::ops::Conj;
 using tensorflow::ops::Div;
 using tensorflow::ops::DivNoNan;
 using tensorflow::ops::MatMul;
@@ -35,6 +34,20 @@ namespace tensorflow {
 namespace gradients {
 namespace {
 
+static Status SafeConj(AbstractContext* ctx, AbstractTensorHandle* const input,
+                       AbstractTensorHandle** output, const char* name) {
+  auto dtype = input->DataType();
+  if (DataTypeIsFloating(BaseType(dtype)) ||
+      DataTypeIsInteger(BaseType(dtype))) {
+    return tensorflow::ops::Identity(ctx, input, output, name);
+  } else if (!DataTypeIsComplex(BaseType(dtype)) &&
+             BaseType(dtype) != DT_VARIANT) {
+    return errors::InvalidArgument(
+        ""Expected numeric or variant tensor, got dtype "", dtype);
+  }
+  return tensorflow::ops::Conj(ctx, input, output, name);
+}
+
 class AddGradientFunction : public GradientFunction {
  public:
   Status Compute(AbstractContext* ctx,
@@ -63,7 +76,7 @@ class ExpGradientFunction : public GradientFunction {
                  absl::Span<AbstractTensorHandle*> grad_inputs) override {
     AbstractTensorHandle* conj_output;
     std::string name = ""Conj_Exp_Grad"";
-    TF_RETURN_IF_ERROR(Conj(ctx, exp_.get(), &conj_output, name.c_str()));
+    TF_RETURN_IF_ERROR(SafeConj(ctx, exp_.get(), &conj_output, name.c_str()));
     AbstractTensorHandlePtr conj_output_releaser(conj_output);
 
     name = ""Mul_Exp_Grad"";
@@ -131,13 +144,13 @@ class MatMulGradientFunction : public GradientFunction {
     AbstractTensorHandle* conj_output;
     std::string name = ""Conj_A_MatMul_Grad"";
     TF_RETURN_IF_ERROR(
-        Conj(ctx, forward_inputs_[0], &conj_output, name.c_str()));
+        SafeConj(ctx, forward_inputs_[0], &conj_output, name.c_str()));
 
     AbstractTensorHandlePtr A(conj_output);
 
     name = ""Conj_B_MatMul_Grad"";
     TF_RETURN_IF_ERROR(
-        Conj(ctx, forward_inputs_[1], &conj_output, name.c_str()));
+        SafeConj(ctx, forward_inputs_[1], &conj_output, name.c_str()));
 
     AbstractTensorHandlePtr B(conj_output);
 
@@ -332,7 +345,7 @@ class Log1pGradientFunction : public GradientFunction {
 
     // Calculate conjugate of X
     std::string name = ""Conj_Log1p_Grad_X"";
-    TF_RETURN_IF_ERROR(Conj(ctx, X, &temp_output, name.c_str()));
+    TF_RETURN_IF_ERROR(SafeConj(ctx, X, &temp_output, name.c_str()));
 
     AbstractTensorHandlePtr Conj_X(temp_output);
 
",0,train
3bdccb536b4cc96e66ff0452e11c21bfff44376e,tensorflow/tensorflow,"Extracting hand-coded real number support out of generated C++ Conj op and into the only file (math_grad.cc) where it was used. This allows the current set of C++ ops to be fully auto-generated.

PiperOrigin-RevId: 368748247
Change-Id: I5634d14022f2456bbd09194774c41916c2115f3d",math_ops.cc,"@@ -17,7 +17,6 @@ limitations under the License.
 #include ""tensorflow/c/eager/abstract_context.h""
 #include ""tensorflow/c/eager/abstract_tensor_handle.h""
 #include ""tensorflow/c/eager/tracing_utils.h""
-#include ""tensorflow/c/experimental/ops/array_ops.h""
 #include ""tensorflow/core/framework/types.h""
 #include ""tensorflow/core/platform/errors.h""
 
@@ -64,16 +63,6 @@ Status Mul(AbstractContext* ctx, AbstractTensorHandle* const x,
 //   ```
 Status Conj(AbstractContext* ctx, AbstractTensorHandle* const input,
             AbstractTensorHandle** output, const char* name) {
-  // Hand-coded optimization:
-  auto dtype = input->DataType();
-  if (DataTypeIsFloating(BaseType(dtype)) ||
-      DataTypeIsInteger(BaseType(dtype))) {
-    return Identity(ctx, input, output, name);
-  } else if (!DataTypeIsComplex(BaseType(dtype)) &&
-             BaseType(dtype) != DT_VARIANT) {
-    return errors::InvalidArgument(
-        ""Expected numeric or variant tensor, got dtype "", dtype);
-  }
   AbstractOperationPtr op_ptr(ctx->CreateOperation());
   TF_RETURN_IF_ERROR(op_ptr->Reset(""Conj"", /*raw_device_name=*/nullptr));
   TF_RETURN_IF_ERROR(MaybeSetOpName(op_ptr.get(), name));
",0,train
3b4e53b0739804af7e8f51412bac366dd842a3f1,tensorflow/tensorflow,"Add an options argument to EqualGraphDef and EqualNodeDef. Currently the only option is controlling whether internal attributes (whose names start with ""_"") are tested for equality.
Change: 145362690",equal_graph_def.cc,"@@ -25,7 +25,7 @@ limitations under the License.
 namespace tensorflow {
 
 bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected,
-                   string* diff) {
+                   string* diff, const EqualGraphDefOptions& options) {
   // Intentionally do not check that versions match so that this routine can
   // be used for less brittle golden file tests.
 
@@ -44,7 +44,9 @@ bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected,
       return false;
     }
 
-    if (!EqualNodeDef(*actual_iter->second, expected_node, diff)) return false;
+    if (!EqualNodeDef(*actual_iter->second, expected_node, diff, options)) {
+      return false;
+    }
 
     actual_index.erase(actual_iter);
   }
@@ -75,8 +77,8 @@ string JoinStringField(const protobuf::RepeatedPtrField<string>& f) {
 
 }  // namespace
 
-bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected,
-                  string* diff) {
+bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff,
+                  const EqualGraphDefOptions& options) {
   if (actual.name() != expected.name()) {
     if (diff != nullptr) {
       *diff = strings::StrCat(""Actual node name '"", actual.name(),
@@ -156,13 +158,15 @@ bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected,
 
   std::unordered_set<string> actual_attr;
   for (const auto& a : actual.attr()) {
-    if (!a.first.empty() && a.first[0] == '_') {
+    if (options.ignore_internal_attrs && !a.first.empty() &&
+        a.first[0] == '_') {
       continue;
     }
     actual_attr.insert(a.first);
   }
   for (const auto& e : expected.attr()) {
-    if (!e.first.empty() && e.first[0] == '_') {
+    if (options.ignore_internal_attrs && !e.first.empty() &&
+        e.first[0] == '_') {
       continue;
     }
 
",0,train
3b4e53b0739804af7e8f51412bac366dd842a3f1,tensorflow/tensorflow,"Add an options argument to EqualGraphDef and EqualNodeDef. Currently the only option is controlling whether internal attributes (whose names start with ""_"") are tested for equality.
Change: 145362690",equal_graph_def.h,"@@ -22,20 +22,27 @@ limitations under the License.
 
 namespace tensorflow {
 
+struct EqualGraphDefOptions {
+  // Should internal attributes (attribute names that start with '_') be
+  // ignored?
+  bool ignore_internal_attrs = true;
+};
+
 // Determines if actual and expected are equal, ignoring versions and ordering
 // of nodes, attrs, and control inputs.  If the GraphDefs are different and
 // diff != nullptr, *diff is set to an explanation of the difference.  Note that
 // we use node names to match up nodes between the graphs, and so the naming of
 // nodes must be consistent.
 bool EqualGraphDef(const GraphDef& actual, const GraphDef& expected,
-                   string* diff);
+                   string* diff, const EqualGraphDefOptions& options = {});
 
 // Determines if actual and expected are equal, ignoring: ordering of
-// attrs, internal attributes, and control inputs.
+// attrs, internal attributes (if set in `options`), and control inputs.
 //
 // If the NodeDefs are different and
 // diff != nullptr, *diff is set to an explanation of the difference.
-bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff);
+bool EqualNodeDef(const NodeDef& actual, const NodeDef& expected, string* diff,
+                  const EqualGraphDefOptions& options = {});
 
 #define TF_EXPECT_GRAPH_EQ(expected, actual)                  \
   do {                                                        \
",0,train
b265dd5fb3be0b7a860b5419c0a2285f9693ae6d,tensorflow/tensorflow,"Set the AssignedDevice of the lowered control flow nodes (Enter, Switch, Merge, NextIteration, Exit) to be the same as the assigned device of the input loop variable if that is available during lowering.

PiperOrigin-RevId: 310578865
Change-Id: I7118c26054be2d8fb239c3ed03b9a3e5c4685ef6",lower_while_op.cc,"@@ -238,12 +238,14 @@ Status LowerWhileHelper::CreateEnterNodes() {
   TF_RETURN_IF_ERROR(while_op_->input_edges(&edges));
   for (const Edge* edge : edges) {
     Node* enter_node;
-    NodeBuilder builder = NodeBuilder(NewName(""enter""), ""Enter"",
-                                      graph_->op_registry(), &debug_info_)
-                              .Input(NodeOut(edge->src(), edge->src_output()))
-                              .Attr(""frame_name"", name_)
-                              .Attr(""parallel_iterations"", parallel_iterations_)
-                              .Device(while_op_->requested_device());
+    NodeBuilder builder =
+        NodeBuilder(NewName(""enter""), ""Enter"", graph_->op_registry(),
+                    &debug_info_)
+            .Input(NodeOut(edge->src(), edge->src_output()))
+            .Attr(""frame_name"", name_)
+            .Attr(""parallel_iterations"", parallel_iterations_)
+            .Device(edge->src()->requested_device())
+            .AssignedDevice(edge->src()->assigned_device_name());
     if (IsResource(edge->dst_input())) {
       builder.Attr(""is_constant"", true);
     }
@@ -282,7 +284,8 @@ Status LowerWhileHelper::CreateMergeNodes() {
         NodeBuilder(NewName(""merge""), ""Merge"", graph_->op_registry(),
                     &debug_info_)
             .Input({NodeOut(enter_node, 0), NodeOut(enter_node, 0)})
-            .Device(while_op_->requested_device())
+            .Device(enter_node->requested_device())
+            .AssignedDevice(enter_node->assigned_device_name())
             .Finalize(graph_, &merge_node));
     merge_nodes_.emplace_back(merge_node);
   }
@@ -323,21 +326,19 @@ Status LowerWhileHelper::CreateSwitchNodes() {
       TF_RETURN_IF_ERROR(while_op_->input_node(i, &input_node));
       op_name = strings::StrCat(input_node->name(), ""_switch"");
     }
+    Node* merge_node = merge_nodes_[op_input_output_to_lowered_node_[i]];
     Node* switch_node;
     string op_type = ""Switch"";
-    if (IsRefType(
-            merge_nodes_[op_input_output_to_lowered_node_[i]]->output_type(
-                0))) {
+    if (IsRefType(merge_node->output_type(0))) {
       op_type = ""RefSwitch"";
     }
-    TF_RETURN_IF_ERROR(
-        NodeBuilder(NewName(op_name), op_type, graph_->op_registry(),
-                    &debug_info_)
-            .Input(
-                NodeOut(merge_nodes_[op_input_output_to_lowered_node_[i]], 0))
-            .Input(NodeOut(loop_cond_node_, 0))
-            .Device(while_op_->requested_device())
-            .Finalize(graph_, &switch_node));
+    TF_RETURN_IF_ERROR(NodeBuilder(NewName(op_name), op_type,
+                                   graph_->op_registry(), &debug_info_)
+                           .Input(NodeOut(merge_node, 0))
+                           .Input(NodeOut(loop_cond_node_, 0))
+                           .Device(merge_node->requested_device())
+                           .AssignedDevice(merge_node->assigned_device_name())
+                           .Finalize(graph_, &switch_node));
     switch_nodes_.emplace_back(switch_node);
   }
   return Status::OK();
@@ -392,7 +393,10 @@ Status LowerWhileHelper::CreateExitNodes() {
                       &debug_info_)
               .Input(NodeOut(switch_nodes_[op_input_output_to_lowered_node_[i]],
                              0))
-              .Device(while_op_->requested_device())
+              .Device(switch_nodes_[op_input_output_to_lowered_node_[i]]
+                          ->requested_device())
+              .AssignedDevice(switch_nodes_[op_input_output_to_lowered_node_[i]]
+                                  ->assigned_device_name())
               .Finalize(graph_, &exit_node));
       exit_nodes_.emplace_back(exit_node);
       outputs.emplace_back(NodeOut(exit_node, 0));
@@ -440,12 +444,15 @@ Status LowerWhileHelper::CreateNextIterationNodes() {
     if (IsResource(i)) {
       continue;
     }
-    TF_RETURN_IF_ERROR(NodeBuilder(NewName(""next_iteration""), ""NextIteration"",
-                                   graph_->op_registry(), &debug_info_)
-                           .Input(NodeOut(body_call_node_, i))
-                           .ControlInput(body_call_node_)
-                           .Device(while_op_->requested_device())
-                           .Finalize(graph_, &next_iteration));
+    TF_RETURN_IF_ERROR(
+        NodeBuilder(NewName(""next_iteration""), ""NextIteration"",
+                    graph_->op_registry(), &debug_info_)
+            .Input(NodeOut(body_call_node_, i))
+            .ControlInput(body_call_node_)
+            .Device(while_op_->requested_device())
+            .AssignedDevice(merge_nodes_[op_input_output_to_lowered_node_[i]]
+                                ->assigned_device_name())
+            .Finalize(graph_, &next_iteration));
     next_iterations_nodes_.emplace_back(next_iteration);
   }
   return Status::OK();
",0,train
b265dd5fb3be0b7a860b5419c0a2285f9693ae6d,tensorflow/tensorflow,"Set the AssignedDevice of the lowered control flow nodes (Enter, Switch, Merge, NextIteration, Exit) to be the same as the assigned device of the input loop variable if that is available during lowering.

PiperOrigin-RevId: 310578865
Change-Id: I7118c26054be2d8fb239c3ed03b9a3e5c4685ef6",lower_while_op_test.cc,"@@ -169,6 +169,99 @@ TEST(LowerWhileOpTest, Simple) {
   }
 }
 
+TEST(LowerWhileOpTest, ForwardAssignedInputDevice) {
+  std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
+
+  // Add test functions for cond and body.
+  FunctionDefLibrary f_lib_proto;
+  *f_lib_proto.add_function() = test::function::XTimesTwo();
+  *f_lib_proto.add_function() = test::function::LessThanOrEqualToN(8);
+
+  TF_ASSERT_OK(graph->AddFunctionLibrary(f_lib_proto));
+  auto type = DT_FLOAT;
+  Node* placeholder;
+  TF_CHECK_OK(NodeBuilder(""placed_node"", ""Placeholder"")
+                  .Attr(""dtype"", type)
+                  .Finalize(graph.get(), &placeholder));
+  const string assigned_device_name = ""/job:localhost/replica:0/task:0/gpu:0"";
+  placeholder->set_assigned_device_name(assigned_device_name);
+  Node* while_node;
+  std::vector<NodeBuilder::NodeOut> inputs({NodeBuilder::NodeOut(placeholder)});
+  AttrValue cond_func;
+  cond_func.mutable_func()->set_name(""LessThanOrEqualToN"");
+  AttrValue body_func;
+  body_func.mutable_func()->set_name(""XTimesTwo"");
+  TF_ASSERT_OK(
+      NodeBuilder(""while"", ""While"", &graph->flib_def())
+          .Input(inputs)
+          .Attr(""T"", {type})
+          .Attr(""cond"", cond_func)
+          .Attr(""body"", body_func)
+          .Attr(""parallel_iterations"", 100)
+          .Attr(LowerFunctionalOpsPass::kLowerUsingSwitchMergeAttr, true)
+          .Finalize(graph.get(), &while_node));
+  TF_ASSERT_OK(Rewrite(&graph));
+
+  const Node* placeholder_node = nullptr;
+  for (const auto* op : graph->op_nodes()) {
+    if (op->name() == ""placed_node"") {
+      placeholder_node = op;
+    }
+  }
+  ASSERT_NE(placeholder_node, nullptr);
+  // Verify the assigned device of the Enter node.
+  int enter_consumers = 0;
+  const Node* enter_node = nullptr;
+  for (const Node* consumer : placeholder_node->out_nodes()) {
+    if (consumer->type_string() == ""Enter"") {
+      enter_consumers += 1;
+      enter_node = consumer;
+      ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name);
+    }
+  }
+  ASSERT_EQ(enter_consumers, 1);
+  // Verify the assigned device of the Merge node.
+  int merge_consumers = 0;
+  const Node* merge_node = nullptr;
+  for (const Node* consumer : enter_node->out_nodes()) {
+    if (consumer->type_string() == ""Merge"") {
+      merge_consumers += 1;
+      merge_node = consumer;
+      ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name);
+    }
+  }
+  ASSERT_EQ(merge_consumers, 1);
+  // Verify the assigned device of the NextIteration node.
+  int next_iteration_consumers = 0;
+  for (const Node* consumer : merge_node->in_nodes()) {
+    if (consumer->type_string() == ""NextIteration"") {
+      next_iteration_consumers += 1;
+      ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name);
+    }
+  }
+  ASSERT_EQ(next_iteration_consumers, 1);
+  // Verify the assigned device of the Switch node.
+  int switch_consumers = 0;
+  const Node* switch_node = nullptr;
+  for (const Node* consumer : merge_node->out_nodes()) {
+    if (consumer->type_string() == ""Switch"") {
+      switch_consumers += 1;
+      switch_node = consumer;
+      ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name);
+    }
+  }
+  ASSERT_EQ(switch_consumers, 1);
+  // Verify the assigned device of the Exit node.
+  int exit_consumers = 0;
+  for (const Node* consumer : switch_node->out_nodes()) {
+    if (consumer->type_string() == ""Exit"") {
+      exit_consumers += 1;
+      ASSERT_EQ(consumer->assigned_device_name(), assigned_device_name);
+    }
+  }
+  ASSERT_EQ(exit_consumers, 1);
+}
+
 TEST(LowerWhileOpTest, MultipleInputs) {
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
 
",0,train
7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function

PiperOrigin-RevId: 220548234",def_function.py,"@@ -51,6 +51,7 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
                name=None,
                dtype=None,
                constraint=None,
+               add_initializers_to=None,
                **unused_kwargs):
     """"""Creates a variable.
 
@@ -81,6 +82,9 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
         variable and return the Tensor for the projected value
         (which must have the same shape). Constraints are not safe to
         use when doing asynchronous distributed training.
+      add_initializers_to: if not None and not in legacy graph mode, the
+        initializer tensor will be added to this map instead of adding the
+        assignment to the function.
 
     Raises:
       ValueError: If the initial value is not specified, or does not have a
@@ -166,21 +170,24 @@ class UnliftedInitializerVariable(resource_variable_ops.ResourceVariable):
             self._graph_element = value
           ops.add_to_collection(ops.GraphKeys.GLOBAL_VARIABLES, self)
       else:
-        def assign_fn():
-          with ops.name_scope(""Assign"") as n, ops.colocate_with(self._handle):
-            resource_variable_ops.assign_variable_op(
-                self._handle,
-                initial_value,
-                name=n)
-            # Returning values to keep tf.cond happy.
-          return ops.convert_to_tensor(1)
-        def not_assign_fn():
-          return ops.convert_to_tensor(0)
-        # Note: this cond is always guaranteed to run because we're inside a
-        # defun which will insert automatic control dependencies.
-        control_flow_ops.cond(
-            resource_variable_ops.var_is_initialized_op(self._handle),
-            not_assign_fn, assign_fn)
+        if add_initializers_to is not None:
+          add_initializers_to[self] = initial_value
+        else:
+          def assign_fn():
+            with ops.name_scope(""Assign"") as n, ops.colocate_with(self._handle):
+              resource_variable_ops.assign_variable_op(
+                  self._handle,
+                  initial_value,
+                  name=n)
+              # Returning values to keep tf.cond happy.
+            return ops.convert_to_tensor(1)
+          def not_assign_fn():
+            return ops.convert_to_tensor(0)
+          # Note: this cond is always guaranteed to run because we're inside a
+          # defun which will insert automatic control dependencies.
+          control_flow_ops.cond(
+              resource_variable_ops.var_is_initialized_op(self._handle),
+              not_assign_fn, assign_fn)
 
     # After the handle has been created, set up a way to clean it up when
     # executing eagerly. We'll hold the only reference to the deleter, so that
@@ -252,14 +259,15 @@ class PolymorphicFunction(object):
         input_signature=self._input_signature,
         experimental_autograph=self._autograph)
 
-  def _initialize(self, args, kwds):
+  def _initialize(self, args, kwds, add_initializers_to=None):
     """"""Initializes, on the first call.""""""
 
     self._created_variables = []
 
     def variable_capturing_scope(unused_next_creator, **kwds):
       """"""Creates UnliftedInitializerVariables and saves references to them.""""""
-      v = UnliftedInitializerVariable(**kwds)
+      v = UnliftedInitializerVariable(
+          add_initializers_to=add_initializers_to, **kwds)
       self._created_variables.append(weakref.ref(v))
       return v
 
@@ -405,14 +413,22 @@ class PolymorphicFunction(object):
     Raises:
       ValueError: if this object has not yet been called on concrete values.
     """"""
-    # TODO(apassos) figure out how to handle this case (what should we return
-    # here?)
+    assert context.executing_eagerly()
     if self._stateful_fn is None:
-      raise ValueError(
-          ""Call this function with concrete values before asking for a""
-          "" concrete function. Calling the function will ensure that, in""
-          "" case this function creates variables, that those are properly""
-          "" initialized."")
+      # Here we trace the function, collect the initializers, and attempt to
+      # extract them and run them eagerly. Fail only if we cannot do so.
+      initializer_map = {}
+      self._initialize(args, kwargs, add_initializers_to=initializer_map)
+      if not self._created_variables:
+
+        @function
+        def initialize_variables():
+          for v, init in initializer_map.items():
+            v.assign(lift_to_graph.lift_to_graph(
+                init, ops.get_default_graph())[init])
+
+        initialize_variables()
+
     if self._created_variables:
       # In this case we have created variables on the first call, so we run the
       # defunned version which is guaranteed to never create variables.
",0,train
7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function

PiperOrigin-RevId: 220548234",function_test.py,"@@ -190,7 +190,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   def testBasicGraphFunction(self):
     matmul = def_function.function(math_ops.matmul)
 
-    @function.defun
+    @def_function.function
     def sq(a):
       return matmul(a, a)
 
@@ -204,7 +204,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   def testInputSpecGraphFunction(self):
     matmul = def_function.function(math_ops.matmul)
 
-    @function.defun
+    @def_function.function
     def sq(a):
       return matmul(a, a)
 
@@ -223,7 +223,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   def testNestedInputSpecGraphFunction(self):
     matmul = def_function.function(math_ops.matmul)
 
-    @function.defun
+    @def_function.function
     def sq(mats):
       ((a, b),) = mats
       return matmul(a, b)
@@ -347,7 +347,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
     pair = collections.namedtuple('pair', ['a', 'b'])
 
-    @function.defun
+    @def_function.function
     def a_times_b(inputs):
       return matmul(inputs.a['a'], inputs.b['b'])
 
@@ -362,7 +362,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   def testNestedOutputGraphFunction(self):
     matmul = def_function.function(math_ops.matmul)
 
-    @function.defun
+    @def_function.function
     def sq(a):
       return (matmul(a, a), {'b': constant_op.constant(1.0)})
 
@@ -381,7 +381,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
   def testGraphFunctionWithGradients(self):
     v = resource_variable_ops.ResourceVariable(1.0, name='v')
 
-    @function.defun
+    @def_function.function
     def step():
       def inner():
         return v * v
@@ -394,7 +394,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     self.assertAllEqual(step_op(), 2.0)
 
   def testGraphFunctionNoneOutput(self):
-    @function.defun
+    @def_function.function
     def fn(unused_a, unused_b):
       return None
 
@@ -968,7 +968,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       v_gpu = resource_variable_ops.ResourceVariable(
           [0.0, 1.0, 2.0], name='gpu')
 
-    @function.defun
+    @def_function.function
     def resource_apply_adam():
       training_ops.resource_apply_adam(
           v_cpu.handle,
@@ -1040,11 +1040,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testNestedDifferentiableFunction(self):
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return a * math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return inner_fn(x, 1.0)
 
@@ -1058,19 +1058,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunction(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return middle_fn(x, 1.0)
 
@@ -1084,15 +1084,15 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionWithMultipleGradCalls(self):
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return math_ops.mul(a, inner_fn(a, b))
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return middle_fn(x, 3.0)
 
@@ -1132,19 +1132,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionGradientTapeInDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       with backprop.GradientTape() as tp:
         tp.watch(x)
@@ -1158,19 +1158,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionGradientTapeInNestedDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def almost_outer_fn(x):
       with backprop.GradientTape() as tp:
         tp.watch(x)
@@ -1178,7 +1178,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       grad = tp.gradient(result, x)
       return grad
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return almost_outer_fn(x)
 
@@ -1188,19 +1188,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionGradientTapeInMultNestedDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def almost_outer_fn(x):
       with backprop.GradientTape() as tp:
         tp.watch(x)
@@ -1208,11 +1208,11 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       grad = tp.gradient(result, x)
       return grad
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return almost_outer_fn(x)
 
-    @function.defun
+    @def_function.function
     def outer_outer_fn(x):
       return outer_fn(x)
 
@@ -1222,19 +1222,19 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionTFGradientInDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       result = middle_fn(x, 1.0)
       return gradients_impl.gradients(result, [x])[0]
@@ -1245,24 +1245,24 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionTFGradientInNestedDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def almost_outer_fn(x):
       result = middle_fn(x, 1.0)
       return gradients_impl.gradients(result, [x])[0]
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return almost_outer_fn(x)
 
@@ -1272,28 +1272,28 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
 
   @test_util.run_in_graph_and_eager_modes
   def testDeeplyNestedDifferentiableFunctionTFGradientInMultNestedDefun(self):
-    @function.defun
+    @def_function.function
     def inner_inner_fn(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def inner_fn(a, b):
       return inner_inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def middle_fn(a, b):
       return a * inner_fn(a, b)
 
-    @function.defun
+    @def_function.function
     def almost_outer_fn(x):
       result = middle_fn(x, 1.0)
       return gradients_impl.gradients(result, [x])[0]
 
-    @function.defun
+    @def_function.function
     def outer_fn(x):
       return almost_outer_fn(x)
 
-    @function.defun
+    @def_function.function
     def outer_outer_fn(x):
       return outer_fn(x)
 
@@ -1461,7 +1461,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     def add(a, b):
       return math_ops.add(a, b)
 
-    @function.defun
+    @def_function.function
     def add_one(x):
       return add(x, 1)
 
@@ -1675,7 +1675,7 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
     with ops.device('gpu:0'):
       y = constant_op.constant(1.0)
 
-    @function.defun
+    @def_function.function
     def foo():
       return test_ops.device_placement_op()
 
",0,train
7a7b72855e7894b169ae78f4b46f247552bb62cb,tensorflow/tensorflow,"Pulls out variable initialization in tf.function().get_concrete_function

PiperOrigin-RevId: 220548234",lift_to_graph.py,"@@ -37,10 +37,8 @@ def lift_to_graph(init_tensor, graph, sources=None):
   visited_ops = set([x.op for x in sources])
   ops_to_visit = [init_tensor.op]
   op_outputs = collections.defaultdict(set)
-  print(""ops_to_visit"", ops_to_visit)
   while ops_to_visit:
     op = ops_to_visit.pop()
-    print(""visiting"", op)
     if op in visited_ops:
       continue
     visited_ops.add(op)
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,window_util.cc,"@@ -42,7 +42,7 @@ Window MakeWindow(absl::Span<const int64> sizes,
                   absl::Span<const int64> strides) {
   Window window;
   CHECK_EQ(sizes.size(), strides.size());
-  for (auto nb = 0; static_cast<size_t>(nb) < sizes.size(); ++nb) {
+  for (auto nb = 0; nb < sizes.size(); ++nb) {
     auto* dimension = window.add_dimensions();
     dimension->set_size(sizes[nb]);
     dimension->set_stride(strides[nb]);
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,tensor_shape.cc,"@@ -182,7 +182,7 @@ void TensorShapeBase<Shape>::InitDims(gtl::ArraySlice<int64> dim_sizes) {
 
   // Allow sizes that are under kint64max^0.25 so that 4-way multiplication
   // below cannot overflow.
-  static const int64 kMaxSmall = 0xd744;
+  static const uint64 kMaxSmall = 0xd744;
   static_assert(kMaxSmall * kMaxSmall * kMaxSmall * kMaxSmall <= kint64max,
                 ""bad overflow check"");
   bool large_size = false;
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,random_inputstream.cc,"@@ -92,7 +92,7 @@ Status RandomAccessInputStream::SkipNBytes(int64 bytes_to_skip) {
     } else {
       return s;
     }
-    if (data.size() < static_cast<size_t>(bytes_to_read)) {
+    if (data.size() < bytes_to_read) {
       return errors::OutOfRange(""reached end of file"");
     }
     bytes_to_skip -= bytes_to_read;
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,snappy_inputbuffer.cc,"@@ -134,7 +134,7 @@ Status SnappyInputBuffer::ReadCompressedBlockLength(uint32* length) {
     }
     size_t readable = std::min(bytes_to_read, avail_in_);
 
-    for (size_t i = 0; i < readable; i++) {
+    for (int i = 0; i < readable; i++) {
       // The ""unsigned char"" type cast is intentional to avoid implicit type
       // casting of the signed char to unsigned int during bitwise OR which
       // causes weird overflow errors.
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,snappy_outputbuffer.cc,"@@ -76,7 +76,7 @@ Status SnappyOutputBuffer::Write(StringPiece data) {
 
   // If there is sufficient free space in input_buffer_ to fit data we
   // add it there and return.
-  if (static_cast<int32>(bytes_to_write) <= AvailableInputSpace()) {
+  if (bytes_to_write <= AvailableInputSpace()) {
     AddToInputBuffer(data);
     return Status::OK();
   }
@@ -87,7 +87,7 @@ Status SnappyOutputBuffer::Write(StringPiece data) {
   TF_RETURN_IF_ERROR(DeflateBuffered());
 
   // input_buffer_ should be empty at this point.
-  if (static_cast<int32>(bytes_to_write) <= AvailableInputSpace()) {
+  if (bytes_to_write <= AvailableInputSpace()) {
     AddToInputBuffer(data);
     return Status::OK();
   }
@@ -144,7 +144,7 @@ void SnappyOutputBuffer::AddToInputBuffer(StringPiece data) {
   const int32 free_tail_bytes =
       input_buffer_capacity_ - (read_bytes + unread_bytes);
 
-  if (static_cast<int32>(bytes_to_write) > free_tail_bytes) {
+  if (bytes_to_write > free_tail_bytes) {
     memmove(input_buffer_.get(), next_in_, avail_in_);
     next_in_ = input_buffer_.get();
   }
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,zlib_outputbuffer.cc,"@@ -98,7 +98,7 @@ void ZlibOutputBuffer::AddToInputBuffer(StringPiece data) {
   int32 unread_bytes = z_stream_->avail_in;
   int32 free_tail_bytes = input_buffer_capacity_ - (read_bytes + unread_bytes);
 
-  if (static_cast<int32>(bytes_to_write) > free_tail_bytes) {
+  if (bytes_to_write > free_tail_bytes) {
     memmove(z_stream_input_.get(), z_stream_->next_in, z_stream_->avail_in);
     z_stream_->next_in = z_stream_input_.get();
   }
@@ -154,7 +154,7 @@ Status ZlibOutputBuffer::Append(StringPiece data) {
 
   size_t bytes_to_write = data.size();
 
-  if (static_cast<int32>(bytes_to_write) <= AvailableInputSpace()) {
+  if (bytes_to_write <= AvailableInputSpace()) {
     AddToInputBuffer(data);
     return Status::OK();
   }
@@ -162,7 +162,7 @@ Status ZlibOutputBuffer::Append(StringPiece data) {
   TF_RETURN_IF_ERROR(DeflateBuffered(zlib_options_.flush_mode));
 
   // At this point input stream should be empty.
-  if (static_cast<int32>(bytes_to_write) <= AvailableInputSpace()) {
+  if (bytes_to_write <= AvailableInputSpace()) {
     AddToInputBuffer(data);
     return Status::OK();
   }
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,env.cc,"@@ -214,7 +214,7 @@ bool Env::FilesExist(const std::vector<string>& files,
     }
     if (fs_status) {
       result &= fs_result;
-      for (size_t i = 0; i < itr.second.size(); ++i) {
+      for (int i = 0; i < itr.second.size(); ++i) {
         per_file_status[itr.second[i]] = fs_status->at(i);
       }
     } else if (!fs_result) {
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,file_system.cc,"@@ -308,7 +308,7 @@ StringPiece FileSystem::Basename(StringPiece path) const {
 StringPiece FileSystem::Extension(StringPiece path) const {
   StringPiece basename = this->Basename(path);
 
-  size_t pos = basename.rfind('.');
+  int pos = basename.rfind('.');
   if (pos == StringPiece::npos) {
     return StringPiece(path.data() + path.size(), 0);
   } else {
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,file_system_helper.cc,"@@ -103,7 +103,7 @@ Status GetMatchingPaths(FileSystem* fs, Env* env, const string& pattern,
                 children_dir_status[i] = fs->IsDirectory(child_path);
               }
             });
-    for (size_t i = 0; i < children.size(); ++i) {
+    for (int i = 0; i < children.size(); ++i) {
       const string child_path = io::JoinPath(current_dir, children[i]);
       // If the IsDirectory call was cancelled we bail.
       if (children_dir_status[i].code() == tensorflow::error::CANCELLED) {
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,status.cc,"@@ -74,9 +74,7 @@ class StatusLogSink : public TFLogSink {
 
     mutex_lock lock(mu_);
     messages_.emplace_back(entry.ToString());
-    if (messages_.size() > static_cast<size_t>(num_messages_)){
-        messages_.pop_front();
-    }
+    if (messages_.size() > num_messages_) messages_.pop_front();
   }
 
  private:
",0,train
0096d0a19b5543b368a5d2426cb2810931913272,tensorflow/tensorflow,final segregation,parse_annotation.cc,"@@ -50,7 +50,7 @@ std::vector<absl::string_view> SplitNameAndMetadata(
 std::vector<absl::string_view> SplitPairs(absl::string_view metadata) {
   std::vector<absl::string_view> key_value_pairs;
   std::stack<char> quotes;
-  size_t start = 0, end = 0;
+  int start = 0, end = 0;
   for (; end < metadata.size(); ++end) {
     char ch = metadata[end];
     switch (ch) {
",0,train
83839064dd8061089a7fdf69e1065655b432c4fd,tensorflow/tensorflow,"[tf.data] Optimize `tf.contrib.data.sample_from_datasets()` when the weights are not a dataset.

Previously, we were recomputing the logits from the weights for each element, which is only necessary when the weights can differ for each element.

PiperOrigin-RevId: 210128640",interleave_ops.py,"@@ -216,25 +216,46 @@ def sample_from_datasets(datasets, weights=None, seed=None):
       length of the `datasets` element.
   """"""
   num_datasets = len(datasets)
-  if weights is None:
-    weights = dataset_ops.Dataset.from_tensors([1.0] * num_datasets).repeat()
-  elif not isinstance(weights, dataset_ops.Dataset):
-    weights = ops.convert_to_tensor(weights, name=""weights"")
-    if weights.dtype not in (dtypes.float32, dtypes.float64):
-      raise TypeError(""`weights` must be convertible to a tensor of ""
-                      ""`tf.float32` or `tf.float64` elements."")
-    if not weights.shape.is_compatible_with([num_datasets]):
-      raise ValueError(""`weights` must be a vector of length `len(datasets)`."")
-    weights = dataset_ops.Dataset.from_tensors(weights).repeat()
-
-  # The `stateless_multinomial()` op expects log-probabilities, as opposed to
-  # weights.
-  logits_ds = weights.map(lambda *p: math_ops.log(p, name=""logits""))
-  def select_dataset(logits, seed):
-    return array_ops.squeeze(
-        stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
-  selector_input = dataset_ops.Dataset.zip(
-      (logits_ds, random_ops.RandomDataset(seed).batch(2))).map(select_dataset)
+  if not isinstance(weights, dataset_ops.Dataset):
+    if weights is None:
+      # Select inputs with uniform probability.
+      logits = [[1.0] * num_datasets]
+    else:
+      # Use the given `weights` as the probability of choosing the respective
+      # input.
+      weights = ops.convert_to_tensor(weights, name=""weights"")
+      if weights.dtype not in (dtypes.float32, dtypes.float64):
+        raise TypeError(""`weights` must be convertible to a tensor of ""
+                        ""`tf.float32` or `tf.float64` elements."")
+      if not weights.shape.is_compatible_with([num_datasets]):
+        raise ValueError(
+            ""`weights` must be a vector of length `len(datasets)`."")
+
+      # The `stateless_multinomial()` op expects log-probabilities, as opposed
+      # to weights.
+      logits = array_ops.expand_dims(math_ops.log(weights, name=""logits""), 0)
+
+    def select_dataset_constant_logits(seed):
+      return array_ops.squeeze(
+          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
+
+    selector_input = random_ops.RandomDataset(seed).batch(2).map(
+        select_dataset_constant_logits)
+  else:
+    # Use each element of the given `weights` dataset as the probability of
+    # choosing the respective input.
+
+    # The `stateless_multinomial()` op expects log-probabilities, as opposed to
+    # weights.
+    logits_ds = weights.map(lambda *p: math_ops.log(p, name=""logits""))
+
+    def select_dataset_varying_logits(logits, seed):
+      return array_ops.squeeze(
+          stateless.stateless_multinomial(logits, 1, seed=seed), axis=[0, 1])
+
+    selector_input = dataset_ops.Dataset.zip(
+        (logits_ds, random_ops.RandomDataset(seed).batch(2)
+        )).map(select_dataset_varying_logits)
 
   return _DirectedInterleaveDataset(selector_input, datasets)
 
",0,train
2d8b5115ab308c8d934eb150c1015d102728013e,tensorflow/tensorflow,"Automated g4 rollback of changelist 193451839

PiperOrigin-RevId: 200275406",transpose_folding.cc,"@@ -178,7 +178,6 @@ bool FoldTransposeIntoConvolution(InstructionOperandsPair pair) {
 
   auto new_conv = HloInstruction::CreateConvolve(
       convolution.shape(), new_lhs, new_rhs, convolution.window(), new_dnums);
-  convolution.SetupDerivedInstruction(new_conv.get());
   TF_CHECK_OK(convolution.parent()->ReplaceWithNewInstruction(
       &convolution, std::move(new_conv)));
 
",0,train
7036e0472258ad392c13fec50717f19e1670fa22,tensorflow/tensorflow,"Remove the stride-matching restriction on reduce-windows

PiperOrigin-RevId: 371705477
Change-Id: I8154c6e79dc3ee8de3d6b0302758f7aefc8abe89",space_to_batch_converter.cc,"@@ -1397,19 +1397,19 @@ bool ConvolutionVisitor::SupportedOpForPropagation(HloInstruction* consumer,
 
     auto new_operand = old_to_new_instrs_[first_operand];
     auto permute_dims = instr_to_dim_permute_map_[new_operand];
-    const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim);
-
-    // Make sure that the stride lines up.
-    if (window.dimensions(old_space_dim).size() != 1) {
-      if (new_operand->shape().dimensions(new_space_dim) %
-              window.dimensions(old_space_dim).stride() !=
-          0) {
-        return false;
-      }
-    }
 
     // Select-and-scatter specific checks.
     if (consumer->opcode() == HloOpcode::kSelectAndScatter) {
+      const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim);
+      // Make sure that the stride lines up.
+      if (window.dimensions(old_space_dim).size() != 1) {
+        if (new_operand->shape().dimensions(new_space_dim) %
+                window.dimensions(old_space_dim).stride() !=
+            0) {
+          return false;
+        }
+      }
+
       // Only support floating point datatypes.
       if (!ShapeUtil::ElementIsFloating(consumer->shape())) {
         return false;
@@ -1657,6 +1657,14 @@ StatusOr<bool> ConvolutionVisitor::Propagate(HloInstruction* consumer,
     const int64 new_batch_dim = DimLookUp(permute_dims, old_batch_dim);
     const int64 new_space_dim = DimLookUp(permute_dims, old_space_dim);
 
+    // Calculate the required halo size
+    auto new_shape = first_operand->shape();
+    auto old_shape = consumer->mutable_operand(0)->shape();
+
+    const int64 new_batch_size = new_shape.dimensions(new_batch_dim);
+    const int64 new_space_size = new_shape.dimensions(new_space_dim);
+    const int64 stride = consumer->window().dimensions(old_space_dim).stride();
+
     auto pad_val =
         is_select_and_scatter
             ? computation_->AddInstruction(
@@ -1669,13 +1677,27 @@ StatusOr<bool> ConvolutionVisitor::Propagate(HloInstruction* consumer,
                            new_batch_dim, new_space_dim, old_batch_dim,
                            old_space_dim));
 
-    // Calculate the required halo size
-    auto new_shape = first_operand->shape();
-    auto old_shape = consumer->mutable_operand(0)->shape();
-
-    const int64 new_batch_size = new_shape.dimensions(new_batch_dim);
-    const int64 new_space_size = new_shape.dimensions(new_space_dim);
-    const int64 stride = consumer->window().dimensions(old_space_dim).stride();
+    const int64 extra_space = new_space_size % stride;
+    if (extra_space) {
+      CHECK_EQ(consumer->opcode(), HloOpcode::kReduceWindow);
+      const int64 old_batch_size = old_shape.dimensions(old_batch_dim);
+      const int64 old_space_size = old_shape.dimensions(old_space_dim);
+      // If the shrunk space is still larger/equal than the original space, we
+      // reduce the space.
+      if ((new_space_size - extra_space) * new_batch_size >=
+          old_batch_size * old_space_size) {
+        TF_ASSIGN_OR_RETURN(first_operand,
+                            DecreaseSpatialSizeOnSpaceToBatchedShape(
+                                first_operand, new_batch_dim, old_batch_size,
+                                new_space_dim, new_space_size - extra_space));
+      } else {
+        TF_ASSIGN_OR_RETURN(
+            first_operand,
+            IncreaseSpatialSizeOnSpaceToBatchedShape(
+                first_operand, new_batch_dim, old_batch_size, new_space_dim,
+                new_space_size + stride - extra_space));
+      }
+    }
     const int64 window_size =
         consumer->window().dimensions(old_space_dim).size();
     const int64 last_overlap_point = ((new_space_size - 1) / stride) * stride;
",0,train
f0ffba31ed278e2ada5537b54575ea05af1091a9,tensorflow/tensorflow,Update output_init_files_test.py,output_init_files_test.py,"@@ -45,7 +45,7 @@ def _get_modules(package, attr_name, constants_attr_name):
       API constant names.
 
   Returns:
-    Set of TensorFow API modules.
+    Set of TensorFlow API modules.
   """"""
   modules = set()
   # TODO(annarev): split up the logic in create_python_api.py so that
",0,train
bfa7016612c0255edb6a02d7134f4babacfbf1ca,tensorflow/tensorflow,"[XLA:HLO] Prevent while buffer entry parameter buffer sharing if buffer is live out.

PiperOrigin-RevId: 170099782",buffer_assignment.cc,"@@ -1121,6 +1121,7 @@ void BufferAssigner::AddWhileSetToColocatedBufferSets(
   // Scan 'colocated_buffer_sets' in reverse order for locality; colocated sets
   // are added in postorder over computations and instructions.
   const int64 init_buffer_size = buffer_size(*while_init_buffer);
+  const bool is_live_out = buffer_liveness.MaybeLiveOut(*while_result_buffer);
   for (int i = colocated_buffer_sets->size() - 1; i >= 0; --i) {
     const ColocatedBufferSet& predecessor_set = (*colocated_buffer_sets)[i];
 
@@ -1141,6 +1142,20 @@ void BufferAssigner::AddWhileSetToColocatedBufferSets(
       continue;
     }
 
+    // Skip predecessor sets with entry parameter if the while result is live
+    // out.
+    if (is_live_out &&
+        std::any_of(predecessor_set.begin(), predecessor_set.end(),
+                    [](const LogicalBuffer* buffer) {
+                      auto* instruction = buffer->instruction();
+                      auto* computation = instruction->parent();
+                      auto* module = computation->parent();
+                      return instruction->opcode() == HloOpcode::kParameter &&
+                             computation == module->entry_computation();
+                    })) {
+      continue;
+    }
+
     // Build vector of predecessor while result and init buffers, which are
     // checked for liveness interference below. We must check both the result
     // and init buffers because they're aliased together, but
",0,train
bfa7016612c0255edb6a02d7134f4babacfbf1ca,tensorflow/tensorflow,"[XLA:HLO] Prevent while buffer entry parameter buffer sharing if buffer is live out.

PiperOrigin-RevId: 170099782",buffer_assignment_test.cc,"@@ -1764,5 +1764,62 @@ TEST_F(WhileBufferAssignmentTest, DISABLED_TwoWhiles) {
   EXPECT_TRUE(BuffersDistinct({while0}, {while1}, *assignment));
 }
 
+TEST_F(WhileBufferAssignmentTest, WhilesDontShareEntryParamIfLiveOut) {
+  auto module = MakeUnique<HloModule>(TestName());
+  auto builder = HloComputation::Builder(""entry"");
+
+  auto input0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, data_shape_, ""input0""));
+  auto weights0 = builder.AddInstruction(
+      HloInstruction::CreateParameter(1, data_shape_, ""weights0""));
+
+  auto zero = builder.AddInstruction(
+      HloInstruction::CreateConstant(Literal::CreateR0<float>(0.0)));
+  auto output0 = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(data_shape_, zero, {1}));
+  auto output1 = builder.AddInstruction(
+      HloInstruction::CreateBroadcast(data_shape_, zero, {1}));
+
+  auto cond0 =
+      module->AddEmbeddedComputation(BuildWhileConditionComputation(""cond""));
+  auto body0 =
+      module->AddEmbeddedComputation(BuildWhileBodyComputation(""body""));
+
+  auto tuple0 = builder.AddInstruction(
+      HloInstruction::CreateTuple({input0, weights0, output0}));
+  auto while0 = builder.AddInstruction(
+      HloInstruction::CreateWhile(loop_state_shape_, cond0, body0, tuple0));
+
+  // Get output of 'while0' and feed as input to 'while1'.
+  auto while0_out = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(data_shape_, while0, 2));
+
+  auto cond1 =
+      module->AddEmbeddedComputation(BuildWhileConditionComputation(""cond""));
+  auto body1 =
+      module->AddEmbeddedComputation(BuildWhileBodyComputation(""body""));
+
+  auto tuple1 = builder.AddInstruction(
+      HloInstruction::CreateTuple({while0_out, weights0, output1}));
+  auto while1 = builder.AddInstruction(
+      HloInstruction::CreateWhile(loop_state_shape_, cond1, body1, tuple1));
+
+  // Get output of 'while1' so that it is live out of computation.
+  auto while1_out = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(data_shape_, while1, 2));
+
+  module->AddEntryComputation(builder.Build());
+  RunCopyInsertion(module.get());
+  auto assignment = RunBufferAssignment(module.get());
+  // Get BufferAllocation for root instruction.
+  auto* root_alloc = assignment->GetUniqueTopLevelSlice(while1_out)
+                         .ConsumeValueOrDie()
+                         .allocation();
+  // Test that root instruction allocation is live out.
+  EXPECT_TRUE(root_alloc->maybe_live_out());
+  // Test that root instruction allocation is not an entry parameter.
+  EXPECT_FALSE(root_alloc->is_entry_computation_parameter());
+}
+
 }  // namespace
 }  // namespace xla
",0,train
634888a82f46694e2747ffde745d269b6cdf7c80,tensorflow/tensorflow,TFLu: detection_postprocess: fix review comments and build issues,detection_postprocess.cc,"@@ -309,14 +309,12 @@ void DequantizeBoxEncodings(const TfLiteTensor* input_box_encodings, int idx,
 
 template <class T>
 T ReInterpretTensor(const TfLiteTensor* tensor) {
-  // TODO (chowdhery): check float
   const float* tensor_base = GetTensorData<float>(tensor);
   return reinterpret_cast<T>(tensor_base);
 }
 
 template <class T>
 T ReInterpretTensor(TfLiteTensor* tensor) {
-  // TODO (chowdhery): check float
   float* tensor_base = GetTensorData<float>(tensor);
   return reinterpret_cast<T>(tensor_base);
 }
@@ -791,7 +789,6 @@ TfLiteStatus NonMaxSuppressionMultiClass(TfLiteContext* context,
 }
 
 TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
-  // TODO(chowdhery): Generalize for any batch size
   TF_LITE_ENSURE(context, (kBatchSize == 1));
 
   // Set up scratch buffers
@@ -837,17 +834,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // highest scoring non-overlapping boxes.
   TF_LITE_ENSURE_STATUS(NonMaxSuppressionMultiClass(context, node, op_data));
 
-  // TODO(chowdhery): Generalize for any batch size
-
   return kTfLiteOk;
 }
 
 }  // namespace detection_postprocess
 
 TfLiteRegistration* Register_DETECTION_POSTPROCESS() {
-  static TfLiteRegistration r = {
-      detection_postprocess::Init, detection_postprocess::Free,
-      detection_postprocess::Prepare, detection_postprocess::Eval};
+  static TfLiteRegistration r = {/*init=*/detection_postprocess::Init,
+                                 /*free=*/detection_postprocess::Free,
+                                 /*prepare=*/detection_postprocess::Prepare,
+                                 /*invoke=*/detection_postprocess::Eval,
+                                 /*profiling_string=*/nullptr,
+                                 /*builtin_code=*/0,
+                                 /*custom_name=*/nullptr,
+                                 /*version=*/0};
   return &r;
 }
 
",0,test
6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor

PiperOrigin-RevId: 253598769",context.cc,"@@ -53,14 +53,6 @@ bool ReadBoolFromEnvVar(StringPiece env_var_name, bool default_val) {
 
 }  // namespace
 
-EagerContext::EagerContext(const SessionOptions& opts,
-                           ContextDevicePlacementPolicy default_policy,
-                           bool async,
-                           std::unique_ptr<const DeviceMgr> device_mgr,
-                           Rendezvous* rendezvous)
-    : EagerContext(opts, default_policy, async, device_mgr.release(),
-                   /*device_mgr_owned*/ true, rendezvous, nullptr) {}
-
 EagerContext::EagerContext(
     const SessionOptions& opts, ContextDevicePlacementPolicy default_policy,
     bool async, const DeviceMgr* device_mgr, bool device_mgr_owned,
",0,train
6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor

PiperOrigin-RevId: 253598769",context.h,"@@ -83,12 +83,6 @@ class RunMetadataListener {
 
 class EagerContext : public core::RefCounted {
  public:
-  // TODO: remove this constructor once we migrate all callers to the next one.
-  EagerContext(const SessionOptions& opts,
-               ContextDevicePlacementPolicy default_policy, bool async,
-               std::unique_ptr<const DeviceMgr> device_mgr,
-               Rendezvous* rendezvous);
-
   EagerContext(
       const SessionOptions& opts, ContextDevicePlacementPolicy default_policy,
       bool async, const DeviceMgr* device_mgr, bool device_mgr_owned,
@@ -97,7 +91,7 @@ class EagerContext : public core::RefCounted {
       std::function<Rendezvous*(const int64)> rendezvous_creator = nullptr,
       const DeviceMgr* remote_device_mgr = nullptr);
 
-  ~EagerContext();
+  ~EagerContext() override;
 
   // Returns the function library runtime for the given device.
   FunctionLibraryRuntime* func_lib(const Device* d) const {
",0,train
6b1371de9389f90ed93c1d5db2112a10877b410b,tensorflow/tensorflow,"Remove legacy EagerContext constructor

PiperOrigin-RevId: 253598769",delegate_data.cc,"@@ -37,7 +37,7 @@ tensorflow::Status DelegateData::Prepare(
   TF_RETURN_IF_ERROR(tensorflow::DeviceFactory::AddDevices(
       session_options, ""/job:localhost/replica:0/task:0"", &devices));
 
-  std::unique_ptr<tensorflow::DeviceMgr> device_mgr =
+  auto device_mgr =
       absl::make_unique<tensorflow::DeviceMgr>(std::move(devices));
   // Note that Rendezvous is ref-counted so it will be automatically deleted.
   tensorflow::Rendezvous* rendezvous =
@@ -45,7 +45,8 @@ tensorflow::Status DelegateData::Prepare(
   eager_context_ = new tensorflow::EagerContext(
       session_options,
       tensorflow::ContextDevicePlacementPolicy::DEVICE_PLACEMENT_SILENT,
-      /*async=*/false, std::move(device_mgr), rendezvous);
+      /*async=*/false, device_mgr.release(), /*device_mgr_owned*/ true,
+      rendezvous, nullptr);
   return tensorflow::Status();
 }
 
",0,train
8eb015561b3a2f2e27f01617fd14d5a4f4b215bd,tensorflow/tensorflow,"[XLA] Sort vector of HloBuffer::Id values before uniquifying.
Fix a latent bug in HLO dataflow analysis in the method Phi. When uniquifying
a vector of HloBuffer::Id values, first std::sort it before calling std::erase
and std::unique.

PiperOrigin-RevId: 158888326",hlo_dataflow_analysis.cc,"@@ -409,6 +409,7 @@ InstructionValueSet HloDataflowAnalysis::Phi(
             input_value_ids.push_back(value_id);
           }
         }
+        std::sort(input_value_ids.begin(), input_value_ids.end());
         input_value_ids.erase(
             std::unique(input_value_ids.begin(), input_value_ids.end()),
             input_value_ids.end());
",0,train
4182ece77aae763f2acc07255c40279cbe3c587a,tensorflow/tensorflow,"improve compute high rank hessians (#15308)

* fix possible compute high rank hessian

fix possible compute high rank hessian

* add high rank hessians unittest

* fix returning a shape of hessian w/ test

* fix: use tensor shape implicitly

* Space nearby operators.

* fix to tensorflow style guide

* fix to tensorflow style guide (Space nearby operators)",gradients_impl.py,"@@ -977,9 +977,7 @@ def hessians(ys, xs, name=""hessians"", colocate_gradients_with_ops=False,
 
   `hessians()` adds ops to the graph to output the Hessian matrix of `ys`
   with respect to `xs`.  It returns a list of `Tensor` of length `len(xs)`
-  where each tensor is the Hessian of `sum(ys)`. This function currently
-  only supports evaluating the Hessian with respect to (a list of) one-
-  dimensional tensors.
+  where each tensor is the Hessian of `sum(ys)`.
 
   The Hessian is a matrix of second-order partial derivatives of a scalar
   tensor (see https://en.wikipedia.org/wiki/Hessian_matrix for more details).
@@ -1005,31 +1003,32 @@ def hessians(ys, xs, name=""hessians"", colocate_gradients_with_ops=False,
       'colocate_gradients_with_ops': colocate_gradients_with_ops,
       'gate_gradients': gate_gradients,
       'aggregation_method': aggregation_method
-    }
+  }
   # Compute first-order derivatives and iterate for each x in xs.
   hessians = []
   _gradients = gradients(ys, xs, **kwargs)
-  for i, _gradient, x in zip(range(len(xs)), _gradients, xs):
-    # Ensure that x is a vector.
-    check_rank = check_ops.assert_rank(
-      x, 1, message='Cannot compute Hessian because element %d of `xs` does '
-      'not have rank one.' % i
-    )
-    with ops.control_dependencies([check_rank]):
-      # Declare an iterator and tensor array loop variables for the gradients.
-      n = array_ops.size(x)
-      loop_vars = [
+  for gradient, x in zip(_gradients, xs):
+    # change shape to one-dimension without graph branching
+    gradient = array_ops.reshape(gradient, [-1])
+
+    # Declare an iterator and tensor array loop variables for the gradients.
+    n = array_ops.size(x)
+    loop_vars = [
         array_ops.constant(0, dtypes.int32),
         tensor_array_ops.TensorArray(x.dtype, n)
-      ]
-      # Iterate over all elements of the gradient and compute second order
-      # derivatives.
-      _, hessian = control_flow_ops.while_loop(
-          lambda j, _: j < n,
-          lambda j, result: (j + 1,
-                             result.write(j, gradients(_gradient[j], x)[0])),
-          loop_vars
-      )
-
-      hessians.append(hessian.stack())
+    ]
+    # Iterate over all elements of the gradient and compute second order
+    # derivatives.
+    _, hessian = control_flow_ops.while_loop(
+        lambda j, _: j < n,
+        lambda j, result: (j + 1,
+                           result.write(j, gradients(gradient[j], x)[0])),
+        loop_vars
+    )
+
+    _shape = array_ops.shape(x)
+    _reshaped_hessian = array_ops.reshape(
+        hessian.stack(), array_ops.concat((_shape, _shape), 0)
+    )
+    hessians.append(_reshaped_hessian)
   return hessians
",0,train
4182ece77aae763f2acc07255c40279cbe3c587a,tensorflow/tensorflow,"improve compute high rank hessians (#15308)

* fix possible compute high rank hessian

fix possible compute high rank hessian

* add high rank hessians unittest

* fix returning a shape of hessian w/ test

* fix: use tensor shape implicitly

* Space nearby operators.

* fix to tensorflow style guide

* fix to tensorflow style guide (Space nearby operators)",gradients_test.py,"@@ -621,6 +621,45 @@ class HessianTest(test_util.TensorFlowTestCase):
         with self.assertRaises(ValueError):
           gradients.hessians(x, x)
 
+  def testHessian2D_square_matrix(self):
+    # Manually compute the Hessian explicitly for a low-dimensional problem
+    # and check that `hessian` matches. Specifically, the Hessian of
+    # f(x) = 1/2 * x^T * x is H = constant (block identity matrix)
+    m = 3
+    rng = np.random.RandomState([1, 2, 3])
+    x_value = rng.randn(m, m).astype(""float32"")
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_value)
+      x_square = math_ops.reduce_sum(
+          math_ops.matmul(array_ops.transpose(x), x) * 0.5
+      )
+      hess = gradients.hessians(x_square, x)[0]
+      hess_actual = hess.eval()
+    hess_value = np.bmat([
+        [elem*np.ones((m, m)) for elem in vec]
+        for vec in np.eye(m)
+    ]).astype(""float32"")
+    self.assertAllEqual((m, m, m, m), hess_actual.shape)
+    self.assertAllClose(hess_value, hess_actual.reshape((m * m, m * m)))
+
+  def testHessian2D_non_square_matrix(self):
+    m = 3
+    n = 4
+    rng = np.random.RandomState([1, 2, 3])
+    x_value = rng.randn(m, n).astype(""float32"")
+    with self.test_session(use_gpu=True):
+      x = constant_op.constant(x_value)
+      x_square = math_ops.reduce_sum(
+          math_ops.matmul(array_ops.transpose(x), x) * 0.5
+      )
+      hess = gradients.hessians(x_square, x)[0]
+      hess_actual = hess.eval()
+    hess_value = np.bmat([
+        [elem*np.ones((n, n)) for elem in vec]
+        for vec in np.eye(m)
+    ]).astype(""float32"")
+    self.assertAllEqual((m, n, m, n), hess_actual.shape)
+    self.assertAllClose(hess_value, hess_actual.reshape((m * n, m * n)))
 
 @test_util.with_c_api
 class IndexedSlicesToTensorTest(test_util.TensorFlowTestCase):
",0,train
9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991",training.py,"@@ -1419,6 +1419,8 @@ class Model(Network):
               - tuple `(x_val, y_val)` of Numpy arrays or tensors
               - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
               - dataset or a dataset iterator
+            For the first two cases, `batch_size` must be provided.
+            For the last case, `validation_steps` must be provided.
         shuffle: Boolean (whether to shuffle the training data
             before each epoch) or str (for 'batch').
             'batch' is a special option for dealing with the
@@ -1454,9 +1456,10 @@ class Model(Network):
             TensorFlow data tensors, the default `None` is equal to
             the number of samples in your dataset divided by
             the batch size, or 1 if that cannot be determined.
-        validation_steps: Only relevant if `steps_per_epoch`
-            is specified. Total number of steps (batches of samples)
-            to validate before stopping.
+        validation_steps: Only relevant if `validation_data` is provided and
+            is a dataset or dataset iterator. Total number of steps (batches of
+            samples) to draw before stopping when performing validation
+            at the end of every epoch.
         max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
             input only. Maximum size for the generator queue.
             If unspecified, `max_queue_size` will default to 10.
",0,test
9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991",training_eager.py,"@@ -739,7 +739,8 @@ def test_loop(model, inputs, targets,
       y=targets,
       sample_weights=sample_weights,
       batch_size=batch_size,
-      steps_per_epoch=steps)
+      steps_per_epoch=steps,
+      is_validation=True)
   with backend.learning_phase_scope(0):
     return iterator_test_loop(model, inputs, steps, verbose=verbose)
 
",0,test
9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991",training_eager_test.py,"@@ -125,6 +125,36 @@ class TrainingTest(test.TestCase):
     model.train_on_batch(inputs, targets)
     model.test_on_batch(inputs, targets)
 
+  def test_model_fit_and_validation_with_missing_arg_errors(self):
+    x = keras.layers.Input(shape=(3,), name='input')
+    y = keras.layers.Dense(4, name='dense')(x)
+    model = keras.Model(x, y)
+    model.compile(optimizer=RMSPropOptimizer(learning_rate=0.001), loss='mse')
+
+    x = keras.backend.zeros(shape=(10, 3))
+    y = keras.backend.zeros(shape=(10, 4))
+    dataset = dataset_ops.Dataset.from_tensor_slices((x, y)).repeat(10).batch(5)
+    iterator = dataset.make_one_shot_iterator()
+    validation_dataset = dataset_ops.Dataset.from_tensor_slices(
+        (x, y)).repeat(10).batch(5)
+    validation_iterator = validation_dataset.make_one_shot_iterator()
+
+    with self.assertRaisesRegexp(
+        ValueError, r'specify .* `steps_per_epoch`'):
+      model.fit(iterator, epochs=1, verbose=0)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=(x, y))
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_dataset)
+    with self.assertRaisesRegexp(
+        ValueError, r'provide either `batch_size` or `validation_steps`'):
+      model.fit(iterator, steps_per_epoch=2, epochs=1, verbose=0,
+                validation_data=validation_iterator)
+
   def test_generator_methods(self):
     model = keras.Sequential()
     model.add(keras.layers.Dense(4, input_shape=(3,)))
",0,test
9f42ebd5982688511ecc0ef7d23de02b64d8dd1e,tensorflow/tensorflow,"Improve error messages and doc strings for eager-mode tf.keras.Model.fit() + tf.data objects

- Previously, when validation_steps was missing, the error message incorrectly says ""please provide either batch_size or steps_per_epoch"". Now it reads ""please provide either batch_size or validation_steps"".
- Some whitespace-related fixes.

PiperOrigin-RevId: 215503991",training_utils.py,"@@ -106,7 +106,8 @@ def convert_to_iterator(x=None,
                         batch_size=None,
                         steps_per_epoch=None,
                         epochs=1,
-                        shuffle=False):
+                        shuffle=False,
+                        is_validation=False):
   """"""Converts NumPy arrays or EagerTensors to an EagerIterator.
 
   Combines all provided data into a single EagerIterator.
@@ -124,6 +125,9 @@ def convert_to_iterator(x=None,
         epoch.
       epochs: Epochs to repeat iterator for.
       shuffle: Whether to shuffle data after each epoch.
+      is_validation: Whether this call is for validation during a training
+        (e.g., `fit()`) call. This info is used to construct error messages
+        (if any).
 
   Raises:
       ValueError: if steps_per_epoch cannot be calculated from the data
@@ -151,9 +155,12 @@ def convert_to_iterator(x=None,
     steps_per_epoch = int(math.ceil(num_samples / batch_size))
 
   if steps_per_epoch is None:
-    raise ValueError('Could not determine steps_per_epoch.'
-                     'Please provide either batch_size or'
-                     'steps_per_epoch.')
+    alternative_arg_name = (
+        'validation_steps' if is_validation else 'steps_per_epoch')
+    raise ValueError(
+        'Could not determine how to convert EagerTensors into EagerIterator. '
+        'Please provide either `batch_size` or '
+        '`%s`.' % alternative_arg_name)
 
   # TODO(omalleyt) for NumPy arrays in graph mode
   # placeholder ops should be used
",0,test
a13b9d4d58e311729de3a967d8c780a95f6691ae,tensorflow/tensorflow,NFC following code review. Fix comment typo and rename unroll to unrollInnerTileLoop.,ir_emitter_unnested.cc,"@@ -1897,7 +1897,7 @@ bool MayPreventVectorization(const HloInstruction& hlo) {
         return false;
     }
   } else if (hlo.opcode() == HloOpcode::kReduce) {
-    // TODO: check if the to_apply() attribute contain instruction
+    // TODO: check if the to_apply() attribute contains instruction
     // that break LLVM vectorization.
     return false;
   }
@@ -1942,17 +1942,17 @@ static llvm::Value* GetStartOffsetX(const KernelMappingScheme& mapping_scheme,
   return b->CreateMul(thread_id_x, constant(x_num_steps));
 }
 
-// This function calls emit_elem_function() x_num_steps times.  If
-// vector_size==1, then each element index passed to
-// emit_elem_function() will be separated by step_x. If vector_size>1,
-// then it must be a multiple of x_num_steps.  In that case, it
+// Calls `emit_elem_function()` `x_num_steps` times.  If
+// `vector_size`==1, then each element index passed to
+// `emit_elem_function()` will be separated by `step_x`. If `vector_size`>1,
+// then it must be a multiple of `x_num_steps`.  In that case, it
 // triggers a different indexing order that is vectorizable by
-// LLVM. It generates many groups of calls to emit_elem_function. Each
-// group is separated by step_x elements.  Inside a group, elements
-// are consecutive. If check_x_tile_bounds is true, then it will check
-// if the element index is in bound compared to tile_width before
-// calling emit_elem_function.
-static void Unroll(
+// LLVM. It generates many groups of calls to `emit_elem_function`. Each
+// group is separated by `step_x` elements.  Inside a group, elements
+// are consecutive. If `check_x_tile_bounds` is true, then it will check
+// if the element index is in bound compared to `tile_width` before
+// calling `emit_elem_function`.
+static void UnrollInnerTileLoop(
     bool check_x_tile_bounds, int64 x_num_steps, int64 step_x,
     int64 vector_size, const string& loop_name, KernelSupportLibrary* ksl,
     llvm::Value* start_offset_x, llvm::Value* y_loc, llvm::Value* tile_width,
@@ -2035,38 +2035,39 @@ void IrEmitterUnnested::EmitTile(
   //
   // TODO(cheshire): Once ptxas is fixed and TF switches to it, remove the
   // workaround.
-  ksl->For(loop_name + ""_y_in_tile"",
-           /*start=*/constant(0),
-           /*end=*/
-           ceil_of_ratio(b_.CreateSub(tile_height, thread_id_info.thread_id_y),
-                         num_threads_y),
-           /*step=*/constant(1), [&](llvm::Value* y_indvar) {
-             llvm::Value* y_loc =
-                 b_.CreateAdd(thread_id_info.thread_id_y,
-                              b_.CreateMul(y_indvar, num_threads_y));
-             auto unroll = [&](bool check_x_tile_bounds) {
-               return Unroll(check_x_tile_bounds, x_num_steps, step_x,
-                             vector_size, loop_name, ksl, start_offset_x, y_loc,
-                             tile_width, source_idx, b_, &emit_elem_function);
-             };
-
-             // Only take this path when we unroll in a way vectorizable by
-             // LLVM. Special case when the tile doesn't fit completely for even
-             // row size. For odd row size every other row isn't aligned to the
-             // vectorized size, so it can't be vectorized by LLVM.
-             if (!x_tile_fits &&
-                 mapping_scheme.GetIndexingOrder() == kLinearStridedIndexingX) {
-               ksl->If(loop_name + ""_is_full_tile"",
-                       // For the last block, tile_width will be the number of
-                       // elements left.
-                       b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeX()),
-                                       tile_width),
-                       [&] { unroll(/*check_x_tile_bounds=*/false); },
-                       [&] { unroll(/*check_x_tile_bounds=*/true); });
-             } else {
-               unroll(/*check_x_tile_bounds=*/!x_tile_fits);
-             }
-           });
+  ksl->For(
+      loop_name + ""_y_in_tile"",
+      /*start=*/constant(0),
+      /*end=*/
+      ceil_of_ratio(b_.CreateSub(tile_height, thread_id_info.thread_id_y),
+                    num_threads_y),
+      /*step=*/constant(1), [&](llvm::Value* y_indvar) {
+        llvm::Value* y_loc = b_.CreateAdd(
+            thread_id_info.thread_id_y, b_.CreateMul(y_indvar, num_threads_y));
+        auto unrollInnerTileLoop = [&](bool check_x_tile_bounds) {
+          return UnrollInnerTileLoop(check_x_tile_bounds, x_num_steps, step_x,
+                                     vector_size, loop_name, ksl,
+                                     start_offset_x, y_loc, tile_width,
+                                     source_idx, b_, &emit_elem_function);
+        };
+
+        // Only take this path when we unroll in a way vectorizable by
+        // LLVM. Special case when the tile doesn't fit completely for even
+        // row size. For odd row size every other row isn't aligned to the
+        // vectorized size, so it can't be vectorized by LLVM.
+        if (!x_tile_fits &&
+            mapping_scheme.GetIndexingOrder() == kLinearStridedIndexingX) {
+          ksl->If(loop_name + ""_is_full_tile"",
+                  // For the last block, tile_width will be the number of
+                  // elements left.
+                  b_.CreateICmpEQ(constant(mapping_scheme.GetTileSizeX()),
+                                  tile_width),
+                  [&] { unrollInnerTileLoop(/*check_x_tile_bounds=*/false); },
+                  [&] { unrollInnerTileLoop(/*check_x_tile_bounds=*/true); });
+        } else {
+          unrollInnerTileLoop(/*check_x_tile_bounds=*/!x_tile_fits);
+        }
+      });
 }
 
 // Emits code to process a tensor element in a tile for the given kCopy HLO that
",0,train
342f6b571f261da303969e0d2da275661d93955a,tensorflow/tensorflow,"0 Hz is now accepted as the lower frequency limit for the MFCC filterbank.

PiperOrigin-RevId: 170594836",mfcc_mel_filterbank.cc,"@@ -62,8 +62,8 @@ bool MfccMelFilterbank::Initialize(int input_length,
     return false;
   }
 
-  if (lower_frequency_limit <= 0) {
-    LOG(ERROR) << ""Lower frequency limit must be positive."";
+  if (lower_frequency_limit < 0) {
+    LOG(ERROR) << ""Lower frequency limit must be nonnegative."";
     return false;
   }
 
",0,train
9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn

Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop.

When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation.
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA.

PiperOrigin-RevId: 178802710",control_flow_ops.py,"@@ -748,22 +748,26 @@ class GradLoopState(object):
 
       outer_grad_ctxt = outer_grad_state.grad_context
       outer_grad_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       real_cnt = outer_grad_state.AddBackpropAccumulatedValue(history_cnt, cnt)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           real_cnt, outer_grad_state)
       outer_grad_ctxt.Exit()
     else:
       if outer_forward_ctxt: outer_forward_ctxt.Enter()
-      self._grad_context = WhileContext(forward_ctxt.parallel_iterations,
-                                        forward_ctxt.back_prop,
-                                        forward_ctxt.swap_memory,
-                                        forward_ctxt.name,
-                                        self)
+      self._grad_context = WhileContext(
+          maximum_iterations=forward_ctxt.maximum_iterations,
+          parallel_iterations=forward_ctxt.parallel_iterations,
+          back_prop=forward_ctxt.back_prop,
+          swap_memory=forward_ctxt.swap_memory,
+          name=forward_ctxt.name,
+          grad_state=self)
       self._grad_index = self._grad_context.AddBackpropLoopCounter(
           cnt, outer_grad_state)
       if outer_forward_ctxt: outer_forward_ctxt.Exit()
@@ -893,9 +897,14 @@ class GradLoopState(object):
     with ops.control_dependencies(None):
       if curr_ctxt: curr_ctxt.Enter()
       with ops.colocate_with(value):
+        maximum_iterations = self.forward_context.maximum_iterations
+        if maximum_iterations is None:
+          maximum_iterations = constant_op.constant(-1, dtypes.int32)
         # pylint: disable=protected-access
-        acc = gen_data_flow_ops._stack_v2(-1, value.dtype.base_dtype,
-                                          name=""f_acc"")
+        acc = gen_data_flow_ops._stack_v2(
+            max_size=maximum_iterations,
+            elem_type=value.dtype.base_dtype,
+            name=""f_acc"")
         # pylint: enable=protected-access
       if curr_ctxt: curr_ctxt.Exit()
 
@@ -1767,6 +1776,7 @@ def _UnpackIfSingleton(res):
     return res
 
 
+# pylint: disable=redefined-outer-name
 # pylint: disable=g-doc-args
 @deprecation.deprecated_args(
     None,
@@ -1943,6 +1953,7 @@ def cond(pred, true_fn=None, false_fn=None, strict=False, name=None,
       merges = _UnpackIfSingleton(merges)
     return merges
 # pylint: enable=g-doc-args
+# pylint: enable=redefined-outer-name
 
 
 def _resource_safe_shape(t):
@@ -1960,12 +1971,19 @@ def _resource_safe_shape(t):
 class WhileContext(ControlFlowContext):
   """"""The context for the loop construct.""""""
 
-  def __init__(self, parallel_iterations=10, back_prop=True, swap_memory=False,
-               name=""while_context"", grad_state=None, context_def=None,
+  def __init__(self,
+               maximum_iterations=None,
+               parallel_iterations=10,
+               back_prop=True,
+               swap_memory=False,
+               name=""while_context"",
+               grad_state=None,
+               context_def=None,
                import_scope=None):
     """"""""Creates a `WhileContext`.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -1980,16 +1998,17 @@ class WhileContext(ControlFlowContext):
       self._init_from_proto(context_def, import_scope=import_scope)
     else:
       ControlFlowContext.__init__(self)
-      self._init_from_args(parallel_iterations, back_prop, swap_memory,
-                           name)
+      self._init_from_args(maximum_iterations, parallel_iterations, back_prop,
+                           swap_memory, name)
     # The gradient loop state.
     self._grad_state = grad_state
 
-  def _init_from_args(self, parallel_iterations, back_prop, swap_memory,
-                      name):
+  def _init_from_args(self, maximum_iterations, parallel_iterations, back_prop,
+                      swap_memory, name):
     """"""Creates a new `WhileContext` from arguments.
 
     Args:
+      maximum_iterations: Optional upper bound on number of loop iterations.
       parallel_iterations: The number of iterations allowed to run in parallel.
       back_prop: Whether backprop is enabled for this while loop.
       swap_memory: Whether GPU-CPU memory swap is enabled for this loop.
@@ -2002,6 +2021,7 @@ class WhileContext(ControlFlowContext):
       raise ValueError(""`parallel_iterations` must be a positive integer: ""
                        ""%s"" % parallel_iterations)
     self._name = ops.get_default_graph().unique_name(name)
+    self._maximum_iterations = maximum_iterations
     self._parallel_iterations = parallel_iterations
     self._back_prop = back_prop
     self._swap_memory = swap_memory
@@ -2029,6 +2049,12 @@ class WhileContext(ControlFlowContext):
     g = ops.get_default_graph()
     self._name = ops.prepend_name_scope(
         context_def.context_name, import_scope)
+    if context_def.maximum_iterations_name:
+      self._maximum_iterations = g.as_graph_element(
+          ops.prepend_name_scope(context_def.maximum_iterations_name,
+                                 import_scope))
+    else:
+      self._maximum_iterations = None
     self._parallel_iterations = context_def.parallel_iterations
     self._back_prop = context_def.back_prop
     self._swap_memory = context_def.swap_memory
@@ -2056,6 +2082,11 @@ class WhileContext(ControlFlowContext):
   def name(self):
     return self._name
 
+  @property
+  def maximum_iterations(self):
+    """"""The maximum number of iterations that will be executed.""""""
+    return self._maximum_iterations
+
   @property
   def parallel_iterations(self):
     """"""The number of iterations allowed to run in parallel.""""""
@@ -2106,6 +2137,9 @@ class WhileContext(ControlFlowContext):
       context_def.context_name = ops.strip_name_scope(
           self.name, export_scope)
       context_def.parallel_iterations = self._parallel_iterations
+      if self._maximum_iterations is not None:
+        context_def.maximum_iterations_name = ops.strip_name_scope(
+            self._maximum_iterations.name, export_scope)
       context_def.back_prop = self._back_prop
       context_def.swap_memory = self._swap_memory
       context_def.pivot_for_pred_name = ops.strip_name_scope(
@@ -2724,6 +2758,7 @@ class WhileContext(ControlFlowContext):
     return True
 
 
+# pylint: disable=redefined-outer-name
 def while_loop(cond, body, loop_vars, shape_invariants=None,
                parallel_iterations=10, back_prop=True, swap_memory=False,
                name=None, maximum_iterations=None):
@@ -2889,13 +2924,18 @@ def while_loop(cond, body, loop_vars, shape_invariants=None,
         shape_invariants = (tensor_shape.TensorShape([]), shape_invariants)
       nest.assert_same_structure(loop_vars, shape_invariants)
 
-    loop_context = WhileContext(parallel_iterations, back_prop, swap_memory)  # pylint: disable=redefined-outer-name
+    loop_context = WhileContext(
+        maximum_iterations=maximum_iterations,
+        parallel_iterations=parallel_iterations,
+        back_prop=back_prop,
+        swap_memory=swap_memory)
     ops.add_to_collection(ops.GraphKeys.WHILE_CONTEXT, loop_context)
     result = loop_context.BuildLoop(cond, body, loop_vars, shape_invariants)
     if maximum_iterations is not None:
       return result[1]
     else:
       return result
+# pylint: enable=redefined-outer-name
 
 
 def _AsTensorList(x, p):
",0,train
9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn

Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop.

When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation.
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA.

PiperOrigin-RevId: 178802710",control_flow_ops_test.py,"@@ -452,18 +452,25 @@ class ContextTest(test_util.TensorFlowTestCase):
               c.to_proto(),
               control_flow_ops.CondContext.from_proto(c.to_proto()).to_proto())
 
-  def testWhileContext(self):
+  def _testWhileContextHelper(self, maximum_iterations=None):
     with self.test_session() as sess:
       i = constant_op.constant(0)
       c = lambda i: math_ops.less(i, 10)
       b = lambda i: math_ops.add(i, 1)
-      control_flow_ops.while_loop(c, b, [i])
+      control_flow_ops.while_loop(
+          c, b, [i], maximum_iterations=maximum_iterations)
       for op in sess.graph.get_operations():
-        c = op._get_control_flow_context()
-        if c:
-          self.assertProtoEquals(
-              c.to_proto(),
-              control_flow_ops.WhileContext.from_proto(c.to_proto()).to_proto())
+        context = op._get_control_flow_context()
+        if context:
+          self.assertProtoEquals(context.to_proto(),
+                                 control_flow_ops.WhileContext.from_proto(
+                                     context.to_proto()).to_proto())
+
+  def testWhileContext(self):
+    self._testWhileContextHelper()
+
+  def testWhileContextWithMaximumIterations(self):
+    self._testWhileContextHelper(maximum_iterations=10)
 
   def testControlContextImportScope(self):
     with self.test_session():
",0,train
9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn

Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop.

When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation.
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA.

PiperOrigin-RevId: 178802710",rnn.py,"@@ -665,7 +665,7 @@ def _dynamic_rnn_loop(cell,
     final_outputs:
       A `Tensor` of shape `[time, batch_size, cell.output_size]`.  If
       `cell.output_size` is a (possibly nested) tuple of ints or `TensorShape`
-      objects, then this returns a (possibly nsted) tuple of Tensors matching
+      objects, then this returns a (possibly nested) tuple of Tensors matching
       the corresponding shapes.
     final_state:
       A `Tensor`, or possibly nested tuple of Tensors, matching in length
@@ -806,11 +806,17 @@ def _dynamic_rnn_loop(cell,
 
     return (time + 1, output_ta_t, new_state)
 
+  # TODO(pbar) `loop_bound` can be reduced to `max_sequence_length` once
+  # TensorArray shape inference is working.  When sequence lengths are highly
+  # variable, this will reduce the performance overheads of padding to a fixed
+  # maximum length.
+  loop_bound = time_steps
   _, output_final_ta, final_state = control_flow_ops.while_loop(
-      cond=lambda time, *_: time < time_steps,
+      cond=lambda time, *_: time < loop_bound,
       body=_time_step,
       loop_vars=(time, output_ta, state),
       parallel_iterations=parallel_iterations,
+      maximum_iterations=time_steps,
       swap_memory=swap_memory)
 
   # Unpack final output if not using output tuples.
",0,train
9810da1b87aae689cac42bae754e7e4cb5a99d57,tensorflow/tensorflow,"Adds XLA support for tf.nn.dynamic_rnn

Changes tf.nn.dynamic_rnn to specify `maximum_iterations` argument for the while_loop.

When `maximum_iterations` argument is supplied to tf.while_loop, use this to provide an upper bound on the size of Stacks used for gradient computation.
By specifying the stack limit we can generate gradient code for while loops that uses fixed shape TensorArrays and hence can be compiled with XLA.

PiperOrigin-RevId: 178802710",model_analyzer_test.py,"@@ -230,12 +230,12 @@ class PrintModelAnalysisTest(test.TestCase):
         with gfile.Open(outfile, 'r') as f:
           lines = f.read().split('\n')
           result = '\n'.join([l[:min(len(l), 80)] for l in lines])
-          self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.85k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.57k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'),
+          self.assertEqual(compat.as_bytes('node name | # parameters | # float_ops\n_TFProfRoot (--/2.84k params, --/168.86k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (0/1.80k params, 0/45.37k flops)\n    model_analyzer_testlib.py:40:BuildSmallModel (0/0 params, 0/0 flops)\n    model_analyzer_testlib.py:44:BuildSmallModel (0/4 params, 0/8 flops)\n    model_analyzer_testlib.py:48:BuildSmallModel (0/648 params, 0/1.30k flops)\n    model_analyzer_testlib.py:49:BuildSmallModel (0/0 params, 0/23.33k flops)\n    model_analyzer_testlib.py:53:BuildSmallModel (0/1.15k params, 0/2.30k flops)\n    model_analyzer_testlib.py:54:BuildSmallModel (0/0 params, 0/18.43k flops)\n  model_analyzer_testlib.py:63:BuildFullModel (gradient) (0/0 params, 0/67.39k f\n    model_analyzer_testlib.py:49:BuildSmallModel (gradient) (0/0 params, 0/46.66\n    model_analyzer_testlib.py:54:BuildSmallModel (gradient) (0/0 params, 0/20.74\n  model_analyzer_testlib.py:67:BuildFullModel (0/1.04k params, 0/18.58k flops)\n  model_analyzer_testlib.py:67:BuildFullModel (gradient) (0/0 params, 0/37.00k f\n  model_analyzer_testlib.py:69:BuildFullModel (0/0 params, 0/0 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (0/0 params, 0/258 flops)\n  model_analyzer_testlib.py:70:BuildFullModel (gradient) (0/0 params, 0/129 flop\n  model_analyzer_testlib.py:72:BuildFullModel (0/0 params, 0/141 flops)\n'),
                            compat.as_bytes(result))
 
         self.assertLess(0, tfprof_node.total_exec_micros)
         self.assertEqual(2844, tfprof_node.total_parameters)
-        self.assertEqual(168854, tfprof_node.total_float_ops)
+        self.assertEqual(168863, tfprof_node.total_float_ops)
         self.assertEqual(8, len(tfprof_node.children))
         self.assertEqual('_TFProfRoot', tfprof_node.name)
         self.assertEqual(
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),clustering_ops.cc,"@@ -353,7 +353,7 @@ class NearestNeighborsOp : public OpKernel {
     auto worker_threads = *(context->device()->tensorflow_cpu_worker_threads());
     const int64 num_threads = worker_threads.num_threads;
     // This kernel might be configured to use fewer than the total number of
-    // available CPUs on the host machine. To avoid descructive interference
+    // available CPUs on the host machine. To avoid destructive interference
     // with other jobs running on the host machine, we must only use a fraction
     // of total available L3 cache. Unfortunately, we cannot query the host
     // machine to get the number of physical CPUs. So, we use a fixed per-CPU
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),factorization_ops.py,"@@ -106,7 +106,7 @@ class WALSModel(object):
       # the prep_gramian_op for row(column) can be run.
       worker_init_op = model.worker_init
 
-      # To be run once per interation sweep before the row(column) update
+      # To be run once per integration sweep before the row(column) update
       # initialize ops can be run. Note that in the distributed training
       # situations, this should only be run by the chief trainer. All other
       # trainers need to block until this is done.
@@ -118,9 +118,9 @@ class WALSModel(object):
       init_row_update_op = model.initialize_row_update_op
       init_col_update_op = model.initialize_col_update_op
 
-      # Ops to upate row(column). This can either take the entire sparse tensor
-      # or slices of sparse tensor. For distributed trainer, each trainer
-      # handles just part of the matrix.
+      # Ops to update row(column). This can either take the entire sparse
+      # tensor or slices of sparse tensor. For distributed trainer, each
+      # trainer handles just part of the matrix.
       _, row_update_op, unreg_row_loss, row_reg, _ = model.update_row_factors(
            sp_input=matrix_slices_from_queue_for_worker_shard)
       row_loss = unreg_row_loss + row_reg
@@ -220,7 +220,7 @@ class WALSModel(object):
         in the form of [[w_0, w_1, ...], [w_k, ... ], [...]], with the number of
         inner lists matching the number of row factor shards and the elements in
         each inner list are the weights for the rows of the corresponding row
-        factor shard. In this case,  w_ij = unonbserved_weight +
+        factor shard. In this case,  w_ij = unobserved_weight +
                                             row_weights[i] * col_weights[j].
         - If this is a single non-negative real number, this value is used for
         all row weights and w_ij = unobserved_weight + row_weights *
@@ -435,7 +435,7 @@ class WALSModel(object):
       gramian: Variable storing the gramian calculated from the factors.
 
     Returns:
-      A op that updates the gramian with the calcuated value from the factors.
+      A op that updates the gramian with the calculated value from the factors.
     """"""
     partial_gramians = []
     for f in factors:
@@ -564,7 +564,7 @@ class WALSModel(object):
 
     Note that specifically this initializes the cache of the row and column
     weights on workers when `use_factors_weights_cache` is True. In this case,
-    if these weights are being calcualted and reset after the object is created,
+    if these weights are being calculated and reset after the object is created,
     it is important to ensure this ops is run afterwards so the cache reflects
     the correct values.
     """"""
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),factorization_ops_test.py,"@@ -210,7 +210,7 @@ class WalsModelTest(test.TestCase):
 
       # Test row projection.
       # Using the specified projection weights for the 2 row feature vectors.
-      # This is expected to reprodue the same row factors in the model as the
+      # This is expected to reproduce the same row factors in the model as the
       # weights and feature vectors are identical to that used in model
       # training.
       projected_rows = wals_model.project_row_factors(
@@ -283,7 +283,7 @@ class WalsModelTest(test.TestCase):
 
       # Test column projection.
       # Using the specified projection weights for the 3 column feature vectors.
-      # This is expected to reprodue the same column factors in the model as the
+      # This is expected to reproduce the same column factors in the model as the
       # weights and feature vectors are identical to that used in model
       # training.
       projected_cols = wals_model.project_col_factors(
@@ -385,7 +385,7 @@ class WalsModelTest(test.TestCase):
 
       # Test row projection.
       # Using the specified projection weights for the 2 row feature vectors.
-      # This is expected to reprodue the same row factors in the model as the
+      # This is expected to reproduce the same row factors in the model as the
       # weights and feature vectors are identical to that used in model
       # training.
       projected_rows = wals_model.project_row_factors(
@@ -462,7 +462,7 @@ class WalsModelTest(test.TestCase):
 
       # Test column projection.
       # Using the specified projection weights for the 2 column feature vectors.
-      # This is expected to reprodue the same column factors in the model as the
+      # This is expected to reproduce the same column factors in the model as the
       # weights and feature vectors are identical to that used in model
       # training.
       projected_cols = wals_model.project_col_factors(
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),gmm_ops.py,"@@ -280,7 +280,7 @@ class GmmAlgorithm(object):
     self._define_score_samples()
 
   def _define_full_covariance_probs(self, shard_id, shard):
-    """"""Defines the full covariance probabilties per example in a class.
+    """"""Defines the full covariance probabilities per example in a class.
 
     Updates a matrix with dimension num_examples X num_classes.
 
@@ -344,7 +344,7 @@ class GmmAlgorithm(object):
   def _define_prior_log_prob_operation(self, shard_id):
     """"""Computes the prior probability of all samples.
 
-    Updates a vector where each item is the prior probabibility of an
+    Updates a vector where each item is the prior probability of an
     input example.
 
     Args:
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),gmm_test.py,"@@ -210,7 +210,7 @@ class GMMTestQueues(test.TestCase):
     return _fn
 
   # This test makes sure that there are no deadlocks when using a QueueRunner.
-  # Note that since cluster initialization is dependendent on inputs, if input
+  # Note that since cluster initialization is dependent on inputs, if input
   # is generated using a QueueRunner, one has to make sure that these runners
   # are started before the initialization.
   def test_queues(self):
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),kmeans_test.py,"@@ -413,7 +413,7 @@ class KMeansCosineDistanceTest(KMeansTestBase):
     self.assertAllClose(score, self.true_score, atol=1e-2)
 
   def test_predict_kmeans_plus_plus(self):
-    # Most points are concetrated near one center. KMeans++ is likely to find
+    # Most points are concentrated near one center. KMeans++ is likely to find
     # the less populated centers.
     points = np.array(
         [[2.5, 3.5], [2.5, 3.5], [-2, 3], [-2, 3], [-3, -3], [-3.1, -3.2],
@@ -604,7 +604,7 @@ class KMeansTestQueues(test.TestCase):
     return _fn
 
   # This test makes sure that there are no deadlocks when using a QueueRunner.
-  # Note that since cluster initialization is dependendent on inputs, if input
+  # Note that since cluster initialization is dependent on inputs, if input
   # is generated using a QueueRunner, one has to make sure that these runners
   # are started before the initialization.
   def test_queues(self):
",0,train
6c1737e6c8c9e5db405853178fb5e42abc080ba3,tensorflow/tensorflow,contrib/factorization: minor spelling tweaks (#17992),wals.py,"@@ -235,7 +235,7 @@ def _wals_factorization_model_function(features, labels, mode, params):
         num_items: An integer, the total number of items of this axis.
         update_fn: A function that takes one argument (`sp_input`), and that
         returns a tuple of
-          * new_factors: A flot Tensor of the factor values after update.
+          * new_factors: A float Tensor of the factor values after update.
           * update_op: a TensorFlow op which updates the factors.
           * loss: A float Tensor, the unregularized loss.
           * reg_loss: A float Tensor, the regularization loss.
",0,train
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",benchmarks_test.py,"@@ -917,6 +917,14 @@ class MicroBenchmarks(test.Benchmark):
 
     self._run(scan, 100)
 
+  def benchmark_fastpath_conversion_type_inference(self):
+    c = constant_op.constant(1., dtype=dtypes.float32)
+
+    def fn():
+      return gen_math_ops.add(c, 1)
+
+    self._run(fn, 10000)
+
   def _benchmarkFunctionWithResourceInputs(self, num_resources, num_iters):
     @def_function.function
     def add_all(*args):
",0,test
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",pywrap_tensor.cc,"@@ -13,24 +13,22 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
+#include ""tensorflow/python/eager/pywrap_tensor.h""
+
 #include <stdlib.h>
 
+#include ""structmember.h""  // NOLINT // For PyMemberDef
+#include ""tensorflow/c/c_api.h""
+#include ""tensorflow/core/framework/types.h""
+#include ""tensorflow/core/framework/types.pb.h""
+#include ""tensorflow/core/lib/strings/strcat.h""
+#include ""tensorflow/python/eager/pywrap_tfe.h""
+#include ""tensorflow/python/lib/core/ndarray_tensor.h""
 #include ""tensorflow/python/lib/core/ndarray_tensor_bridge.h""
 #include ""tensorflow/python/lib/core/numpy.h""
 #include ""tensorflow/python/lib/core/py_seq_tensor.h""
 #include ""tensorflow/python/lib/core/safe_ptr.h""
 
-#include ""tensorflow/python/eager/pywrap_tensor.h""
-#include ""tensorflow/python/eager/pywrap_tfe.h""
-
-#include ""tensorflow/c/c_api.h""
-#include ""tensorflow/core/lib/strings/strcat.h""
-#include ""tensorflow/python/lib/core/ndarray_tensor.h""
-
-#include ""tensorflow/core/framework/types.h""
-
-#include ""structmember.h""  // NOLINT // For PyMemberDef
-
 // forward declare
 struct EagerTensor;
 
@@ -106,19 +104,19 @@ TFE_TensorHandle* CopyToDevice(TFE_TensorHandle* handle, PyObject* ctx,
   return new_handle;
 }
 
-// Helper function to convert `v` to an int and store it in `*out`. Returns true
-// on success, false otherwise.
+// Helper function to convert `v` to a tensorflow::DataType and store it in
+// `*out`. Returns true on success, false otherwise.
 // Note that we assume that v is a python int (not long) representing a
-// TF_DataType value.
-bool PyIntToDataType(PyObject* v, int* out) {
+// TF_DataType/tensorflow::DataType value.
+bool PyIntToDataType(PyObject* v, tensorflow::DataType* out) {
 #if PY_MAJOR_VERSION < 3
   if (PyInt_Check(v)) {
-    *out = PyInt_AS_LONG(v);
+    *out = static_cast<tensorflow::DataType>(PyInt_AS_LONG(v));
     return true;
   }
 #else
   if (PyLong_Check(v)) {
-    *out = PyLong_AsLong(v);
+    *out = static_cast<tensorflow::DataType>(PyLong_AsLong(v));
     return true;
   }
 #endif
@@ -208,18 +206,8 @@ TFE_TensorHandle* EagerCast(TFE_Context* ctx, TFE_TensorHandle* handle,
 #undef RETURN_ERROR
 }
 
-TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) {
-  int desired_dtype = -1;
-  if (dtype != Py_None) {
-    if (!PyIntToDataType(dtype, &desired_dtype)) {
-      PyErr_SetString(PyExc_TypeError,
-                      tensorflow::strings::StrCat(
-                          ""Expecting a DataType value for dtype. Got "",
-                          Py_TYPE(dtype)->tp_name)
-                          .c_str());
-      return nullptr;
-    }
-  }
+TFE_TensorHandle* ConvertToEagerTensor(PyObject* value,
+                                       tensorflow::DataType dtype) {
   tensorflow::Safe_PyObjectPtr value_decrefer;
   if (PyArray_IsScalar(value, Generic)) {
     // Convert numpy scalars to numpy arrays.
@@ -230,14 +218,14 @@ TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype) {
   }
   if (PyArray_Check(value)) {
     int desired_np_dtype = -1;
-    if (desired_dtype >= 0) {
+    if (dtype != tensorflow::DT_INVALID) {
       if (!tensorflow::TF_DataType_to_PyArray_TYPE(
-               static_cast<TF_DataType>(desired_dtype), &desired_np_dtype)
+               static_cast<TF_DataType>(dtype), &desired_np_dtype)
                .ok()) {
-        PyErr_SetString(PyExc_TypeError,
-                        tensorflow::strings::StrCat(
-                            ""Invalid dtype argument value "", desired_dtype)
-                            .c_str());
+        PyErr_SetString(
+            PyExc_TypeError,
+            tensorflow::strings::StrCat(""Invalid dtype argument value "", dtype)
+                .c_str());
         return nullptr;
       }
     }
@@ -402,7 +390,7 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) {
   }
 
   // Extract dtype
-  int desired_dtype = -1;
+  tensorflow::DataType desired_dtype = tensorflow::DT_INVALID;
   if (dtype != Py_None) {
     if (!PyIntToDataType(dtype, &desired_dtype)) {
       PyErr_SetString(PyExc_TypeError,
@@ -416,10 +404,11 @@ int EagerTensor_init(EagerTensor* self, PyObject* args, PyObject* kwds) {
   PyErr_Clear();
   tensorflow::Safe_TFE_TensorHandlePtr handle =
       tensorflow::make_safe(static_cast<TFE_TensorHandle*>(
-          tensorflow::ConvertToEagerTensor(value, dtype)));
+          tensorflow::ConvertToEagerTensor(value, desired_dtype)));
   if (handle == nullptr) return -1;
   TF_DataType handle_dtype = TFE_TensorHandleDataType(handle.get());
-  if (desired_dtype >= 0 && desired_dtype != handle_dtype) {
+  if (desired_dtype != tensorflow::DT_INVALID &&
+      static_cast<TF_DataType>(desired_dtype) != handle_dtype) {
     // Check type compatibility.
     if (tensorflow::IsCompatible(desired_dtype, handle_dtype)) {
       handle = tensorflow::make_safe(tensorflow::EagerCast(
",0,test
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",pywrap_tensor.h,"@@ -27,7 +27,7 @@ tensorflow::int64 PyEagerTensor_NumElements(const PyObject* tensor);
 
 namespace tensorflow {
 bool IsCompatible(int desired_dtype, TF_DataType returned_dtype);
-TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, PyObject* dtype);
+TFE_TensorHandle* ConvertToEagerTensor(PyObject* value, DataType dtype);
 
 // TODO(nareshmodi): Move EagerCast and ReadVariableOp (which use the C API to
 // execute TFE Ops) to a separate common library.
",0,test
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",pywrap_tfe_src.cc,"@@ -16,8 +16,6 @@ limitations under the License.
 #include <cstring>
 #include <thread>
 
-#include ""tensorflow/python/eager/pywrap_tfe.h""
-
 #include ""absl/strings/str_cat.h""
 #include ""absl/types/variant.h""
 #include ""tensorflow/c/c_api.h""
@@ -35,6 +33,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/protobuf.h""
 #include ""tensorflow/core/platform/types.h""
 #include ""tensorflow/python/eager/pywrap_tensor.h""
+#include ""tensorflow/python/eager/pywrap_tfe.h""
 #include ""tensorflow/python/lib/core/safe_ptr.h""
 #include ""tensorflow/python/util/util.h""
 
@@ -1872,33 +1871,25 @@ bool CheckInputsOk(PyObject* seq, int start_index,
   return true;
 }
 
-PyObject* MaybeGetDType(PyObject* item) {
-  if (EagerTensor_CheckExact(item)) {
-    tensorflow::Safe_PyObjectPtr py_dtype(
-        PyObject_GetAttrString(item, ""dtype""));
-    return PyObject_GetAttrString(py_dtype.get(), ""_type_enum"");
+tensorflow::DataType MaybeGetDType(PyObject* item) {
+  if (EagerTensor_CheckExact(item) || CheckResourceVariable(item)) {
+    return FastTensorDtype(item);
   }
 
-  if (CheckResourceVariable(item)) {
-    tensorflow::Safe_PyObjectPtr py_dtype(
-        PyObject_GetAttrString(item, ""_dtype""));
-    return PyObject_GetAttrString(py_dtype.get(), ""_type_enum"");
-  }
-
-  return nullptr;
+  return tensorflow::DT_INVALID;
 }
 
-PyObject* MaybeGetDTypeForAttr(const string& attr,
-                               FastPathOpExecInfo* op_exec_info) {
+tensorflow::DataType MaybeGetDTypeForAttr(const string& attr,
+                                          FastPathOpExecInfo* op_exec_info) {
   auto cached_it = op_exec_info->cached_dtypes.find(attr);
   if (cached_it != op_exec_info->cached_dtypes.end()) {
-    return GetPythonObjectFromInt(cached_it->second);
+    return cached_it->second;
   }
 
   auto it = op_exec_info->attr_to_inputs_map->find(attr);
   if (it == op_exec_info->attr_to_inputs_map->end()) {
     // No other inputs - this should never happen.
-    Py_RETURN_NONE;
+    return tensorflow::DT_INVALID;
   }
 
   for (const auto& input_info : it->second) {
@@ -1908,17 +1899,17 @@ PyObject* MaybeGetDTypeForAttr(const string& attr,
       tensorflow::Safe_PyObjectPtr fast_item(
           PySequence_Fast(item, ""Unable to allocate""));
       for (int i = 0; i < PySequence_Fast_GET_SIZE(fast_item.get()); i++) {
-        auto* dtype =
+        auto dtype =
             MaybeGetDType(PySequence_Fast_GET_ITEM(fast_item.get(), i));
-        if (dtype != nullptr) return dtype;
+        if (dtype != tensorflow::DT_INVALID) return dtype;
       }
     } else {
-      auto* dtype = MaybeGetDType(item);
-      if (dtype != nullptr) return dtype;
+      auto dtype = MaybeGetDType(item);
+      if (dtype != tensorflow::DT_INVALID) return dtype;
     }
   }
 
-  Py_RETURN_NONE;
+  return tensorflow::DT_INVALID;
 }
 
 // TODO(agarwal): use an automatic mechanism for handling None arguments to
@@ -2310,9 +2301,9 @@ bool ConvertToTensor(
     const FastPathOpExecInfo& op_exec_info, PyObject* input,
     tensorflow::Safe_PyObjectPtr* output_handle,
     // This gets a hint for this particular input.
-    const std::function<PyObject*()>& dtype_hint_getter,
+    const std::function<tensorflow::DataType()>& dtype_hint_getter,
     // This sets the dtype after conversion is complete.
-    const std::function<void(const TF_DataType& dtype)>& dtype_setter,
+    const std::function<void(const tensorflow::DataType dtype)>& dtype_setter,
     TF_Status* status) {
   if (EagerTensor_CheckExact(input)) {
     Py_INCREF(input);
@@ -2323,28 +2314,18 @@ bool ConvertToTensor(
   }
 
   // The hint comes from a supposedly similarly typed tensor.
-  tensorflow::Safe_PyObjectPtr dtype_hint(dtype_hint_getter());
-  if (PyErr_Occurred()) {
-    return false;
-  }
+  tensorflow::DataType dtype_hint = dtype_hint_getter();
 
   tensorflow::Safe_TFE_TensorHandlePtr handle =
       tensorflow::make_safe(static_cast<TFE_TensorHandle*>(
-          tensorflow::ConvertToEagerTensor(input, dtype_hint.get())));
+          tensorflow::ConvertToEagerTensor(input, dtype_hint)));
   if (handle == nullptr) {
     return MaybeRaiseExceptionFromTFStatus(status, nullptr);
   }
 
   int desired_dtype = -1;
-  if (dtype_hint.get() != Py_None) {
-    if (!ParseTypeValue("""", dtype_hint.get(), status, &desired_dtype)) {
-      PyErr_SetString(PyExc_TypeError,
-                      tensorflow::strings::StrCat(
-                          ""Expecting a DataType value for dtype. Got "",
-                          Py_TYPE(dtype_hint.get())->tp_name)
-                          .c_str());
-      return false;
-    }
+  if (dtype_hint != tensorflow::DT_INVALID) {
+    desired_dtype = static_cast<int>(dtype_hint);
   }
 
   // Maybe cast to the desired type. This is intended to match python
@@ -2372,7 +2353,7 @@ bool ConvertToTensor(
   }
 
   output_handle->reset(EagerTensorFromHandle(handle.release()));
-  dtype_setter(output_dtype);
+  dtype_setter(static_cast<tensorflow::DataType>(output_dtype));
 
   return true;
 }
@@ -2394,13 +2375,12 @@ bool AddInputToOp(FastPathOpExecInfo* op_exec_info, PyObject* input,
           *op_exec_info, input, &py_eager_tensor,
           [&]() {
             if (input_arg.type() != tensorflow::DataType::DT_INVALID) {
-              return GetPythonObjectFromInt(input_arg.type());
+              return input_arg.type();
             }
             return MaybeGetDTypeForAttr(input_arg.type_attr(), op_exec_info);
           },
-          [&](const TF_DataType dtype) {
-            op_exec_info->cached_dtypes[input_arg.type_attr()] =
-                static_cast<tensorflow::DataType>(dtype);
+          [&](const tensorflow::DataType dtype) {
+            op_exec_info->cached_dtypes[input_arg.type_attr()] = dtype;
           },
           status)) {
     return false;
@@ -2737,8 +2717,8 @@ PyObject* TFE_Py_FastPathExecute_C(PyObject*, PyObject* args) {
         tensorflow::Safe_PyObjectPtr py_eager_tensor;
         if (!ConvertToTensor(
                 op_exec_info, py_input, &py_eager_tensor,
-                []() { Py_RETURN_NONE; }, [](const TF_DataType& dtype) {},
-                status)) {
+                []() { return tensorflow::DT_INVALID; },
+                [](const tensorflow::DataType dtype) {}, status)) {
           return nullptr;
         }
 
",0,test
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",py_seq_tensor.cc,"@@ -490,16 +490,13 @@ DEFINE_HELPER(ConvertBool, bool, DT_BOOL, ConvertOneBool);
     return errors::InvalidArgument(_error);                      \
   } while (0)
 
-Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret) {
+Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret) {
   DataType infer_dtype;
   TensorShape shape;
   TF_RETURN_IF_ERROR(InferShapeAndType(obj, &shape, &infer_dtype));
   DataType requested_dtype = DT_INVALID;
-  if (dtype != Py_None) {
-    int32 dtype_as_int = -1;
-    if (ConvertOneInt32(dtype, &dtype_as_int) == nullptr) {
-      requested_dtype = static_cast<DataType>(dtype_as_int);
-    }
+  if (dtype != DT_INVALID) {
+    requested_dtype = dtype;
   }
   // NOTE(josh11b): If don't successfully convert to the requested type,
   // we just try instead to create a tensor of the inferred type and
",0,test
fd73284e085ab9b969775b7a71ac9b7dd8d8d2b6,tensorflow/tensorflow,"Use eagertensor dtype when inferring dtypes. Also pass tensorflow dtype to PySeqToTensor.

When inferring types from other tensors for converttotensor in the eager fastpath, I was seeing calls to the python EagerTensor method dtype(). This change makes it get the dtype from the EagerTensor in C when possible instead of calling back into python.

PiperOrigin-RevId: 243681547",py_seq_tensor.h,"@@ -30,7 +30,7 @@ namespace tensorflow {
 // representing the desired dtype of the resulting Tensor.
 // This is used only as a hint, *ret may not have that dtype on
 // success and may require a cast.
-Status PySeqToTensor(PyObject* obj, PyObject* dtype, Tensor* ret);
+Status PySeqToTensor(PyObject* obj, DataType dtype, Tensor* ret);
 
 }  // namespace tensorflow
 
",0,test
3b167c42039dfbc70cec16ae2ac75c11870e1729,tensorflow/tensorflow,"Fix typo in docstring

PiperOrigin-RevId: 251339076",loss_scale.py,"@@ -289,7 +289,7 @@ class DynamicLossScale(LossScale):
       initial_loss_scale: A Python float.  The loss scale to use at the
         beginning. It's better to start this at a very high number, because a
         loss scale that is too high gets lowered far more quickly than a loss
-        scale that is to low gets raised. The default is 2 ** 15, which is
+        scale that is too low gets raised. The default is 2 ** 15, which is
         approximately half the maximum float16 value.
       increment_period: Increases loss scale every `increment_period`
         consecutive steps that finite gradients are encountered. If a nonfinite
",0,train
87f5182a5644993e747c2f42dfe6da75b7431e66,tensorflow/tensorflow,"Fix broken test: tensorflow/contrib/eager/python:datasets_test

PiperOrigin-RevId: 168914742",function.py,"@@ -348,7 +348,11 @@ class _DefinedFunction(object):
 
   def _create_definition_if_needed(self):
     """"""Creates the function definition if it's not created yet.""""""
+    with context.graph_mode():
+      self._create_definition_if_needed_impl()
 
+  def _create_definition_if_needed_impl(self):
+    """"""This is not what you want, see _create_definition_if_needed.""""""
     if self._definition is not None:
       return
 
",0,train
5e6153293b73fdded18657efacb33440a5cef91b,tensorflow/tensorflow,"[tf:tfrt] Verify returned tensors alignment in TFRT/JIT python tests

PiperOrigin-RevId: 401732079
Change-Id: I0c39292b21622d0fe7e5abbf00c19c8aa83d42d6",tf_cpurt_executor.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 
 #include ""mlir/ExecutionEngine/CRunnerUtils.h""
 #include ""mlir/Transforms/Bufferize.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/compiler/mlir/tensorflow/dialect_registration.h""
 #include ""tensorflow/compiler/mlir/tfrt/jit/tf_cpurt_pipeline.h""
 #include ""tensorflow/core/platform/dynamic_annotations.h""
@@ -220,6 +221,15 @@ using PyBindingReturnValueConverter =
     ReturnValueConverter<PyBindingConversionContext>;
 }  // namespace
 
+template <typename T>
+static bool IsAligned(const T* ptr) {
+#if EIGEN_MAX_ALIGN_BYTES == 0
+  return true;
+#else
+  return reinterpret_cast<intptr_t>(ptr) % EIGEN_MAX_ALIGN_BYTES == 0;
+#endif
+}
+
 // Converts StridedMemrefType to the Python array. This struct satisfies
 // ReturnStridedMemref's concept (see cpurt.h).
 //
@@ -234,6 +244,7 @@ struct MemrefToPyArray {
   template <typename T, int rank>
   static py::array Convert(const ConversionContext&, void* memref_ptr) {
     auto* memref = static_cast<StridedMemRefType<T, rank>*>(memref_ptr);
+    assert(IsAligned(memref->data) && ""returned memref must be aligned"");
 
     auto memref_sizes = Sizes(memref);
     auto memref_strides = Strides(memref);
",0,train
05f8ea8e9522a3027d4f3f7a54d716bfafed427a,tensorflow/tensorflow,"[XLA:GPU] Do not fuse loop fusions with different output shapes.

PiperOrigin-RevId: 209724594",multi_output_fusion.cc,"@@ -187,6 +187,19 @@ bool GpuMultiOutputFusion::LegalToFuse(HloInstruction* instr1,
     return false;
   }
 
+  // Multi-output loop fusions must have equal output shapes to be lowered.
+  if (instr1->fusion_kind() == HloInstruction::FusionKind::kLoop) {
+    Shape shape1 = instr1->IsMultiOutputFusion()
+                       ? instr1->shape().tuple_shapes(0)
+                       : instr1->shape();
+    Shape shape2 = instr2->IsMultiOutputFusion()
+                       ? instr2->shape().tuple_shapes(0)
+                       : instr2->shape();
+    if (!ShapeUtil::Equal(shape1, shape2)) {
+      return false;
+    }
+  }
+
   // Do this check last, as it may be expensive.
   return !GpuInstructionFusion::FusionWouldBeTooLarge(instr1, instr2);
 }
",0,train
05f8ea8e9522a3027d4f3f7a54d716bfafed427a,tensorflow/tensorflow,"[XLA:GPU] Do not fuse loop fusions with different output shapes.

PiperOrigin-RevId: 209724594",multi_output_fusion_test.cc,"@@ -256,6 +256,90 @@ TEST_F(MultiOutputFusionTest, MultiOutputFusionTwoLoops) {
               op::Tuple(op::Multiply(), op::Divide()));
 }
 
+TEST_F(MultiOutputFusionTest, MultiOutputFusionSiblingLoopsDifferentShapes) {
+  auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
+    fused_computation_1 {
+      p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      ROOT mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1)
+    }
+
+    fused_computation_2 {
+      p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      const.2 = f32[] constant(0)
+      ROOT reduce = f32[8,1,5,1,1]{4,3,2,1,0} reduce(p0.2, const.2), dimensions={3}, to_apply=scalar_add_computation
+    }
+
+    ENTRY entry {
+      p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      fusion.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_1
+      fusion.2 = f32[8,1,5,1,1]{4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2
+      ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,1,1]{4,3,2,1,0}) tuple(fusion.1, fusion.2)
+    })""))
+                    .ValueOrDie();
+  ASSERT_FALSE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie());
+}
+
+TEST_F(MultiOutputFusionTest, MultiOutputFusionSiblingLoopAndMultiOutputLoop) {
+  auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
+    fused_computation_1 {
+      p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1)
+      exp = f32[8,1,5,16,1,1]{5,4,3,2,1,0} exponential(p0.1)
+      ROOT tuple = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(mul, exp)
+    }
+
+    fused_computation_2 {
+      p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      const.2 = f32[] constant(0)
+      ROOT add = f32[8,1,5,16,1,1]{5,4,3,2,1,0} add(p0.2, const.2)
+    }
+
+    ENTRY entry {
+      p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      fusion.1 = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) fusion(p0), kind=kLoop, calls=fused_computation_1
+      fusion.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2
+      gte0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=0
+      gte1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=1
+      ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(gte0, gte1, fusion.2)
+    })""))
+                    .ValueOrDie();
+  ASSERT_TRUE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie());
+  SCOPED_TRACE(module->ToString());
+  const HloInstruction* fusion =
+      module->entry_computation()->root_instruction()->operand(0)->operand(0);
+  ASSERT_TRUE(fusion->IsMultiOutputFusion());
+  EXPECT_THAT(fusion->fused_expression_root(),
+              op::Tuple(op::Multiply(), op::Exp(), op::Add()));
+}
+
+TEST_F(MultiOutputFusionTest,
+       MultiOutputFusionSiblingLoopAndMultiOutputLoopDifferentShapes) {
+  auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
+    fused_computation_1 {
+      p0.1 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      mul = f32[8,1,5,16,1,1]{5,4,3,2,1,0} multiply(p0.1, p0.1)
+      exp = f32[8,1,5,16,1,1]{5,4,3,2,1,0} exponential(p0.1)
+      ROOT tuple = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) tuple(mul, exp)
+    }
+
+    fused_computation_2 {
+      p0.2 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      const.2 = f32[] constant(0)
+      ROOT reduce = f32[8,1,5,1,1]{4,3,2,1,0} reduce(p0.2, const.2), dimensions={3}, to_apply=scalar_add_computation
+    }
+
+    ENTRY entry {
+      p0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} parameter(0)
+      fusion.1 = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}) fusion(p0), kind=kLoop, calls=fused_computation_1
+      fusion.2 = f32[8,1,5,1,1]{4,3,2,1,0} fusion(p0), kind=kLoop, calls=fused_computation_2
+      gte0 = f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=0
+      gte1 =  f32[8,1,5,16,1,1]{5,4,3,2,1,0} get-tuple-element(fusion.1), index=1
+      ROOT root = (f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,16,1,1]{5,4,3,2,1,0}, f32[8,1,5,1,1]{4,3,2,1,0}) tuple(gte0, gte1, fusion.2)
+    })""))
+                    .ValueOrDie();
+  ASSERT_FALSE(GpuMultiOutputFusion().Run(module.get()).ValueOrDie());
+}
+
 TEST_F(MultiOutputFusionTest, ProducerConsumerFusionElementwiseAndReduce) {
   auto module = ParseHloString(tensorflow::strings::StrCat(kModulePrefix, R""(
     ENTRY reduce {
",0,train
9aedc576ae2499d337b9e5ceaa78bd6f123bc77d,tensorflow/tensorflow,"[tf.data service] Extend round robin unit test to cover multiple replicas per host.

PiperOrigin-RevId: 351900031
Change-Id: I2d1eb18cc24da64581f324f66ff244a277cc37ce",data_service_ops_test.py,"@@ -334,14 +334,18 @@ class DataServiceOpsTest(data_service_test_base.TestBase,
     # Round robin reads can cause slow cluster shutdown.
     GLOBAL_CLUSTERS.add(cluster)
     num_elements = 100
-    ds = dataset_ops.Dataset.range(num_elements, output_type=dtypes.int32)
-    ds = ds.shuffle(num_elements)
     low_bucket_max = 30
     mid_bucket_max = 60
     bucket_boundaries = [low_bucket_max, mid_bucket_max]
     batch_size = 10
-    num_consumers = 3
+    num_consumer_hosts = 3
+    replicas_per_consumer_host = 5
+    num_consumers = num_consumer_hosts * replicas_per_consumer_host
     bucket_batch_sizes = [batch_size] * (len(bucket_boundaries) + 1)
+    # Set up the dataset that will run on the tf.data workers.
+    ds = dataset_ops.Dataset.range(num_elements, output_type=dtypes.int32)
+    ds = ds.shuffle(num_elements)
+    ds = ds.repeat()
     ds = ds.apply(
         grouping.bucket_by_sequence_length(
             lambda x: x,
@@ -354,28 +358,43 @@ class DataServiceOpsTest(data_service_test_base.TestBase,
             lambda _, x: dataset_ops.Dataset.from_tensors(x),
             window_size=num_consumers))
     ds = ds.flat_map(lambda x: x)
-    ds = ds.repeat()
 
-    consumers = []
-    for consumer_index in range(num_consumers):
-      consumers.append(
-          self.make_distributed_dataset(
-              ds,
-              cluster,
-              job_name=""test"",
-              consumer_index=consumer_index,
-              num_consumers=num_consumers))
-    # Use parallel interleave to read from consumers in parallel.
-    ds = dataset_ops.Dataset.from_tensor_slices(consumers)
+    # Set up the per-consumer-host datasets. During each global step, we pull
+    # `replicas_per_consumer_host` batches from each of these datasets.
+    host_datasets = []
+    for host_index in range(num_consumer_hosts):
+      per_replica_datasets = []
+      for i in range(replicas_per_consumer_host):
+        consumer_index = host_index * replicas_per_consumer_host + i
+        per_replica_datasets.append(
+            self.make_distributed_dataset(
+                ds,
+                cluster,
+                job_name=""test"",
+                consumer_index=consumer_index,
+                num_consumers=num_consumers))
+      host_dataset = dataset_ops.Dataset.from_tensor_slices(
+          per_replica_datasets)
+      host_dataset = host_dataset.interleave(
+          lambda x: x,
+          cycle_length=len(per_replica_datasets),
+          num_parallel_calls=len(per_replica_datasets),
+          deterministic=True)
+      host_datasets.append(host_dataset)
+
+    # Use parallel interleave to read from host datasets in parallel.
+    ds = dataset_ops.Dataset.from_tensor_slices(host_datasets)
     ds = ds.interleave(
-        lambda x: x.prefetch(num_elements),
-        cycle_length=num_consumers,
-        num_parallel_calls=num_consumers)
+        lambda x: x,
+        block_length=replicas_per_consumer_host,
+        cycle_length=len(host_datasets),
+        num_parallel_calls=len(host_datasets),
+        deterministic=True)
 
     num_rounds = 10
     get_next = self.getNext(ds, requires_initialization=True)
     results = []
-    for _ in range(num_rounds):
+    for _ in range(num_rounds * num_consumers):
       results.append(self.evaluate(get_next()))
 
     def get_bucket(elem):
@@ -385,8 +404,10 @@ class DataServiceOpsTest(data_service_test_base.TestBase,
         bucket_ind += 1
       return bucket_ind
 
+    # Check that the batches for each step contain elements from the same
+    # bucket.
     for i in range(0, len(results), num_consumers):
-      batches = results[num_consumers * i:num_consumers * i + num_consumers]
+      batches = results[num_consumers * i:num_consumers * (i + 1)]
       bucket_inds = [get_bucket(batch[0]) for batch in batches]
       for bucket_ind in bucket_inds[1:]:
         self.assertEqual(bucket_inds[0], bucket_ind)
",0,train
af62b5ccb9d06381096d18418920d06390d90be9,tensorflow/tensorflow,"Fixed the order of arguments for softmax_loss_function in two places, including a semantic code change.
Change: 150685391",loss.py,"@@ -48,7 +48,7 @@ def sequence_loss(logits, targets, weights,
       timesteps.
     average_across_batch: If set, sum the cost across the batch dimension and
       divide the returned cost by the batch size.
-    softmax_loss_function: Function (inputs-batch, labels-batch) -> loss-batch
+    softmax_loss_function: Function (labels-batch, inputs-batch) -> loss-batch
       to be used instead of the standard softmax (the default if this is None).
     name: Optional name for this operation, defaults to ""sequence_loss"".
 
@@ -76,7 +76,7 @@ def sequence_loss(logits, targets, weights,
       crossent = nn_ops.sparse_softmax_cross_entropy_with_logits(
         labels=targets, logits=probs_flat)
     else:
-      crossent = softmax_loss_function(probs_flat, targets)
+      crossent = softmax_loss_function(targets, probs_flat)
     crossent = crossent * array_ops.reshape(weights, [-1])
     if average_across_timesteps and average_across_batch:
       crossent = math_ops.reduce_sum(crossent)
",0,test
e2b5397f126ba9cbc76a840ea0a46331e0f10897,tensorflow/tensorflow,"Update GraphDef version to 434.

PiperOrigin-RevId: 316639748
Change-Id: I2f62575a1ffdf72dbbafd5a2d6a10ae2a64d4b7c",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 433  // Updated: 2020/6/15
+#define TF_GRAPH_DEF_VERSION 434  // Updated: 2020/6/16
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
7282528b715696147a927cb3bf595d957b2f4c42,tensorflow/tensorflow,readd some types,lookup_table_op.cc,"@@ -820,6 +820,7 @@ REGISTER_KERNEL(int64, double);
 REGISTER_KERNEL(int64, float);
 REGISTER_KERNEL(int64, int32);
 REGISTER_KERNEL(int64, int64);
+REGISTER_KERNEL(int64, string);
 REGISTER_KERNEL(string, bool);
 REGISTER_KERNEL(string, double);
 REGISTER_KERNEL(string, float);
",0,train
41523b32072d3fd8d87bc246234e0ec29d9146f6,tensorflow/tensorflow,"Adding examples for tf.image.random_flip_left_right and
tf.image.random_flip_up_down usage.

PiperOrigin-RevId: 270365884",image_ops_impl.py,"@@ -326,6 +326,26 @@ def random_flip_up_down(image, seed=None):
 
   With a 1 in 2 chance, outputs the contents of `image` flipped along the first
   dimension, which is `height`.  Otherwise output the image as-is.
+  When passing a batch of images, each image will be randomly flipped
+  independently of the other images.
+
+  Example usage:
+
+    Randomly flip a single image.
+    >>> import numpy as np
+
+    >>> image = np.array([[[1], [2]], [[3], [4]]])
+    >>> tf.image.random_flip_up_down(image, 3).numpy().tolist()
+    [[[3], [4]], [[1], [2]]]
+
+    Randomly flip multiple images.
+    >>> images = np.array(
+    ... [
+    ...     [[[1], [2]], [[3], [4]]],
+    ...     [[[5], [6]], [[7], [8]]]
+    ... ])
+    >>> tf.image.random_flip_up_down(images, 4).numpy().tolist()
+    [[[[3], [4]], [[1], [2]]], [[[5], [6]], [[7], [8]]]]
 
   Args:
     image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
@@ -347,6 +367,25 @@ def random_flip_left_right(image, seed=None):
 
   With a 1 in 2 chance, outputs the contents of `image` flipped along the
   second dimension, which is `width`.  Otherwise output the image as-is.
+  When passing a batch of images, each image will be randomly flipped
+  independently of the other images.
+
+  Example usage:
+    Randomly flip a single image.
+    >>> import numpy as np
+
+    >>> image = np.array([[[1], [2]], [[3], [4]]])
+    >>> tf.image.random_flip_left_right(image, 5).numpy().tolist()
+    [[[2], [1]], [[4], [3]]]
+
+    Randomly flip multiple images.
+    >>> images = np.array(
+    ... [
+    ...     [[[1], [2]], [[3], [4]]],
+    ...     [[[5], [6]], [[7], [8]]]
+    ... ])
+    >>> tf.image.random_flip_left_right(images, 6).numpy().tolist()
+    [[[[2], [1]], [[4], [3]]], [[[5], [6]], [[7], [8]]]]
 
   Args:
     image: 4-D Tensor of shape `[batch, height, width, channels]` or 3-D Tensor
",0,train
f5de0a77b3301fa1990eda1047f77c1236324b58,tensorflow/tensorflow,"Add the quantization specs for the inputs and outputs

If the value is annotated by the fake quant ops, the quantization spec is
extracted from the fake quant and put in the quantization attributes.

PiperOrigin-RevId: 302558753
Change-Id: I26b79ee1eab32f71e4be356bd58f6d815bc19243",cpu_kernel_fusion.cc,"@@ -32,6 +32,7 @@ limitations under the License.
 #include ""mlir/IR/Attributes.h""  // from @llvm-project
 #include ""mlir/IR/BlockAndValueMapping.h""  // from @llvm-project
 #include ""mlir/IR/MLIRContext.h""  // from @llvm-project
+#include ""mlir/IR/Matchers.h""  // from @llvm-project
 #include ""mlir/IR/PatternMatch.h""  // from @llvm-project
 #include ""mlir/IR/StandardTypes.h""  // from @llvm-project
 #include ""mlir/IR/Value.h""  // from @llvm-project
@@ -47,21 +48,56 @@ limitations under the License.
 
 #define DEBUG_TYPE ""quant-kernel-fusion""
 
+constexpr int kFakeQuantOperandsNum = 5;
+constexpr int kFakeQuantPerChannelOperandsNum = 6;
+
 namespace mlir {
 namespace xla_hlo {
 
 namespace {
 
+TypeAttr GetQuantSpec(Operation* op) {
+  auto fake_quant = llvm::dyn_cast_or_null<CustomCallOp>(op);
+  if (!fake_quant || fake_quant.getNumOperands() < kFakeQuantOperandsNum ||
+      fake_quant.getNumOperands() > kFakeQuantPerChannelOperandsNum ||
+      fake_quant.call_target_name() != ""fake_quant_with_min_max_vars"")
+    return {};
+
+  DenseFPElementsAttr min, max;
+  DenseIntElementsAttr bit_width, narrow_range, quant_dim;
+  if (!matchPattern(fake_quant.getOperand(1), m_Constant(&min)) ||
+      !matchPattern(fake_quant.getOperand(2), m_Constant(&max)) ||
+      !matchPattern(fake_quant.getOperand(3), m_Constant(&bit_width)) ||
+      !matchPattern(fake_quant.getOperand(4), m_Constant(&narrow_range)))
+    return {};
+
+  auto bit_width_val = (*bit_width.attr_value_begin()).cast<IntegerAttr>();
+  auto narrow_range_val = (*narrow_range.int_value_begin()).getSExtValue();
+  int quant_dim_val = -1;
+  if (fake_quant.getNumOperands() == kFakeQuantPerChannelOperandsNum &&
+      matchPattern(fake_quant.getOperand(kFakeQuantPerChannelOperandsNum - 1),
+                   m_Constant(&quant_dim))) {
+    quant_dim_val = (*quant_dim.int_value_begin()).getSExtValue();
+  }
+
+  OpBuilder builder(op);
+  Type input_type =
+      fake_quant.getOperand(0).getType().cast<ShapedType>().getElementType();
+  return quant::GetQuantizedTypeAttr(
+      builder, input_type, min, max, quant_dim_val, bit_width_val,
+      builder.getBoolAttr(narrow_range_val), /*is_signed=*/true);
+}
+
 // Collects input values from outside for 'ops'.
 void CollectInputs(llvm::ArrayRef<Operation*> ops,
                    llvm::SmallVectorImpl<Value>* inputs,
                    llvm::SmallVectorImpl<Attribute>* input_specs) {
-  for (auto* op : ops) {
-    for (auto operand : op->getOperands()) {
+  for (Operation* op : ops) {
+    for (Value operand : op->getOperands()) {
       if (std::find(inputs->begin(), inputs->end(), operand) != inputs->end()) {
         continue;
       }
-      if (auto* def_op = operand.getDefiningOp()) {
+      if (Operation* def_op = operand.getDefiningOp()) {
         if (std::find(ops.begin(), ops.end(), def_op) == ops.end()) {
           inputs->push_back(operand);
         }
@@ -71,10 +107,13 @@ void CollectInputs(llvm::ArrayRef<Operation*> ops,
     }
   }
 
-  for (auto input : *inputs) {
+  for (Value input : *inputs) {
     ShapedType input_type = input.getType().cast<ShapedType>();
-    // TODO(fengliuai): detect whether it is from fake quant.
-    input_specs->push_back(TypeAttr::get(input_type.getElementType()));
+    if (TypeAttr spec = GetQuantSpec(input.getDefiningOp())) {
+      input_specs->push_back(spec);
+    } else {
+      input_specs->push_back(TypeAttr::get(input_type.getElementType()));
+    }
   }
 }
 
@@ -84,16 +123,19 @@ void CollectRets(llvm::ArrayRef<Operation*> ops,
                  llvm::SmallVectorImpl<Value>* rets,
                  llvm::SmallVectorImpl<Type>* ret_types,
                  llvm::SmallVectorImpl<Attribute>* ret_specs) {
-  for (auto* op : ops) {
-    for (auto result : op->getResults()) {
-      for (auto* user : result.getUsers()) {
+  for (Operation* op : ops) {
+    for (Value result : op->getResults()) {
+      for (Operation* user : result.getUsers()) {
         // If there are any user outside of 'ops'
         if (std::find(ops.begin(), ops.end(), user) == ops.end()) {
           ShapedType ret_type = result.getType().cast<ShapedType>();
           rets->push_back(result);
           ret_types->push_back(ret_type);
-          // TODO(fengliuai): detect whether it is used by fake quant.
-          ret_specs->push_back(TypeAttr::get(ret_type.getElementType()));
+          if (TypeAttr spec = GetQuantSpec(user)) {
+            ret_specs->push_back(spec);
+          } else {
+            ret_specs->push_back(TypeAttr::get(ret_type.getElementType()));
+          }
           break;
         }
       }
",0,train
0655342040635311ba9221da6ab2b8e6a8ec7f32,tensorflow/tensorflow,Ref #40: Simple example for text classification saving/restoring,text_classification_save_restore.py,"@@ -0,0 +1,101 @@
+#  Copyright 2015 Google Inc. All Rights Reserved.
+#
+#  Licensed under the Apache License, Version 2.0 (the ""License"");
+#  you may not use this file except in compliance with the License.
+#  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an ""AS IS"" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+import os
+import csv
+import numpy as np
+from sklearn import metrics
+
+import tensorflow as tf
+from tensorflow.models.rnn import rnn, rnn_cell
+import skflow
+
+### Training data
+
+# Download dbpedia_csv.tar.gz from
+# https://drive.google.com/folderview?id=0Bz8a_Dbh9Qhbfll6bVpmNUtUcFdjYmF2SEpmZUZUcVNiMUw1TWN6RDV3a0JHT3kxLVhVR2M
+# Unpack: tar -xvf dbpedia_csv.tar.gz
+
+def load_dataset(filename):
+    target = []
+    data = []
+    reader = csv.reader(open(filename), delimiter=',')
+    for line in reader:
+        target.append(int(line[0]))
+        data.append(line[2])
+    return data, np.array(target, np.float32)
+
+X_train, y_train = load_dataset('dbpedia_csv/train.csv')
+X_test, y_test = load_dataset('dbpedia_csv/test.csv')
+
+### Process vocabulary
+
+MAX_DOCUMENT_LENGTH = 10
+
+vocab_processor = skflow.preprocessing.VocabularyProcessor(MAX_DOCUMENT_LENGTH)
+X_train = np.array(list(vocab_processor.fit_transform(X_train)))
+X_test = np.array(list(vocab_processor.transform(X_test)))
+
+n_words = len(vocab_processor.vocabulary_)
+print('Total words: %d' % n_words)
+
+### Models
+
+EMBEDDING_SIZE = 50
+
+def average_model(X, y):
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    features = tf.reduce_max(word_vectors, reduction_indices=1)
+    return skflow.models.logistic_regression(features, y)
+
+def rnn_model(X, y):
+    """"""Recurrent neural network model to predict from sequence of words
+    to a class.""""""
+    # Convert indexes of words into embeddings.
+    # This creates embeddings matrix of [n_words, EMBEDDING_SIZE] and then
+    # maps word indexes of the sequence into [batch_size, sequence_length,
+    # EMBEDDING_SIZE].
+    word_vectors = skflow.ops.categorical_variable(X, n_classes=n_words,
+        embedding_size=EMBEDDING_SIZE, name='words')
+    # Split into list of embedding per word, while removing doc length dim.
+    # word_list results to be a list of tensors [batch_size, EMBEDDING_SIZE].
+    word_list = skflow.ops.split_squeeze(1, MAX_DOCUMENT_LENGTH, word_vectors)
+    # Create a Gated Recurrent Unit cell with hidden size of EMBEDDING_SIZE.
+    cell = rnn_cell.GRUCell(EMBEDDING_SIZE)
+    # Create an unrolled Recurrent Neural Networks to length of
+    # MAX_DOCUMENT_LENGTH and passes word_list as inputs for each unit.
+    _, encoding = rnn.rnn(cell, word_list, dtype=tf.float32)
+    # Given encoding of RNN, take encoding of last step (e.g. hidden size of the
+    # neural network of last step) and pass it as features for logistic
+    # regression over output classes.
+    return skflow.models.logistic_regression(encoding[-1], y)
+
+model_path = '/tmp/skflow_examples/text_classification'
+if os.path.exists(model_path):
+    classifier = skflow.TensorFlowEstimator.restore(model_path)
+    score = metrics.accuracy_score(classifier.predict(X_test), y_test)
+    print('Accuracy: {0:f}'.format(score))
+else:
+    classifier = skflow.TensorFlowEstimator(model_fn=rnn_model, n_classes=15,
+        steps=100, optimizer='Adam', learning_rate=0.01, continue_training=True)
+
+    # Continuously train for 1000 steps & predict on test set.
+    while True:
+        try:
+            classifier.fit(X_train, y_train)
+        except KeyboardInterrupt:
+            classifier.save(model_path)
+            break
+
",0,train
032daa478ef18007cf8214dbd4db0a83daebb62f,tensorflow/tensorflow,Add a comment,ir_emitter_unnested.cc,"@@ -5377,6 +5377,9 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo(
       if (auto fusion = mlir::dyn_cast<mlir::lmhlo::FusionOp>(unnested_hlo)) {
         fan_out = fusion.getFusionResults().size();
       }
+
+      // 64 is generally advised as the smallest block size.
+      // Moreover, XLA:GPU emitters need at least 32 threads in some places.
       int64 max_block_size = std::max(64LL, 512LL / NearestPowerOfTwo(fan_out));
       return std::min(
           max_block_size,
",0,train
d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support.

PiperOrigin-RevId: 226440102",register.cc,"@@ -173,7 +173,9 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_DEPTHWISE_CONV_2D, Register_DEPTHWISE_CONV_2D(),
              /* min_version */ 1,
              /* max_version */ 3);
-  AddBuiltin(BuiltinOperator_SVDF, Register_SVDF());
+  AddBuiltin(BuiltinOperator_SVDF, Register_SVDF(),
+             /* min_version */ 1,
+             /* max_version */ 2);
   AddBuiltin(BuiltinOperator_RNN, Register_RNN(),
              /* min_version */ 1,
              /* max_version */ 2);
",0,train
d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support.

PiperOrigin-RevId: 226440102",svdf.cc,"@@ -176,8 +176,9 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
                     context->ResizeTensor(context, output, output_size_array));
 
   // The weights are of consistent type, so it suffices to check one.
-  const bool is_hybrid_op =
-      (input->type == kTfLiteFloat32 && weights_feature->type == kTfLiteUInt8);
+  const bool is_hybrid_op = (input->type == kTfLiteFloat32 &&
+                             (weights_feature->type == kTfLiteUInt8 ||
+                              weights_feature->type == kTfLiteInt8));
 
   // Resize scratch.
   TfLiteIntArrayFree(node->temporaries);
@@ -203,7 +204,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     // of input tensors.
     node->temporaries->data[1] = scratch_tensor_index + 1;
     TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
-    input_quantized->type = kTfLiteUInt8;
+    input_quantized->type = weights_feature->type;
     input_quantized->allocation_type = kTfLiteArenaRw;
     if (!TfLiteIntArrayEqual(input_quantized->dims, input->dims)) {
       TfLiteIntArray* input_quantized_size = TfLiteIntArrayCopy(input->dims);
@@ -297,16 +298,24 @@ TfLiteStatus EvalHybrid(
   // Initialize the pointer to input.
   const float* input_ptr_batch = input->data.f;
 
-  // Initialize the pointer to storage for quantized values and
-  // scaling factors.
-  int8_t* quantized_input_ptr_batch =
-      reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+  // Initialize the pointer to storage for quantized values and the weights
+  // feature.
+  int8_t* quantized_input_ptr_batch;
+  const int8_t* weights_feature_ptr;
+  if (weights_feature->type == kTfLiteUInt8) {
+    quantized_input_ptr_batch =
+        reinterpret_cast<int8_t*>(input_quantized->data.uint8);
+    weights_feature_ptr =
+        reinterpret_cast<int8_t*>(weights_feature->data.uint8);
+  } else {
+    quantized_input_ptr_batch = input_quantized->data.int8;
+    weights_feature_ptr = weights_feature->data.int8;
+  }
 
+  // Initialize the pointer to storage for scaling factors.
   float* scaling_factors_ptr = scaling_factors->data.f;
 
-  // Other initializations.
-  const int8_t* weights_feature_ptr =
-      reinterpret_cast<int8_t*>(weights_feature->data.uint8);
+  // Initialize the weights scale.
   const float weights_feature_scale = weights_feature->params.scale;
 
   // Clear the activation (state left most column).
@@ -374,7 +383,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
                        bias, params, scratch, activation_state, output);
       break;
     }
-    case kTfLiteUInt8: {
+    case kTfLiteUInt8:
+    case kTfLiteInt8: {
       TfLiteTensor* input_quantized = GetTemporary(context, node, /*index=*/1);
       TfLiteTensor* scaling_factors = GetTemporary(context, node, /*index=*/2);
       TfLiteTensor* float_weights_time =
@@ -388,8 +398,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       // TODO(alanchiao): refactor logic out into dequantize function.
       if (!op_data->float_weights_time_initialized) {
         const float dequantization_scale = weights_time->params.scale;
-        const int8_t* weights_time_ptr =
-            reinterpret_cast<int8_t*>(weights_time->data.uint8);
+        const int8_t* weights_time_ptr;
+        if (weights_feature->type == kTfLiteUInt8) {
+          weights_time_ptr =
+              reinterpret_cast<int8_t*>(weights_time->data.uint8);
+        } else {
+          weights_time_ptr = weights_time->data.int8;
+        }
         for (int i = 0; i < NumElements(float_weights_time); ++i) {
           float_weights_time->data.f[i] =
               weights_time_ptr[i] * dequantization_scale;
",0,train
d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support.

PiperOrigin-RevId: 226440102",svdf_test.cc,"@@ -203,17 +203,30 @@ class SVDFOpModel : public BaseSVDFOpModel {
 class HybridSVDFOpModel : public BaseSVDFOpModel {
  public:
   HybridSVDFOpModel(int batches, int units, int input_size, int memory_size,
-                    int rank)
+                    int rank, TensorType tensor_type)
       : BaseSVDFOpModel(batches, units, input_size, memory_size, rank,
-                        TensorType_UINT8, TensorType_UINT8) {}
+                        tensor_type, tensor_type) {
+    tensor_type_ = tensor_type;
+  }
+
+  void SetWeights(int weights_idx, std::vector<float> f) {
+    if (tensor_type_ == TensorType_UINT8) {
+      SymmetricQuantizeAndPopulate(weights_idx, f);
+    } else {
+      SignedSymmetricQuantizeAndPopulate(weights_idx, f);
+    }
+  }
 
   void SetWeightsFeature(std::initializer_list<float> f) {
-    SymmetricQuantizeAndPopulate(weights_feature_, f);
+    SetWeights(weights_feature_, f);
   }
 
   void SetWeightsTime(std::initializer_list<float> f) {
-    SymmetricQuantizeAndPopulate(weights_time_, f);
+    SetWeights(weights_time_, f);
   }
+
+ protected:
+  TensorType tensor_type_;
 };
 
 class SVDFOpTest : public ::testing::Test {
@@ -312,9 +325,74 @@ TEST_F(SVDFOpTest, BlackBoxTestRank2) {
                 &svdf);
 }
 
-TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) {
+TEST_F(SVDFOpTest, BlackBoxTestHybridRank1Uint8) {
+  HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3,
+                         /*memory_size=*/10, /*rank=*/1, TensorType_UINT8);
+  svdf.SetWeightsFeature({-0.31930989, -0.36118156, 0.0079667, 0.37613347,
+                          0.22197971, 0.12416199, 0.27901134, 0.27557442,
+                          0.3905206, -0.36137494, -0.06634006, -0.10640851});
+
+  svdf.SetWeightsTime(
+      {-0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+       0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+       0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+       -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+       -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+       0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+       -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+       -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657});
+
+  VerifyGoldens(svdf_input, svdf_golden_output_rank_1, sizeof(svdf_input),
+                &svdf,
+                /*tolerance=*/0.002945);
+}
+
+TEST_F(SVDFOpTest, BlackBoxTestHybridRank2Uint8) {
+  HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3,
+                         /*memory_size=*/10, /*rank=*/2, TensorType_UINT8);
+  svdf.SetWeightsFeature({-0.31930989, 0.0079667,   0.39296314,  0.37613347,
+                          0.12416199,  0.15785322,  0.27901134,  0.3905206,
+                          0.21931258,  -0.36137494, -0.10640851, 0.31053296,
+                          -0.36118156, -0.0976817,  -0.36916667, 0.22197971,
+                          0.15294972,  0.38031587,  0.27557442,  0.39635518,
+                          -0.21580373, -0.06634006, -0.02702999, 0.27072677});
+
+  svdf.SetWeightsTime(
+      {-0.31930989, 0.37613347,  0.27901134,  -0.36137494, -0.36118156,
+       0.22197971,  0.27557442,  -0.06634006, 0.0079667,   0.12416199,
+
+       0.3905206,   -0.10640851, -0.0976817,  0.15294972,  0.39635518,
+       -0.02702999, 0.39296314,  0.15785322,  0.21931258,  0.31053296,
+
+       -0.36916667, 0.38031587,  -0.21580373, 0.27072677,  0.23622236,
+       0.34936687,  0.18174365,  0.35907319,  -0.17493086, 0.324846,
+
+       -0.10781813, 0.27201805,  0.14324132,  -0.23681851, -0.27115166,
+       -0.01580888, -0.14943552, 0.15465137,  0.09784451,  -0.0337657,
+
+       -0.14884081, 0.19931212,  -0.36002168, 0.34663299,  -0.11405486,
+       0.12672701,  0.39463779,  -0.07886535, -0.06384811, 0.08249187,
+
+       -0.26816407, -0.19905911, 0.29211238,  0.31264046,  -0.28664589,
+       0.05698794,  0.11613581,  0.14078894,  0.02187902,  -0.21781836,
+
+       -0.15567942, 0.08693647,  -0.38256618, 0.36580828,  -0.22922277,
+       -0.0226903,  0.12878349,  -0.28122205, -0.10850525, -0.11955214,
+
+       0.27179423,  -0.04710215, 0.31069002,  0.22672787,  0.09580326,
+       0.08682203,  0.1258215,   0.1851041,   0.29228821,  0.12366763});
+
+  VerifyGoldens(svdf_input, svdf_golden_output_rank_2, sizeof(svdf_input),
+                &svdf,
+                /*tolerance=*/0.00625109);
+}
+
+TEST_F(SVDFOpTest, BlackBoxTestHybridRank1Int8) {
   HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3,
-                         /*memory_size=*/10, /*rank=*/1);
+                         /*memory_size=*/10, /*rank=*/1, TensorType_INT8);
   svdf.SetWeightsFeature({-0.31930989, -0.36118156, 0.0079667, 0.37613347,
                           0.22197971, 0.12416199, 0.27901134, 0.27557442,
                           0.3905206, -0.36137494, -0.06634006, -0.10640851});
@@ -337,9 +415,9 @@ TEST_F(SVDFOpTest, BlackBoxTestHybridRank1) {
                 /*tolerance=*/0.002945);
 }
 
-TEST_F(SVDFOpTest, BlackBoxTestHybridRank2) {
+TEST_F(SVDFOpTest, BlackBoxTestHybridRank2Int8) {
   HybridSVDFOpModel svdf(/*batches=*/2, /*units=*/4, /*input_size=*/3,
-                         /*memory_size=*/10, /*rank=*/2);
+                         /*memory_size=*/10, /*rank=*/2, TensorType_INT8);
   svdf.SetWeightsFeature({-0.31930989, 0.0079667,   0.39296314,  0.37613347,
                           0.12416199,  0.15785322,  0.27901134,  0.3905206,
                           0.21931258,  -0.36137494, -0.10640851, 0.31053296,
",0,train
d4ddccd3cca4fc837c66ae1dfa190739420ad122,tensorflow/tensorflow,"SVDF Hybrid op INT8 support.

PiperOrigin-RevId: 226440102",operator.cc,"@@ -472,6 +472,20 @@ class Svdf : public BuiltinOperator<SvdfOperator, ::tflite::SVDFOptions,
   }
 
   int GetVersion(const OperatorSignature& op_signature) const override {
+    const string& input_name = op_signature.op->inputs[0];
+    const string& weights_feature_name = op_signature.op->inputs[1];
+    const string& output_name = op_signature.op->outputs[0];
+    const Array& input_array = op_signature.model->GetArray(input_name);
+    const Array& weights_feature_array =
+        op_signature.model->GetArray(weights_feature_name);
+    const Array& output_array = op_signature.model->GetArray(output_name);
+    // If the op is a signed int8 hybrid operation, we need to return
+    // version 2.
+    if (input_array.data_type == ArrayDataType::kFloat &&
+        weights_feature_array.data_type == ArrayDataType::kInt8 &&
+        output_array.data_type == ArrayDataType::kFloat) {
+      return 2;
+    }
     return 1;
   }
 };
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",xla_device.cc,"@@ -434,6 +434,16 @@ Status XlaDevice::MakeTensorFromProto(const TensorProto& tensor_proto,
   return status;
 }
 
+void XlaDevice::SetRequiresSyncOnCompletion(bool sync_on_completion) {
+  mutex_lock lock(mu_);
+  sync_on_completion_ = sync_on_completion;
+}
+
+bool XlaDevice::RequiresSyncOnCompletion() const {
+  mutex_lock lock(mu_);
+  return sync_on_completion_;
+}
+
 XlaDeviceOpRegistrations* RegisterXlaDeviceKernels(const char* device,
                                                    const char* jit_device) {
   // Any op assigned to the device that isn't rewritten by the graph rewriter
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",xla_device.h,"@@ -151,6 +151,12 @@ class XlaDevice : public LocalDevice {
   // information for GPU and TPU devices.
   Status UseGpuDeviceInfo() LOCKS_EXCLUDED(mu_);
 
+  // Instructs this XlaDevice to return 'sync_on_completion' for
+  // RequiresSyncOnCompletion().
+  void SetRequiresSyncOnCompletion(bool sync_on_completion) LOCKS_EXCLUDED(mu_);
+
+  bool RequiresSyncOnCompletion() const override LOCKS_EXCLUDED(mu_);
+
  private:
   xla::LocalClient* client() const;
   Allocator* GetAllocatorLocked(AllocatorAttributes attr)
@@ -165,7 +171,7 @@ class XlaDevice : public LocalDevice {
   static Status GetMetadataFromDevice(DeviceBase* device,
                                       const XlaDevice::Metadata** metadata);
 
-  mutex mu_;
+  mutable mutex mu_;
   // The metadata of this XlaDevice.
   const Metadata xla_metadata_;
   // Which hardware device in the client's platform this XlaDevice controls.
@@ -207,6 +213,10 @@ class XlaDevice : public LocalDevice {
 
   // Thread pool used for running closures
   std::unique_ptr<thread::ThreadPool> thread_pool_;
+
+  // True if the device requires XlaDevice::Sync to be called on completion
+  // regardless of status.
+  bool sync_on_completion_ GUARDED_BY(mu_) = false;
 };
 
 // Builds OpKernel registrations on 'device' for the JIT operators
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",stream_pool.cc,"@@ -28,8 +28,14 @@ StreamPool::Ptr StreamPool::BorrowStream(se::StreamExecutor* executor) {
       // Re-use an existing stream from the pool.
       stream = std::move(streams_.back());
       streams_.pop_back();
-      VLOG(1) << stream->DebugStreamPointers()
-              << "" StreamPool reusing existing stream"";
+      if (stream->ok()) {
+        VLOG(1) << stream->DebugStreamPointers()
+                << "" StreamPool reusing existing stream"";
+      } else {
+        VLOG(1) << stream->DebugStreamPointers()
+                << "" stream was not ok, StreamPool deleting"";
+        stream = nullptr;
+      }
     }
   }
 
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",stream_pool_test.cc,"@@ -132,5 +132,39 @@ TEST_F(StreamPoolTest, BadStreamDiscarded) {
   EXPECT_EQ(stream2_ptr, stream3_ptr);
 }
 
+TEST_F(StreamPoolTest, BadStreamAfterReturnDiscarded) {
+  std::unique_ptr<se::StreamExecutor> executor = NewStreamExecutor();
+  StreamPool pool;
+
+  // Borrow a stream.
+  StreamPool::Ptr stream1 = pool.BorrowStream(executor.get());
+  EXPECT_TRUE(stream1->ok());
+
+  // Return the stream, but hold a handle to it.
+  se::Stream* stream1_ptr = stream1.get();
+  stream1 = nullptr;
+
+  // Now that stream1 is back in the pool, force an error on the stream. Here
+  // we call a method that requires DNN support, which we know the Host
+  // platform doesn't support.
+  stream1_ptr->ThenDepthConcatenate({}, {}, nullptr);
+  EXPECT_FALSE(stream1_ptr->ok());
+
+  // Borrow stream2.
+  StreamPool::Ptr stream2 = pool.BorrowStream(executor.get());
+  EXPECT_TRUE(stream2->ok());
+
+  // The underlying streams should be different. They would have been
+  // the same, but since we forced an error on stream1, it cannot be
+  // put back into the pool. Sadly we can't just check:
+  //    EXPECT_NE(stream1_ptr, stream2_ptr);
+  //
+  // The above should hold logically, but it may fail if the new
+  // stream instance allocated for stream2 happens to reside in the
+  // same memory address as stream1, which has been deleted.
+  //
+  // The check that stream2->ok() serves as a good-enough check.
+}
+
 }  // namespace
 }  // namespace xla
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",device.h,"@@ -106,6 +106,10 @@ class Device : public DeviceBase {
   // at completion.
   virtual Status Sync() = 0;
 
+  // Override this to return true for devices that require a Sync() call before
+  // session completion.
+  virtual bool RequiresSyncOnCompletion() const { return false; }
+
   // Optionally modify the device's GraphDef before execution.
   //
   // This method should be considered experimental and is supplied to enable
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",executor.cc,"@@ -2301,13 +2301,15 @@ void ExecutorState::Finish() {
   auto done_cb = std::move(done_cb_);
   auto runner = std::move(runner_);
   mu_.unlock();
-  if (sync_on_finish_ && status.ok()) {
+  Device* device = impl_->params_.device;
+  if ((sync_on_finish_ && status.ok()) || device->RequiresSyncOnCompletion()) {
     // Block until the device has finished all queued operations. For
     // devices like GPUs that continue to execute Ops after their Compute
     // methods have completed, this ensures that control is not returned to
     // the user until the step (and its side-effects) has actually completed.
-    status = impl_->params_.device->Sync();
+    status.Update(device->Sync());
   }
+
   delete this;
   CHECK(done_cb != nullptr);
   runner([=]() { done_cb(status); });
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",cancellation.cc,"@@ -89,6 +89,16 @@ bool CancellationManager::DeregisterCallback(CancellationToken token) {
   }
 }
 
+bool CancellationManager::TryDeregisterCallback(CancellationToken token) {
+  mutex_lock lock(mu_);
+  if (is_cancelled_ || is_cancelling_) {
+    return false;
+  } else {
+    callbacks_.erase(token);
+    return true;
+  }
+}
+
 CancellationManager::~CancellationManager() {
   if (!callbacks_.empty()) {
     StartCancel();
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",cancellation.h,"@@ -122,6 +122,15 @@ class CancellationManager {
   // cancellation manager.
   bool DeregisterCallback(CancellationToken token);
 
+  // Deregister the callback that, when registered, was associated
+  // with the given cancellation token. Returns true iff the callback
+  // was deregistered and will not be invoked; otherwise returns false
+  // immediately, with no guarantee that the callback has completed.
+  //
+  // This method is guaranteed to return true if StartCancel has not been
+  // called.
+  bool TryDeregisterCallback(CancellationToken token);
+
  private:
   bool is_cancelling_;
   std::atomic_bool is_cancelled_;
",0,train
a54310b1faa39df94dcef9ad1b5aaa0acc691e35,tensorflow/tensorflow,"Internal change.

PiperOrigin-RevId: 213770000",cancellation_test.cc,"@@ -115,4 +115,56 @@ TEST(Cancellation, IsCancelled) {
   delete cm;
 }
 
+TEST(Cancellation, TryDeregisterWithoutCancel) {
+  bool is_cancelled = false;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(
+      token, [&is_cancelled]() { is_cancelled = true; });
+  EXPECT_TRUE(registered);
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_TRUE(deregistered);
+  delete manager;
+  EXPECT_FALSE(is_cancelled);
+}
+
+TEST(Cancellation, TryDeregisterAfterCancel) {
+  bool is_cancelled = false;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(
+      token, [&is_cancelled]() { is_cancelled = true; });
+  EXPECT_TRUE(registered);
+  manager->StartCancel();
+  EXPECT_TRUE(is_cancelled);
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_FALSE(deregistered);
+  delete manager;
+}
+
+TEST(Cancellation, TryDeregisterDuringCancel) {
+  Notification cancel_started, finish_callback, cancel_complete;
+  CancellationManager* manager = new CancellationManager();
+  auto token = manager->get_cancellation_token();
+  bool registered = manager->RegisterCallback(token, [&]() {
+    cancel_started.Notify();
+    finish_callback.WaitForNotification();
+  });
+  EXPECT_TRUE(registered);
+
+  thread::ThreadPool w(Env::Default(), ""test"", 1);
+  w.Schedule([&]() {
+    manager->StartCancel();
+    cancel_complete.Notify();
+  });
+  cancel_started.WaitForNotification();
+
+  bool deregistered = manager->TryDeregisterCallback(token);
+  EXPECT_FALSE(deregistered);
+
+  finish_callback.Notify();
+  cancel_complete.WaitForNotification();
+  delete manager;
+}
+
 }  // namespace tensorflow
",0,train
811a9ef9974d61bf1a351aaeb3895e95909aece1,tensorflow/tensorflow,Fix for fsanitize=undefined.,common.h,"@@ -156,7 +156,7 @@ inline int32 MultiplyByQuantizedMultiplier(int32 x, int32 quantized_multiplier,
                              right_shift);
 }
 
-inline int32 MultiplyByQuantizedMultiplier(std::int64_t x,
+inline int32 MultiplyByQuantizedMultiplier(int64_t x,
                                            int32 quantized_multiplier,
                                            int shift) {
   // Inputs:
@@ -172,7 +172,7 @@ inline int32 MultiplyByQuantizedMultiplier(std::int64_t x,
 
   int32_t reduced_multiplier = (quantized_multiplier + (1 << 15)) >> 16;
   int total_shift = 15 - shift;
-  x = (x * (int64_t)reduced_multiplier) + (1 << (total_shift - 1));
+  x = (x * (int64_t)reduced_multiplier) + ((int64_t)1 << (total_shift - 1));
   int32_t result = x >> total_shift;
   return result;
 }
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_cpu_allocator.cc,"@@ -21,7 +21,6 @@ namespace tensorflow {
 
 constexpr const char* MklCPUAllocator::kMaxLimitStr;
 constexpr const size_t MklCPUAllocator::kDefaultMaxLimit;
-
 }  // namespace tensorflow
 
 #endif  // INTEL_MKL
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_cpu_allocator.h,"@@ -30,6 +30,7 @@ limitations under the License.
 #include ""tensorflow/core/platform/mem.h""
 #include ""tensorflow/core/platform/numa.h""
 #include ""tensorflow/core/util/env_var.h""
+#include ""tensorflow/core/util/onednn_env_vars.h""
 #ifdef _WIN32
 typedef unsigned int uint;
 #endif
@@ -217,7 +218,7 @@ class MklCPUAllocator : public Allocator {
     // otherwise call large-size allocator (BFC). We found that BFC allocator
     // does not deliver good performance for small allocations when
     // inter_op_parallelism_threads is high.
-    if (always_use_system_allocator_ ||
+    if (UseSystemAlloc() ||
         num_bytes < kSmallAllocationsThreshold) {
       return small_size_allocator_->AllocateRaw(alignment, num_bytes);
     } else {
@@ -230,7 +231,7 @@ class MklCPUAllocator : public Allocator {
   inline void DeallocateRaw(void* ptr) override {
     // Check if ptr is for ""small"" allocation. If it is, then call Free
     // directly. Otherwise, call BFC to handle free.
-    if (always_use_system_allocator_ || IsSmallSizeAllocation(ptr)) {
+    if (UseSystemAlloc() || IsSmallSizeAllocation(ptr)) {
       small_size_allocator_->DeallocateRaw(ptr);
     } else {
       mutex_lock l(mutex_);
@@ -265,11 +266,6 @@ class MklCPUAllocator : public Allocator {
 
  private:
   // Hooks provided by this allocator for memory allocation routines from MKL
-  bool always_use_system_allocator_ = [] {
-    bool value = false;
-    TF_CHECK_OK(ReadBoolFromEnvVar(""TF_USE_SYSTEM_ALLOCATOR"", false, &value));
-    return value;
-  }();
   static inline void* MallocHook(size_t size) {
     VLOG(3) << ""MklCPUAllocator: In MallocHook"";
     return cpu_allocator()->AllocateRaw(kAlignment, size);
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_conv_ops.h,"@@ -38,6 +38,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/util/mkl_util.h""
+#include ""tensorflow/core/util/onednn_env_vars.h""
 #include ""tensorflow/core/util/padding.h""
 #include ""tensorflow/core/util/tensor_format.h""
 
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_matmul_ops_common.h,"@@ -25,6 +25,7 @@ limitations under the License.
 #include ""tensorflow/core/framework/op.h""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/util/mkl_util.h""
+#include ""tensorflow/core/util/onednn_env_vars.h""
 
 using dnnl::inner_product_forward;
 using dnnl::primitive_attr;
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,mkl_util.h,"@@ -135,8 +135,6 @@ inline void execute_primitives(
   }
 }
 
-bool AreWeightsFrozen();
-
 // In oneDNN v1.x, the format (ex. NCHW) used to initialize a memory descriptor
 // (md) structure will no longer be recorded in its `format` field. Instead, it
 // will be set to a canonical `blocked` format for every fully described md.
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,onednn_env_vars.cc,"@@ -1,4 +1,4 @@
-/* Copyright 2021 The TensorFlow Authors. All Rights Reserved.
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the ""License"");
 you may not use this file except in compliance with the License.
@@ -15,7 +15,9 @@ limitations under the License.
 
 #ifdef INTEL_MKL
 
-#include ""tensorflow/core/util/mkl_util.h""
+#include ""absl/base/call_once.h""
+#include ""tensorflow/core/util/onednn_env_vars.h""
+#include ""tensorflow/core/util/env_var.h""
 
 namespace tensorflow {
 
@@ -28,5 +30,16 @@ bool AreWeightsFrozen() {
   });
   return weights_const;
 }
+
+bool UseSystemAlloc() {
+  static bool use_sys_alloc = false;
+  static absl::once_flag once;
+  absl::call_once(once, [&] {
+    TF_CHECK_OK(ReadBoolFromEnvVar(""TF_ONEDNN_USE_SYSTEM_ALLOCATOR"",
+                                   /*default_value*/ false, &use_sys_alloc));
+  });
+  return use_sys_alloc;
+}
+
 }  // namespace tensorflow
 #endif  // INTEL_MKL
",0,train
3a86edf9a298a33f08936daffa97119e545fd706,tensorflow/tensorflow,review changes.,onednn_env_vars.h,"@@ -0,0 +1,25 @@
+/* Copyright 2022 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_
+#define TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_
+#ifdef INTEL_MKL
+
+namespace tensorflow {
+  bool AreWeightsFrozen();
+  bool UseSystemAlloc();
+}  // namespace tensorflow
+#endif  // INTEL_MKL
+#endif  // TENSORFLOW_CORE_UTIL_ONEDNN_ENV_VARS_H_
",0,train
29a587e4aac990c1529b3c4a3331e3945cfff0ff,tensorflow/tensorflow,"Improved support for variables

PiperOrigin-RevId: 156646899",op_types.cc,"@@ -43,7 +43,7 @@ bool IsTranspose(const NodeDef& node) {
 bool IsVariable(const NodeDef& node) {
   const auto op = node.op();
   return op == ""Variable"" || op == ""VariableV2"" || op == ""AutoReloadVariable"" ||
-         op == ""VarHandleOp"";
+         op == ""VarHandleOp"" || op == ""TemporaryVariable"";
 }
 
 bool IsMerge(const NodeDef& node) {
",0,train
689f7a0b8468c8feec4d2a6db54bb6bc3759fbe2,tensorflow/tensorflow,"Fix #5946.
Fix error in documentation about global_variables_initializer.",variables.py,"@@ -82,12 +82,12 @@ class Variable(object):
   ```
 
   The most common initialization pattern is to use the convenience function
-  `global_variable_initializers()` to add an Op to the graph that initializes
+  `global_variables_initializer()` to add an Op to the graph that initializes
   all the variables. You then run that Op after launching the graph.
 
   ```python
   # Add an Op to initialize global variables.
-  init_op = tf.global_variable_initializers()
+  init_op = tf.global_variables_initializer()
 
   # Launch the graph in a session.
   with tf.Session() as sess:
@@ -494,7 +494,7 @@ class Variable(object):
 
     ```python
     v = tf.Variable([1, 2])
-    init = tf.global_variable_initializers()
+    init = tf.global_variables_initializer()
 
     with tf.Session() as sess:
         sess.run(init)
",0,train
6ae13f689ff2c65691e790cd50a3c1c867ad00ac,tensorflow/tensorflow,"Migrate experimental_relax_shapes to reduce_retracing

PiperOrigin-RevId: 438578345",benchmarks_test.py,"@@ -470,7 +470,7 @@ class MicroBenchmarks(benchmarks_test_base.MicroBenchmarksBase):
                                             num_iters,
                                             execution_mode=None):
 
-    @def_function.function(experimental_relax_shapes=True)
+    @def_function.function(reduce_retracing=True)
     def defun_matmul(m):
       return math_ops.matmul(m, m)
 
",0,train
188a2a87248c2d96140080542dd402ac517df59b,tensorflow/tensorflow,"Fix ShapeOp result type for scalar input

ShapeOp returns empty 1-d tensor if the input is scalar.

Also, allow folder to fold scalar inputs.

TESTED with unit test

PiperOrigin-RevId: 256571257",tf_ops.cc,"@@ -573,9 +573,7 @@ static LogicalResult Verify(ShapeOp op) {
     // The operand is a ranked tensor.
     if (resultType.hasStaticShape()) {
       if ((!rankedTensorType.getShape().empty() &&
-           resultType.getDimSize(0) != rankedTensorType.getShape().size()) ||
-          (rankedTensorType.getShape().empty() &&
-           resultType.getDimSize(0) != 1))
+           resultType.getDimSize(0) != rankedTensorType.getShape().size()))
         return op.emitOpError(
             ""requires dimension size of result to match rank of operand"");
     }
@@ -597,7 +595,6 @@ OpFoldResult ShapeOp::fold(ArrayRef<Attribute> operands) {
 
   auto shape = rankedTensorType.getShape();
   int rank = shape.size();
-  if (rank == 0) return {};
 
   Builder b(getContext());
   auto elementType = getType().cast<ShapedType>().getElementType();
",0,train
8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping.

Rename xla_detailed_logging to disable both logging and dumping.

PiperOrigin-RevId: 364414688
Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",xla_compilation_cache.cc,"@@ -181,7 +181,7 @@ Status XlaCompilationCache::BuildExecutable(
   build_options.set_result_layout(result.xla_output_shape);
   build_options.set_device_allocator(options.device_allocator.get());
   build_options.set_alias_passthrough_params(options.alias_passthrough_params);
-  build_options.mutable_debug_options()->set_xla_detailed_logging(
+  build_options.mutable_debug_options()->set_xla_detailed_logging_and_dumping(
       options.detailed_logging);
   TF_ASSIGN_OR_RETURN(
       auto executables,
",0,train
8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping.

Rename xla_detailed_logging to disable both logging and dumping.

PiperOrigin-RevId: 364414688
Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",debug_options_flags.cc,"@@ -76,7 +76,7 @@ DebugOptions DefaultDebugOptionsIgnoringFlags() {
   opts.set_xla_cpu_enable_xprof_traceme(false);
   opts.set_xla_gpu_unsafe_fallback_to_driver_on_ptxas_not_found(false);
   opts.set_xla_multiheap_size_constraint_per_heap(-1);
-  opts.set_xla_detailed_logging(true);
+  opts.set_xla_detailed_logging_and_dumping(true);
   return opts;
 }
 
",0,train
8703a34f9d998ff33eae3124bd634e663aed252c,tensorflow/tensorflow,"Add a debug option to disable xla dumping.

Rename xla_detailed_logging to disable both logging and dumping.

PiperOrigin-RevId: 364414688
Change-Id: I6d1d4c394d13653e94be74005c45f744c1edceab",dump.cc,"@@ -67,6 +67,11 @@ struct CanonicalDebugOptions {
       dump_as_text = true;
     }
 
+    // Disable dumping if specified by the user.
+    if (!opts.xla_detailed_logging_and_dumping()) {
+      dump_to = """";
+    }
+
     // If dump_to is empty, default to dumping to stdout, so long as some dump
     // format other than dump-as-url was specified.  If the user only specified
     // --xla_dump_hlo_as_url, then don't dump to stdout, that is likely noise
@@ -110,7 +115,7 @@ struct CanonicalDebugOptions {
     // Output dirs ""sponge"" and ""test_undeclared_outputs_dir"" (case-insensitive)
     // have a special meaning: Dump into the directory specified by the
     // environment variable TEST_UNDECLARED_OUTPUTS_DIR.
-    string dump_to_lower = absl::AsciiStrToLower(opts.xla_dump_to());
+    string dump_to_lower = absl::AsciiStrToLower(dump_to);
     if (dump_to_lower == ""sponge"" ||
         dump_to_lower == ""test_undeclared_outputs_dir"") {
       if (!tensorflow::io::GetTestUndeclaredOutputsDir(&dump_to)) {
",0,train
055855171b52a3d284ded6a549ee5c6471d9a4c9,tensorflow/tensorflow,"Fix space_to_batch_converter on windows.

PiperOrigin-RevId: 333398025
Change-Id: I4d713db2c910d462f1c70a1cd4979a9a3cfe0905",space_to_batch_converter.cc,"@@ -371,8 +371,8 @@ Status ConvolutionVisitor::HandleConvolution(HloInstruction* convolution) {
   new_dim_numbers.set_output_feature_dimension(dim_count);
 
   int p = 0;
-  for (auto [k, v] : dim_map) {
-    transpose_dims[p] = v;
+  for (const auto& entry : dim_map) {
+    transpose_dims[p] = entry.second;
     p++;
   }
 
",0,train
0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA.

PiperOrigin-RevId: 227579512",cond_v2_test.py,"@@ -145,6 +145,22 @@ class CondV2Test(test.TestCase):
     self.assertEqual(cond_op.type, ""If"")
     return output, cond_op
 
+  def _createNestedCond(self, name):
+    """"""Like _createCond but creates a nested cond_v2 call as well.""""""
+    pred = constant_op.constant(True, name=""pred"")
+    x = constant_op.constant(1.0, name=""x"")
+
+    def true_fn():
+      return cond_v2.cond_v2(pred, lambda: x, lambda: x + 1)
+
+    def false_fn():
+      return x + 2
+
+    output = cond_v2.cond_v2(pred, true_fn, false_fn, name=name)
+    cond_op = output.op.inputs[0].op
+    self.assertEqual(cond_op.type, ""If"")
+    return output, cond_op
+
   def testDefaultName(self):
     with ops.Graph().as_default():
       _, cond_op = self._createCond(None)
@@ -645,9 +661,14 @@ class CondV2Test(test.TestCase):
       # Build the cond_v2 in an XLA context
       xla_context = control_flow_ops.XLAControlFlowContext()
       xla_context.Enter()
-      cond_output, _ = self._createCond(""cond"")
+      cond_output, cond_op = self._createCond(""cond"")
       xla_context.Exit()
 
+      # Check lowering attr is not set.
+      with self.assertRaises(ValueError):
+        cond_op.get_attr(""_lower_using_switch_merge"")
+
+      # Check the actual graph that is run.
       run_options = config_pb2.RunOptions(output_partition_graphs=True)
       run_metadata = config_pb2.RunMetadata()
       sess.run(cond_output, options=run_options, run_metadata=run_metadata)
@@ -672,6 +693,29 @@ class CondV2Test(test.TestCase):
           if_found,
           ""An `If` op was not found, but the graph should not be lowered."")
 
+  @test_util.run_deprecated_v1
+  def testNestedLoweringDisabledInXLA(self):
+    # Build the cond_v2 in an XLA context
+    xla_context = control_flow_ops.XLAControlFlowContext()
+    xla_context.Enter()
+    _, cond_op = self._createNestedCond(""cond"")
+    xla_context.Exit()
+
+    # Check lowering attr is not set for either If node.
+    with self.assertRaises(ValueError):
+      cond_op.get_attr(""_lower_using_switch_merge"")
+
+    nested_if_ops = []
+    for func in ops.get_default_graph()._functions.values():
+      nested_if_ops.extend(op for op in func._graph.get_operations()
+                           if op.type == ""If"")
+    self.assertEqual(len(nested_if_ops), 1)
+    with self.assertRaises(ValueError):
+      nested_if_ops[0].get_attr(""_lower_using_switch_merge"")
+
+    # TODO(skyewm): check the actual graphs that are run once we have a way to
+    # programmatically access those graphs.
+
   @test_util.run_deprecated_v1
   def testLoweringDisabledWithSingleThreadedExecutorContext(self):
     with self.session(graph=ops.Graph()) as sess:
",0,train
0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA.

PiperOrigin-RevId: 227579512",control_flow_util.py,"@@ -57,6 +57,15 @@ def InXlaContext(graph):
   return GetContainingXLAContext(ctxt) is not None
 
 
+def GraphOrParentsInXlaContext(graph):
+  while True:
+    if InXlaContext(graph): return True
+    try:
+      graph = graph.outer_graph
+    except AttributeError:
+      return False
+
+
 def IsInWhileLoop(op):
   ctxt = op._get_control_flow_context()  # pylint: disable=protected-access
   return GetContainingWhileContext(ctxt) is not None
",0,train
0c2565de1108fc2063ec04b335ef7356298b7ad6,tensorflow/tensorflow,"Don't lower nested control flow if we're compiling to XLA.

PiperOrigin-RevId: 227579512",control_flow_util_v2.py,"@@ -114,7 +114,7 @@ def maybe_set_lowering_attr(op):
   Args:
     op: An `If` or `While` Operation.
   """"""
-  if (not control_flow_util.IsInXLAContext(op) and
+  if (not control_flow_util.GraphOrParentsInXlaContext(op.graph) and
       context.context().get_function_call_options().executor_type
       != ""SINGLE_THREADED_EXECUTOR""):
     # pylint: disable=protected-access
",0,train
c412c22d19e1a198b9e0b2409f026ce742b15df6,tensorflow/tensorflow,"[MLIR][HLO] Hold symbolic and concrete factors together in `SymbolicProduct`

Introduce struct `SymbolicProduct` to hold concrete and symbolic factors
together. This is in preparation to merge shape collapsing and expanding, which
requires in-depth analysis of symbolic products.

PiperOrigin-RevId: 436787990",symbolic_shape_optimization.cc,"@@ -205,16 +205,13 @@ struct RemoveComputeReshapeShape final
   }
 };
 
-bool IsSimpleProduct(
-    AffineExpr expr,
-    llvm::function_ref<void(AffineConstantExpr)> cbkConstantFactor,
-    llvm::function_ref<void(AffineSymbolExpr)> cbkSymbolicFactor) {
+bool IsProduct(AffineExpr expr,
+               llvm::function_ref<void(AffineConstantExpr)> cbkConstantFactor,
+               llvm::function_ref<void(AffineSymbolExpr)> cbkSymbolicFactor) {
   auto binExpr = expr.dyn_cast<AffineBinaryOpExpr>();
   if (binExpr && binExpr.getKind() == AffineExprKind::Mul) {
-    return IsSimpleProduct(binExpr.getLHS(), cbkConstantFactor,
-                           cbkSymbolicFactor) &&
-           IsSimpleProduct(binExpr.getRHS(), cbkConstantFactor,
-                           cbkSymbolicFactor);
+    return IsProduct(binExpr.getLHS(), cbkConstantFactor, cbkSymbolicFactor) &&
+           IsProduct(binExpr.getRHS(), cbkConstantFactor, cbkSymbolicFactor);
   }
   if (auto symExpr = expr.dyn_cast<AffineSymbolExpr>()) {
     cbkSymbolicFactor(symExpr);
@@ -227,10 +224,10 @@ bool IsSimpleProduct(
   return false;
 }
 
-bool IsSimpleProduct(const SymbolicExpr &symbolicExpr,
-                     llvm::function_ref<void(int64_t)> cbkConstantFactor,
-                     llvm::function_ref<void(Symbol)> cbkSymbolicFactor) {
-  return IsSimpleProduct(
+bool IsSymbolicProduct(const SymbolicExpr &symbolicExpr,
+                       llvm::function_ref<void(int64_t)> cbkConstantFactor,
+                       llvm::function_ref<void(Symbol)> cbkSymbolicFactor) {
+  return IsProduct(
       symbolicExpr.expr,
       [&](AffineConstantExpr cexpr) { cbkConstantFactor(cexpr.getValue()); },
       [&](AffineSymbolExpr sexpr) {
@@ -238,11 +235,21 @@ bool IsSimpleProduct(const SymbolicExpr &symbolicExpr,
       });
 }
 
-bool IsSimpleProduct(const SymbolicExpr &symbolicExpr, int64_t *concreteProduct,
-                     SmallVectorImpl<Symbol> *symbolicFactors) {
-  return IsSimpleProduct(
-      symbolicExpr, [&](int64_t c) { *concreteProduct *= c; },
-      [&](Symbol s) { symbolicFactors->push_back(s); });
+// Represents a product of symbolic and concrete factors. This will allow us to
+// prove product equalities symbolically.
+struct SymbolicProduct {
+  // Product of all concrete factors.
+  int64_t concrete = 1;
+  // List all symbolic factors as they cannot be aggregated.
+  llvm::SmallVector<Symbol> symbolic;
+  bool empty() { return concrete == 1 && symbolic.empty(); }
+};
+
+bool IsSymbolicProduct(const SymbolicExpr &symbolicExpr,
+                       SymbolicProduct *product) {
+  return IsSymbolicProduct(
+      symbolicExpr, [&](int64_t c) { product->concrete *= c; },
+      [&](Symbol s) { product->symbolic.push_back(s); });
 }
 
 struct RemoveRedundantCstrReshapable final
@@ -278,13 +285,11 @@ struct RemoveRedundantCstrReshapable final
 
     // We can only handle simple products with constants and symbols. Find all
     // the factors based on the number of elements.
-    int64_t concreteProductNumElems = 1;
-    SmallVector<Symbol> remainingSymbolicFactorsNumElems;
-    if (!IsSimpleProduct(numElements, &concreteProductNumElems,
-                         &remainingSymbolicFactorsNumElems)) {
+    SymbolicProduct numElementsRemainingFactors;
+    if (!IsSymbolicProduct(numElements, &numElementsRemainingFactors)) {
       return failure();
     }
-    assert(concreteProductNumElems >= 1 &&
+    assert(numElementsRemainingFactors.concrete >= 1 &&
            ""number of elements cannot entail negative or zero factors"");
 
     // Find all factors based on the dynamic shape.
@@ -296,7 +301,7 @@ struct RemoveRedundantCstrReshapable final
     int64_t concreteProductDynShape = 1;
     for (const auto &dim : *dynShapeDims) {
       SmallVector<Symbol> partialSymbolicFactorsDynShape;
-      if (!IsSimpleProduct(
+      if (!IsSymbolicProduct(
               dim,
               [&](int64_t c) {
                 if (c != -1) concreteProductDynShape *= c;
@@ -305,9 +310,10 @@ struct RemoveRedundantCstrReshapable final
         return failure();
       }
       for (const Symbol &symDynShape : partialSymbolicFactorsDynShape) {
-        auto *it = llvm::find(remainingSymbolicFactorsNumElems, symDynShape);
-        if (it == remainingSymbolicFactorsNumElems.end()) return failure();
-        remainingSymbolicFactorsNumElems.erase(it);
+        auto *it =
+            llvm::find(numElementsRemainingFactors.symbolic, symDynShape);
+        if (it == numElementsRemainingFactors.symbolic.end()) return failure();
+        numElementsRemainingFactors.symbolic.erase(it);
       }
     }
     assert(concreteProductDynShape >= 1 &&
@@ -316,15 +322,16 @@ struct RemoveRedundantCstrReshapable final
     // A wildcard dimension can subsume the remaining symbolic factors and
     // potentially also a concrete factor.
     if (unique_wildcard_dimension) {
-      if (concreteProductNumElems % concreteProductDynShape != 0)
+      if (numElementsRemainingFactors.concrete % concreteProductDynShape != 0)
         return failure();
       rewriter.replaceOpWithNewOp<shape::ConstWitnessOp>(op, true);
       return success();
     }
 
     // W/o a wildcard, the symbolic and concrete products must be equal.
-    bool isReshapable = remainingSymbolicFactorsNumElems.empty() &&
-                        concreteProductNumElems == concreteProductDynShape;
+    bool isReshapable =
+        numElementsRemainingFactors.symbolic.empty() &&
+        numElementsRemainingFactors.concrete == concreteProductDynShape;
     rewriter.replaceOpWithNewOp<shape::ConstWitnessOp>(op, isReshapable);
     return success();
   }
@@ -359,42 +366,42 @@ struct TurnDynamicReshapeIntoCollapseShape final
 
       // Find the concrete/symbolic factors for the current dimension of the
       // target shape.
-      int64_t remainingConcreteProductShapeDim = 1;
-      SmallVector<Symbol> remainingSymbolicFactorsShapeDim;
-      if (!IsSimpleProduct(shapeDim, &remainingConcreteProductShapeDim,
-                           &remainingSymbolicFactorsShapeDim)) {
+      SymbolicProduct remainingFactorsShapeDim;
+      if (!IsSymbolicProduct(shapeDim, &remainingFactorsShapeDim)) {
         return failure();
       }
 
       // Consume (and collapse) as many of the operand dimensions as needed to
       // match the target dimension. This is monotonic.
-      while (remainingConcreteProductShapeDim != 1 ||
-             !remainingSymbolicFactorsShapeDim.empty()) {
+      while (!remainingFactorsShapeDim.empty()) {
         // Fail if there are no more operand dimensions to consume.
         if (i >= argShapeInfo->size()) return failure();
 
         // Find the concrete/symbolic factors for the next dimension of the
         // operand shape.
-        int64_t concreteProductArgShapeDim = 1;
-        SmallVector<Symbol> symbolicFactorsArgShapeDim;
-        if (!IsSimpleProduct((*argShapeInfo)[i], &concreteProductArgShapeDim,
-                             &symbolicFactorsArgShapeDim)) {
+        SymbolicProduct remainingFactorsArgShapeDim;
+        if (!IsSymbolicProduct((*argShapeInfo)[i],
+                               &remainingFactorsArgShapeDim)) {
           return failure();
         }
 
         // Eliminate the common concrete factors. Fail if we cannot consume a
         // concrete factor of the operand shape.
-        if (remainingConcreteProductShapeDim % concreteProductArgShapeDim != 0)
+        if (remainingFactorsShapeDim.concrete %
+                remainingFactorsArgShapeDim.concrete !=
+            0)
           return failure();
-        remainingConcreteProductShapeDim /= concreteProductArgShapeDim;
+        remainingFactorsShapeDim.concrete /=
+            remainingFactorsArgShapeDim.concrete;
 
         // Eliminate the common symbolic factors. Fail if we cannot consume a
         // symbolic factor of the operand shape.
-        for (const Symbol &symArgShapeDim : symbolicFactorsArgShapeDim) {
+        for (const Symbol &symArgShapeDim :
+             remainingFactorsArgShapeDim.symbolic) {
           auto *it =
-              llvm::find(remainingSymbolicFactorsShapeDim, symArgShapeDim);
-          if (it == remainingSymbolicFactorsShapeDim.end()) return failure();
-          remainingSymbolicFactorsShapeDim.erase(it);
+              llvm::find(remainingFactorsShapeDim.symbolic, symArgShapeDim);
+          if (it == remainingFactorsShapeDim.symbolic.end()) return failure();
+          remainingFactorsShapeDim.symbolic.erase(it);
         }
 
         // If all the concrete/symbolic factors were consumable, collapse this
",0,train
ea95f4a740725e9b7f864ef5766772c64dbbaf0d,tensorflow/tensorflow,"TPUEstimator: Fix shutdown behavior after preemption.

PiperOrigin-RevId: 299005210
Change-Id: I925e5308c8470dce0e071361441266856278a391",session_support.py,"@@ -418,7 +418,7 @@ class ResetComputation(object):
 
   def __call__(self, run_context, all_workers, lame_workers):
     del run_context, lame_workers
-    all_workers.shutdown()
+    all_workers.shutdown(exit_code=42)
 
     logging.info('Resetting coordinator.')
     raise CoordinatorResetError()
@@ -435,7 +435,7 @@ class ShutdownLameWorkers(object):
     pass
 
   def __call__(self, run_context, all_workers, lame_workers):
-    lame_workers.shutdown()
+    lame_workers.shutdown(exit_code=42)
 
 
 class ShutdownAllWorkers(object):
@@ -449,4 +449,4 @@ class ShutdownAllWorkers(object):
     pass
 
   def __call__(self, run_context, all_workers, lame_workers):
-    all_workers.shutdown()
+    all_workers.shutdown(exit_code=42)
",0,test
9aa32a6eacd0e8f507d1c57f0658d6c3ecaecaba,tensorflow/tensorflow,"Enable mixing value tensors (eager tensors or numpy arrays) and Keras symbolic tensors when building Keras graphs-of-layers in an eager scope.
In these cases, the value tensors are treated as symbolic constants.

This enables the following pattern to work in the same way in both V1 and V2:

```
lstm = LSTM(2)
inputs = keras.Input((None, 3))
outputs = lstm(inputs, initial_state=tf.ones(shape))
```

(without this change, the above code works in V1 but fails in V2 with an artificial exception).

Known issue: in case a random tensor is used, there is a (usually harmless) behavior discrepancy remaining between V1 and V2, which is that in V2 we'd be using the same random value every time, whereas in V1 we'd be drawing new random values (since the tensor would be treated as a random op and not as a constant). We think this is not a problem because in V2 users should have the mental model ""tensors are values"" and thus would be expecting a random tensor to behave like a constant value and not like a random generator.

PiperOrigin-RevId: 224915621",execute.py,"@@ -66,12 +66,6 @@ def quick_execute(op_name, num_outputs, inputs, attrs, ctx, name=None):
     six.raise_from(core._status_to_exception(e.code, message), None)
   except TypeError as e:
     if any(ops._is_keras_symbolic_tensor(x) for x in inputs):
-      if any(isinstance(x, ops.EagerTensor) for x in inputs):
-        raise TypeError(""You are attempting to mix computation of symbolic ""
-                        ""Tensors (computation rooted at tf.keras.Input()) ""
-                        ""and concrete values. This is not supported. ""
-                        ""If you need this support, file an issue on the ""
-                        ""TensorFlow GitHub repository."")
       raise core._SymbolicException
     raise e
   # pylint: enable=protected-access
",0,train
9aa32a6eacd0e8f507d1c57f0658d6c3ecaecaba,tensorflow/tensorflow,"Enable mixing value tensors (eager tensors or numpy arrays) and Keras symbolic tensors when building Keras graphs-of-layers in an eager scope.
In these cases, the value tensors are treated as symbolic constants.

This enables the following pattern to work in the same way in both V1 and V2:

```
lstm = LSTM(2)
inputs = keras.Input((None, 3))
outputs = lstm(inputs, initial_state=tf.ones(shape))
```

(without this change, the above code works in V1 but fails in V2 with an artificial exception).

Known issue: in case a random tensor is used, there is a (usually harmless) behavior discrepancy remaining between V1 and V2, which is that in V2 we'd be using the same random value every time, whereas in V1 we'd be drawing new random values (since the tensor would be treated as a random op and not as a constant). We think this is not a problem because in V2 users should have the mental model ""tensors are values"" and thus would be expecting a random tensor to behave like a constant value and not like a random generator.

PiperOrigin-RevId: 224915621",base_layer_test.py,"@@ -167,19 +167,26 @@ class BaseLayerTest(test.TestCase):
   def test_mixing_keras_symbolic_tensors_and_eager_tensors(self):
     x1 = keras.Input((3,))
     x2 = array_ops.ones((3, 3))
-    with self.assertRaisesRegexp(
-        TypeError,
-        'mix computation of symbolic Tensors'):
-      math_ops.matmul(x1, x2)
+    y = math_ops.matmul(x1, x2)
+    self.assertEqual(y.graph, keras.backend.get_graph())
+    fn = keras.backend.function(inputs=[x1], outputs=[y])
+    x_val = np.random.random((3, 3))
+    y_val = np.ones((3, 3))
+    self.assertAllClose(fn([x_val])[0],
+                        np.matmul(x_val, y_val),
+                        atol=1e-5)
 
   def test_mixing_keras_symbolic_tensors_and_numpy_arrays(self):
-    # For the time being we treat Numpy arrays as EagerTensors when mixing both.
     x1 = keras.Input((3,))
     x2 = np.ones((3, 3), dtype='float32')
-    with self.assertRaisesRegexp(
-        TypeError,
-        'mix computation of symbolic Tensors'):
-      math_ops.matmul(x1, x2)
+    y = math_ops.matmul(x1, x2)
+    self.assertEqual(y.graph, keras.backend.get_graph())
+    fn = keras.backend.function(inputs=[x1], outputs=[y])
+    x_val = np.random.random((3, 3))
+    y_val = np.ones((3, 3))
+    self.assertAllClose(fn([x_val])[0],
+                        np.matmul(x_val, y_val),
+                        atol=1e-5)
 
 
 if __name__ == '__main__':
",0,train
aef53d7fe63191dc3adb3efd417c2054e3addc3e,tensorflow/tensorflow,deleted extraneous comment,c_api_unified_experimental_test.cc,"@@ -117,7 +117,7 @@ TEST_P(UnifiedCAPI, TestBasicEagerMatMul) {
 
   float vals [] = {0.0f,0.0f,0.0f,0.0f};
   TFE_Context* eager_ctx = TF_ExecutionContextGetTFEContext(ctx,status.get());
-  TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims); //, dims[0],dims[1]);
+  TFE_TensorHandle* t = TestMatrixTensorHandleWithInput(eager_ctx, vals, dims,num_dims);
   
   TF_AbstractTensor* at =
       TF_CreateAbstractTensorFromEagerTensor(t, status.get()); // get abstract tensor
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",cluster.h,"@@ -47,7 +47,7 @@ class Cluster {
   // 2- All the nodes in GraphDef which belong to this cluster.
   void SetGraphDefInfo(const tensorflow::GraphDef* graph_def);
 
-  const string& GetName() const { return name_; }
+  const std::string& GetName() const { return name_; }
 
   const std::vector<std::unique_ptr<tensorflow::NodeDef>>& GetNewNodes() const {
     return new_nodes_;
@@ -55,18 +55,18 @@ class Cluster {
 
   const std::vector<const tensorflow::NodeDef*>& GetNodes() { return nodes_; }
 
-  void SetName(const string& name) { name_ = name; }
+  void SetName(const std::string& name) { name_ = name; }
 
-  void SetDevice(const string& device) { device_ = device; }
+  void SetDevice(const std::string& device) { device_ = device; }
 
   // Find the input(s) and output(s) of this Cluster.
   bool FindClusterInputsAndOutputs();
 
  protected:
-  string name_;
-  string device_;
-  std::vector<string> inputs_;
-  std::vector<string> outputs_;
+  std::string name_;
+  std::string device_;
+  std::vector<std::string> inputs_;
+  std::vector<std::string> outputs_;
 
   // Used to hold the pointers to nodes which are in this cluster. These nodes
   // are pointing to the nodes in graph_def_.
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",cluster_utils.cc,"@@ -16,8 +16,8 @@ limitations under the License.
 #include ""tensorflow/lite/toco/toco_types.h""
 namespace toco {
 
-bool StrContains(const string& x, const string& search_pattern) {
-  return x.find(search_pattern) != string::npos;
+bool StrContains(const std::string& x, const std::string& search_pattern) {
+  return x.find(search_pattern) != std::string::npos;
 }
 
 void Transpose2DTensor(const float* tensor, int row, int col,
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_cluster.cc,"@@ -33,7 +33,8 @@ using tensorflow::GraphDef;
 using tensorflow::NodeDef;
 
 void AddNodeToGraph(const NodeDef& node,
-                    const std::vector<string>& cluster_names, GraphDef* graph) {
+                    const std::vector<std::string>& cluster_names,
+                    GraphDef* graph) {
   NodeDef* new_node = graph->add_node();
   new_node->set_op(node.op());
   new_node->set_name(node.name());
@@ -41,9 +42,9 @@ void AddNodeToGraph(const NodeDef& node,
   // If the inputs are coming from a node which belongs to another cluster, then
   // those inputs are renamed to the source cluster name. Otherwise the original
   // input name is used.
-  for (const string& node_input : node.input()) {
+  for (const std::string& node_input : node.input()) {
     bool input_from_cluster = false;
-    for (const string& cluster_name : cluster_names) {
+    for (const std::string& cluster_name : cluster_names) {
       if (StrContains(node_input, cluster_name) &&
           !StrContains(node.name(), cluster_name)) {
         new_node->add_input(cluster_name);
@@ -62,7 +63,7 @@ void AddNodeToGraph(const NodeDef& node,
 
 bool FindCluster(const ClusterFactoryInterface& cluster_factory,
                  const GraphDef& graph_def,
-                 std::unordered_map<string, bool>* is_node_in_cluster,
+                 std::unordered_map<std::string, bool>* is_node_in_cluster,
                  std::vector<std::unique_ptr<Cluster>>* clusters) {
   for (const NodeDef& node : graph_def.node()) {
     // If the node is not assigned to any cluster, then we check if it belong to
@@ -90,12 +91,12 @@ std::unique_ptr<GraphDef> MaybeResolveClusters(
   std::unique_ptr<GraphDef> pruned_graph(new GraphDef);
   // The structure to keep track of which cluster each node is assigned to, and
   // to initialize them to all un-assigned,
-  std::unordered_map<string, bool> is_node_in_cluster;
+  std::unordered_map<std::string, bool> is_node_in_cluster;
   for (const NodeDef& node : graph_def.node()) {
     is_node_in_cluster[node.name()] = false;
   }
 
-  std::vector<string> cluster_names;
+  std::vector<std::string> cluster_names;
   std::vector<std::unique_ptr<Cluster>> all_clusters;
   // Find the clusters for all available cluster factories.
   for (const ClusterFactoryInterface* cluster_factory : cluster_factories) {
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_cluster.h,"@@ -40,7 +40,7 @@ std::unique_ptr<tensorflow::GraphDef> MaybeResolveClusters(
 // belongs to another cluster, then those inputs are renamed to the source
 // cluster name.
 void AddNodeToGraph(const tensorflow::NodeDef& node,
-                    const std::vector<string>& cluster_names,
+                    const std::vector<std::string>& cluster_names,
                     tensorflow::GraphDef* graph);
 
 // Given a graph and a cluster class, it finds all the nodes which belong to a
@@ -49,7 +49,7 @@ void AddNodeToGraph(const tensorflow::NodeDef& node,
 // they belong to the generated clusters.
 bool FindCluster(const ClusterFactoryInterface& cluster_factory,
                  const tensorflow::GraphDef& graph_def,
-                 std::unordered_map<string, bool>* is_node_in_cluster,
+                 std::unordered_map<std::string, bool>* is_node_in_cluster,
                  std::vector<std::unique_ptr<Cluster>>* clusters);
 
 // Receives a graph and generates another graph by replacing the cluster of
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf.cc,"@@ -47,11 +47,11 @@ namespace {
 // Since these nodes are connected to a Concatenate node, it makes sure the
 // axis value input of the Concatenate operator is 0.
 void FilterPartitionedConstNodes(
-    const string& const_pattern,
+    const std::string& const_pattern,
     const std::vector<const NodeDef*>& cluster_nodes,
     std::vector<const NodeDef*>* const_node_parts) {
   for (const NodeDef* node : cluster_nodes) {
-    string node_name_to_upper = node->name();
+    std::string node_name_to_upper = node->name();
     std::transform(node_name_to_upper.begin(), node_name_to_upper.end(),
                    node_name_to_upper.begin(), ::toupper);
     if (StrContains(node->name(), const_pattern) && node->op() == ""Const"") {
@@ -97,7 +97,7 @@ int SvdfCluster::InferFilterRank() {
 }
 
 void SvdfCluster::CreateNodes() {
-  for (const string& const_pattern : const_node_patterns_) {
+  for (const std::string& const_pattern : const_node_patterns_) {
     CreateConstNode(const_pattern);
   }
   std::unique_ptr<tensorflow::NodeDef> svdf_node(new NodeDef);
@@ -110,14 +110,14 @@ void SvdfCluster::CreateNodes() {
 
   // Add the rest of the inputs to Svdf cell: weights and bias.
   CHECK(new_nodes_.size() == 3 || new_nodes_.size() == 2);
-  string* weights_feature_input = svdf_node->add_input();
-  string* weights_time_input = svdf_node->add_input();
-  string* bias_input;
+  std::string* weights_feature_input = svdf_node->add_input();
+  std::string* weights_time_input = svdf_node->add_input();
+  std::string* bias_input;
   if (new_nodes_.size() == 3) {
     bias_input = svdf_node->add_input();
   }
   for (const std::unique_ptr<tensorflow::NodeDef>& node : new_nodes_) {
-    const string node_name = node->name();
+    const std::string node_name = node->name();
     if (StrContains(node_name, ""SVDF_weights_feature"")) {
       *weights_feature_input = node_name;
     } else if (StrContains(node_name, ""SVDF_weights_time"")) {
@@ -136,7 +136,7 @@ void SvdfCluster::CreateNodes() {
   CHECK_GT(rank, 0);
 
   // Add Svdf activation and rank.
-  string activation_function =
+  std::string activation_function =
       StrContains(outputs_[0], ""Relu"") ? ""Relu"" : ""None"";
   (*svdf_node->mutable_attr())[""ActivationFunction""].set_s(activation_function);
   (*svdf_node->mutable_attr())[""Rank""].set_i(rank);
@@ -145,7 +145,7 @@ void SvdfCluster::CreateNodes() {
   new_nodes_.push_back(std::move(svdf_node));
 }
 
-void SvdfCluster::CreateConstNode(const string& const_pattern) {
+void SvdfCluster::CreateConstNode(const std::string& const_pattern) {
   // Find the nodes with pattern like: ""const_pattern""/part_xxx of type Const.
   std::vector<const NodeDef*> const_node_parts;
   FilterPartitionedConstNodes(const_pattern, nodes_, &const_node_parts);
@@ -236,15 +236,15 @@ void SvdfCluster::MaybeMergeConstNodes(
 
     // Set the tensor attributes.
     allocated_tensor->set_tensor_content(
-        string(reinterpret_cast<const char*>(transposed_tensor.get()),
-               allocated_content_flat_size));
+        std::string(reinterpret_cast<const char*>(transposed_tensor.get()),
+                    allocated_content_flat_size));
   } else {
     tensor_shape_dim0->set_size(dim0_size);
 
     // Set the tensor attributes.
     allocated_tensor->set_tensor_content(
-        string(reinterpret_cast<const char*>(allocated_content.get()),
-               allocated_content_flat_size));
+        std::string(reinterpret_cast<const char*>(allocated_content.get()),
+                    allocated_content_flat_size));
   }
 }
 
@@ -252,21 +252,21 @@ void SvdfCluster::MaybeMergeConstNodes(
 
 std::unique_ptr<Cluster> SvdfClusterFactory::CreateCluster(
     const NodeDef& node, const GraphDef& graph_def) const {
-  std::vector<string> node_patterns = {""SVDF_weights_feature"",
-                                       ""SVDF_weights_time"", ""SVDF_bias""};
+  std::vector<std::string> node_patterns = {""SVDF_weights_feature"",
+                                            ""SVDF_weights_time"", ""SVDF_bias""};
 
-  string node_name_to_upper = node.name();
+  std::string node_name_to_upper = node.name();
   std::transform(node_name_to_upper.begin(), node_name_to_upper.end(),
                  node_name_to_upper.begin(), ::toupper);
   std::unique_ptr<SvdfCluster> cluster = nullptr;
-  if (node_name_to_upper.find(""SVDF"", 0) != string::npos) {
+  if (node_name_to_upper.find(""SVDF"", 0) != std::string::npos) {
     size_t weights_pos = node.name().find(node_patterns[0]);
-    if (weights_pos != string::npos) {
+    if (weights_pos != std::string::npos) {
       // Assuming the node name has a pattern like:
       // ""SOMESTRING1/CELLNAME/SEARCH_PATTERN/SOMESTRING2"", we use
       // CELLNAME as the cluster name.
       size_t cell_pos = node.name().rfind(""/"", weights_pos - 2) + 1;
-      string cell_name =
+      std::string cell_name =
           node.name().substr(cell_pos, weights_pos - cell_pos - 1);
       cluster = std::unique_ptr<SvdfCluster>(new SvdfCluster);
       cluster->SetName(cell_name);
@@ -274,7 +274,7 @@ std::unique_ptr<Cluster> SvdfClusterFactory::CreateCluster(
       cluster->SetGraphDefInfo(&graph_def);
       CHECK(cluster->FindClusterInputsAndOutputs());
 
-      for (const string& const_pattern : node_patterns) {
+      for (const std::string& const_pattern : node_patterns) {
         cluster->AddConstNodePattern(const_pattern);
       }
     }
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf.h,"@@ -36,7 +36,7 @@ class SvdfCluster : public Cluster {
 
   // A helper function to set the pattern of Const nodes which CreateNodes()
   // should handle specially.
-  void AddConstNodePattern(const string& const_pattern) {
+  void AddConstNodePattern(const std::string& const_pattern) {
     const_node_patterns_.push_back(const_pattern);
   }
 
@@ -46,7 +46,7 @@ class SvdfCluster : public Cluster {
   // The main function which is used to create Const nodes for this cluster.
   // These Const nodes are the inputs to the composite op generated for this
   // cluster.
-  void CreateConstNode(const string& const_pattern);
+  void CreateConstNode(const std::string& const_pattern);
 
   // Receives a vector of Const nodes, merge them (if necessary) and returns
   // only one Const node holding all the arrays contents. It transposes it if
@@ -61,7 +61,7 @@ class SvdfCluster : public Cluster {
   // shape to [num_units, rank, batch] shape. The 2nd shape element is rank.
   int InferFilterRank();
 
-  std::vector<string> const_node_patterns_;
+  std::vector<std::string> const_node_patterns_;
 };
 
 class SvdfClusterFactory : public ClusterFactoryInterface {
",0,train
cdb6e80b21997c2b24336eb524134198fa6754d8,tensorflow/tensorflow,"Qualify uses of std::string

PiperOrigin-RevId: 316789814
Change-Id: Ice83a74e70122008e090af3b818b9920abf7f5bc",resolve_svdf_test.cc,"@@ -77,8 +77,8 @@ class ResolveSvdfTest : public ::testing::Test {
   ~ResolveSvdfTest() override {}
 
  protected:
-  void AddNewNode(const string& name, const string& op,
-                  const std::vector<string>& inputs) {
+  void AddNewNode(const std::string& name, const std::string& op,
+                  const std::vector<std::string>& inputs) {
     NodeDef* node = graph_.add_node();
     node->set_name(name);
     node->set_op(op);
@@ -89,8 +89,8 @@ class ResolveSvdfTest : public ::testing::Test {
     }
   }
 
-  void AddNewNode(const string& name, const string& op,
-                  const std::vector<string>& inputs,
+  void AddNewNode(const std::string& name, const std::string& op,
+                  const std::vector<std::string>& inputs,
                   const std::vector<float>& values) {
     NodeDef* node = graph_.add_node();
     node->set_name(name);
@@ -109,12 +109,12 @@ class ResolveSvdfTest : public ::testing::Test {
     tensor_shape_dim0->set_size(values.size());
     allocated_tensor->set_allocated_tensor_shape(allocated_tensor_shape);
     allocated_tensor->set_tensor_content(
-        string(reinterpret_cast<const char*>(values.data()),
-               values.size() * sizeof(float)));
+        std::string(reinterpret_cast<const char*>(values.data()),
+                    values.size() * sizeof(float)));
     (*node->mutable_attr())[""value""].set_allocated_tensor(allocated_tensor);
   }
 
-  void AddShapeNode(const string& name, const std::vector<int>& values) {
+  void AddShapeNode(const std::string& name, const std::vector<int>& values) {
     NodeDef* node = graph_.add_node();
     node->set_name(name);
     node->set_op(""Const"");
@@ -128,8 +128,8 @@ class ResolveSvdfTest : public ::testing::Test {
     tensor_shape_dim0->set_size(values.size());
     allocated_tensor->set_allocated_tensor_shape(allocated_tensor_shape);
     allocated_tensor->set_tensor_content(
-        string(reinterpret_cast<const char*>(values.data()),
-               values.size() * sizeof(int)));
+        std::string(reinterpret_cast<const char*>(values.data()),
+                    values.size() * sizeof(int)));
     (*node->mutable_attr())[""value""].set_allocated_tensor(allocated_tensor);
   }
 
@@ -157,12 +157,12 @@ TEST_F(ResolveSvdfTest, TestTranspose2DTensor) {
 }
 
 TEST_F(ResolveSvdfTest, TestResolveSvdfFlow) {
-  std::unordered_map<string, bool> is_node_in_cluster;
+  std::unordered_map<std::string, bool> is_node_in_cluster;
   for (const NodeDef& node : graph_.node()) {
     is_node_in_cluster[node.name()] = false;
   }
 
-  std::vector<string> cluster_names;
+  std::vector<std::string> cluster_names;
   CHECK(FindCluster(svdf_cluster_factory_, graph_, &is_node_in_cluster,
                     &clusters_));
 
@@ -174,7 +174,7 @@ TEST_F(ResolveSvdfTest, TestResolveSvdfFlow) {
   EXPECT_THAT(cluster_names,
               testing::UnorderedElementsAreArray({""Svdf1"", ""Svdf2""}));
 
-  std::vector<string> new_node_names;
+  std::vector<std::string> new_node_names;
   std::vector<float> content_array(3);
   for (const std::unique_ptr<Cluster>& cluster : clusters_) {
     // After CreateNodes in each cluster we have three nodes: Svdf,
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",hlo_legalize_to_lhlo.cc,"@@ -424,7 +424,7 @@ struct HloToLhloReduceOpConverter : public BaseOpConversion<mhlo::ReduceOp> {
       buffer_args.push_back(InsertAlloc(loc, result, &rewriter));
     }
     auto new_op = rewriter.create<lmhlo::ReduceOp>(loc, llvm::None, buffer_args,
-                                                   op.getAttrs());
+                                                   op->getAttrs());
 
     // Copy over the operations inside the region.
     rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end());
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",lower_static_tensor_list.cc,"@@ -816,7 +816,7 @@ struct ConvertIdentity : public OpConversionPattern<TF::IdentityOp> {
       ConversionPatternRewriter &rewriter) const override {
     Value input = operands[0];
     rewriter.replaceOpWithNewOp<TF::IdentityOp>(op, input.getType(), operands,
-                                                op.getAttrs());
+                                                op->getAttrs());
     return success();
   }
 };
@@ -948,7 +948,7 @@ struct ConvertWhile : public OpConversionPattern<TF::WhileOp> {
 
     // Create a new while op with new operands and updated result types.
     auto converted = rewriter.create<TF::WhileOp>(op.getLoc(), result_types,
-                                                  operands, op.getAttrs());
+                                                  operands, op->getAttrs());
     converted.removeAttr(""T"");
     (void)UpdateFunctionTypes(rewriter, converted, tensor_list_args);
 
@@ -972,7 +972,7 @@ struct ConvertWhileRegion : public OpConversionPattern<TF::WhileRegionOp> {
 
     // Create a new while op with new operands and updated result types.
     auto converted = rewriter.create<TF::WhileRegionOp>(
-        op.getLoc(), result_types, operands, op.getAttrs());
+        op.getLoc(), result_types, operands, op->getAttrs());
 
     // Inline the regions from the old while into the new one, and apply
     // signature conversion to inlined region.
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",while_loop_outline.cc,"@@ -254,7 +254,7 @@ void WhileOutlinePass::OutlineWhile(WhileOp while_op) {
     new_types.push_back(extra_operand.getType());
 
   auto new_while_op = OpBuilder(while_op).create<WhileOp>(
-      while_op.getLoc(), new_types, operands, while_op.getAttrs());
+      while_op.getLoc(), new_types, operands, while_op->getAttrs());
   new_while_op.cond().takeBody(while_op.cond());
   new_while_op.body().takeBody(while_op.body());
   while_op.replaceAllUsesWith(
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tf_executor.cc,"@@ -213,7 +213,7 @@ LogicalResult Verify(GraphOp graph) {
 void Print(GraphOp graph, OpAsmPrinter &p) {
   p << graph.getOperationName();
   p.printRegion(graph.getOperation()->getRegion(0));
-  p.printOptionalAttrDict(graph.getAttrs());
+  p.printOptionalAttrDict(graph->getAttrs());
 }
 
 ParseResult ParseGraphOp(OpAsmParser &parser, OperationState &result) {
@@ -321,7 +321,7 @@ void Print(IslandOp op, OpAsmPrinter &p) {
   // Check if we can print the short ""wraps"" form: that is if the island
   // contains a single operation and the result of this operation are perfectly
   // forwarded to the yield.
-  if (op.getAttrs().empty() && op.WrapsSingleOp()) {
+  if (op->getAttrs().empty() && op.WrapsSingleOp()) {
     Operation &wrapped_op = op.GetBody().front();
     YieldOp yield_op = op.GetYield();
     // The ""wraps"" syntax only encodes a single location.
@@ -335,7 +335,7 @@ void Print(IslandOp op, OpAsmPrinter &p) {
     }
   }
   p.printRegion(op.getOperation()->getRegion(0));
-  p.printOptionalAttrDict(op.getAttrs());
+  p.printOptionalAttrDict(op->getAttrs());
 }
 
 ParseResult ParseIslandOp(OpAsmParser &parser, OperationState &result) {
@@ -449,7 +449,7 @@ void Print(SwitchOp switch_op, OpAsmPrinter &p) {
   } else {
     p << switch_op.getType(0);
   }
-  p.printOptionalAttrDict(switch_op.getAttrs());
+  p.printOptionalAttrDict(switch_op->getAttrs());
 }
 
 }  // anonymous namespace
@@ -525,7 +525,7 @@ void Print(SwitchNOp switchn, OpAsmPrinter &p) {
     p << "")"";
   }
   p << "" : "" << switchn.getType(0);
-  p.printOptionalAttrDict(switchn.getAttrs(), {""num_outs""});
+  p.printOptionalAttrDict(switchn->getAttrs(), {""num_outs""});
 }
 
 ParseResult ParseSwitchNOp(OpAsmParser &parser, OperationState &result) {
@@ -655,7 +655,7 @@ void Print(MergeOp merge, OpAsmPrinter &p) {
     p << output_type;
   }
 
-  p.printOptionalAttrDict(merge.getAttrs());
+  p.printOptionalAttrDict(merge->getAttrs());
 }
 
 ParseResult ParseMergeOp(OpAsmParser &parser, OperationState &result) {
@@ -723,7 +723,7 @@ void Print(EnterOp enter, OpAsmPrinter &p) {
     p << enter.getType(0);
   }
 
-  p.printOptionalAttrDict(enter.getAttrs(),
+  p.printOptionalAttrDict(enter->getAttrs(),
                           {""frame_name"", ""parallel_iterations"", ""is_constant""});
 }
 
@@ -843,7 +843,7 @@ void Print(ExitOp exit, OpAsmPrinter &p) {
   p << exit.getOperationName() << ' ';
   p.printOperands(exit.getOperands());
   p << "" : "" << exit.getType(0);
-  p.printOptionalAttrDict(exit.getAttrs());
+  p.printOptionalAttrDict(exit->getAttrs());
 }
 
 ParseResult ParseExitOp(OpAsmParser &parser, OperationState &result) {
@@ -887,7 +887,7 @@ void Print(LoopCondOp loop_cond, OpAsmPrinter &p) {
     p << "" : "" << loop_cond.input().getType();
   }
 
-  p.printOptionalAttrDict(loop_cond.getAttrs());
+  p.printOptionalAttrDict(loop_cond->getAttrs());
 }
 
 ParseResult ParseLoopCondOp(OpAsmParser &parser, OperationState &result) {
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",fused_kernel_matcher.cc,"@@ -156,7 +156,7 @@ class FuseContractionWithBiasAdd : public OpRewritePattern<SrcOpT> {
     // The fused contraction has the same attributes as the original
     // contraction, with two additions: the list of ops which have been fused
     // together; epsilon (only with FusedBatchNorm).
-    std::vector<NamedAttribute> attrs = contraction.getAttrs();
+    std::vector<NamedAttribute> attrs = contraction->getAttrs();
     ArrayAttr fused_ops_attr = ArrayAttr::get(context, fused_ops);
     attrs.push_back(
         NamedAttribute(Identifier::get(""fused_ops"", context), fused_ops_attr));
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",gpu_fusion.cc,"@@ -96,7 +96,7 @@ struct ReluToFusedBatchNorm : public OpRewritePattern<ReluOp> {
     state.addOperands(batch_norm.getOperands());
     if (side_input) state.operands.push_back(side_input);
     state.addTypes(batch_norm.getResultTypes());
-    state.addAttributes(batch_norm.getAttrs());
+    state.addAttributes(batch_norm->getAttrs());
     Operation *op = rewriter.createOperation(state);
     rewriter.replaceOp(batch_norm, op->getResults());
 
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",resource_op_lifting.cc,"@@ -931,7 +931,7 @@ LogicalResult HandleWhileLoop(TF::WhileOp while_op, FuncOp body, FuncOp cond) {
       while_op.getLoc(), body.getType().getResults(),
       FilterRange<Value, OperandRange>(while_op.getOperands(),
                                        resource_arg_uses),
-      while_op.getAttrs());
+      while_op->getAttrs());
   // Prepare for AddLoadsStoresOutsideControlFlowOp().
   llvm::SmallDenseMap<int64_t, std::pair<Type, int64_t>>
       arg_data_type_and_updated_output_index;
@@ -1035,7 +1035,7 @@ LogicalResult HandleCaseOrIfOp(CaseOrIfOp op, ArrayRef<FuncOp> branches) {
   FuncOp first_func = branches.front();
   auto new_op =
       builder.create<CaseOrIfOp>(op.getLoc(), first_func.getType().getResults(),
-                                 new_operands, op.getAttrs());
+                                 new_operands, op->getAttrs());
   // Prepare for AddLoadsStoresOutsideControlFlowOp()
   llvm::SmallDenseMap<int64_t, std::pair<Type, int64_t>>
       arg_data_type_and_updated_output_index;
@@ -1179,7 +1179,7 @@ void UpdatePartitionedCallOpWithNewCallee(
       FilterRange<Value, OperandRange>(call_op.args(), lifting_info.use_info);
   auto new_call = builder.create<CallOpType>(
       call_op.getLoc(), lifting_info.lifted_callee.getType().getResults(),
-      new_operands, call_op.getAttrs());
+      new_operands, call_op->getAttrs());
   new_call->setAttr(
       ""f"", builder.getSymbolRefAttr(lifting_info.lifted_callee.getName()));
   AddLoadsStoresOutsideControlFlowOp(
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",stack_ops_decomposition.cc,"@@ -204,7 +204,7 @@ LogicalResult HandleWhileOp(
   }
   auto new_while =
       builder.create<TF::WhileOp>(while_op.getLoc(), body.getType().getInputs(),
-                                  new_while_operands, while_op.getAttrs());
+                                  new_while_operands, while_op->getAttrs());
   for (int64_t i = 0; i < while_op.getNumResults(); ++i) {
     if (!getElementTypeOrSelf(while_op.getOperand(i).getType())
              .isa<TF::ResourceType>()) {
@@ -257,7 +257,7 @@ LogicalResult HandleIfOp(
   }
   auto new_if = OpBuilder(if_op).create<TF::IfOp>(
       if_op.getLoc(), then_func.getType().getResults(), new_if_operands,
-      if_op.getAttrs());
+      if_op->getAttrs());
   for (auto result : if_op.getResults()) {
     if (!getElementTypeOrSelf(result.getType()).isa<TF::ResourceType>()) {
       continue;
@@ -306,7 +306,7 @@ LogicalResult HandlePartitionedCallOp(
     OpBuilder builder(call);
     auto new_call = builder.create<CallOp>(
         call.getLoc(), info.decomposed_callee.getType().getResults(),
-        new_operands, call.getAttrs());
+        new_operands, call->getAttrs());
     new_call->setAttr(
         ""f"", builder.getSymbolRefAttr(
                  const_cast<FuncOp&>(info.decomposed_callee).getName()));
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tensor_array_ops_decomposition.cc,"@@ -625,7 +625,7 @@ LogicalResult HandleWhileOp(TF::WhileOp while_op, ModuleOp module,
   OpBuilder builder(while_op);
   auto new_while =
       builder.create<TF::WhileOp>(while_op.getLoc(), body.getType().getInputs(),
-                                  operands, while_op.getAttrs());
+                                  operands, while_op->getAttrs());
   for (int64_t i = 0; i < while_op.getNumOperands(); ++i) {
     if (ta_arg_buffer_type(i)) {
       while_op.getResult(i).replaceAllUsesWith(while_op.getOperand(i));
@@ -692,7 +692,7 @@ LogicalResult HandleIfOp(TF::IfOp if_op, ModuleOp module,
   OpBuilder builder(if_op);
   auto new_if = builder.create<TF::IfOp>(if_op.getLoc(),
                                          then_branch.getType().getResults(),
-                                         operands, if_op.getAttrs());
+                                         operands, if_op->getAttrs());
   auto ret_forwards_input = [](FuncOp f, int64_t ret_ind) -> int64_t {
     auto retval = f.front().getTerminator()->getOperand(ret_ind);
     auto arg = retval.dyn_cast<BlockArgument>();
@@ -751,7 +751,7 @@ LogicalResult HandlePartitionedCallOp(
     OpBuilder builder(call);
     auto new_call = builder.create<CallOp>(
         call.getLoc(), info.decomposed_callee.getType().getResults(),
-        new_operands, call.getAttrs());
+        new_operands, call->getAttrs());
     new_call->setAttr(
         ""f"", builder.getSymbolRefAttr(
                  const_cast<FuncOp&>(info.decomposed_callee).getName()));
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tpu_extract_head_tail_outside_compilation.cc,"@@ -326,7 +326,7 @@ tf_device::ClusterOp UpdateClusterResults(
 
   auto new_cluster = builder->create<tf_device::ClusterOp>(
       cluster.getLoc(), new_cluster_result_types,
-      /*operands=*/llvm::ArrayRef<Value>{}, cluster.getAttrs());
+      /*operands=*/llvm::ArrayRef<Value>{}, cluster->getAttrs());
   new_cluster.body().takeBody(cluster.body());
 
   auto operand_not_in_cluster = [&](OpOperand& operand) {
@@ -400,7 +400,7 @@ void RemoveClusterAliasedOutputs(OpBuilder* builder,
   builder->setInsertionPoint(cluster);
   auto new_cluster = builder->create<tf_device::ClusterOp>(
       cluster.getLoc(), new_cluster_result_types,
-      /*operands=*/llvm::ArrayRef<Value>{}, cluster.getAttrs());
+      /*operands=*/llvm::ArrayRef<Value>{}, cluster->getAttrs());
   new_cluster.body().takeBody(cluster.body());
   new_cluster.GetBody().getTerminator()->setOperands(new_cluster_results);
 
",0,train
9d1cfabb56e3b69099c122f7ada325d52e154138,tensorflow/tensorflow,"Use mlir::OpState::operator->() to get to Operation::getAttrs().

This is a preparation step to remove getAttrs() from OpState.

PiperOrigin-RevId: 360159716
Change-Id: I185103ded7c111c19f9a3177514221230469e22d",tpu_reorder_replicate_and_partitioned_inputs.cc,"@@ -94,13 +94,13 @@ LogicalResult ReorderReplicateAndPartitionedInputs(
   for (const auto& operands_per_replica : operands_per_replica_per_core) {
     auto replicate_op = builder.create<TF::TPUReplicatedInputOp>(
         replicated_input.getLoc(), replicated_input.getType(),
-        operands_per_replica, replicated_input.getAttrs());
+        operands_per_replica, replicated_input->getAttrs());
     operands_per_core.push_back(replicate_op);
   }
 
   auto pi = builder.create<TF::TPUPartitionedInputOp>(
       first_partitioned_input.getLoc(), replicated_input.getType(),
-      operands_per_core, first_partitioned_input.getAttrs());
+      operands_per_core, first_partitioned_input->getAttrs());
   replicated_input.replaceAllUsesWith(pi.output());
   return success();
 }
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.cc,"@@ -67,17 +67,18 @@ std::string GetSrcValue(int channel_multiplier, const std::string coords) {
   return c;
 }
 
-std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
-                                             bool stride_correction,
-                                             int channel_multiplier,
-                                             bool weights_are_buffer,
-                                             GPUOperation* op) {
+std::string GenerateDepthwiseConvolutionCode(
+    const OperationDef& op_def, bool stride_correction, int channel_multiplier,
+    bool weights_are_buffer, bool dynamic_weights, GPUOperation* op) {
   auto src_desc = op_def.src_tensors[0];
   src_desc.SetTextureAddressMode(TextureAddressMode::ZERO);
   if (op_def.IsBatchSupported()) {
     src_desc.SetStateVar(""BatchedWidth"", ""true"");
   }
   op->AddSrcTensor(""src_tensor"", src_desc);
+  if (dynamic_weights) {
+    op->AddSrcTensor(""weights"", op_def.src_tensors[1]);
+  }
 
   auto dst_desc = op_def.dst_tensors[0];
   if (op_def.IsBatchSupported()) {
@@ -122,16 +123,24 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
     }
   }
   c += ""  int y_offseted = Y * args.stride_y + args.padding_y;\n"";
-  std::string weights_offset = ""args.kernel_size_x * args.kernel_size_y"";
-  if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
-    c += ""  int z_offseted = Z * args.stride_z + args.padding_z;\n"";
-    weights_offset += "" * args.kernel_size_z"";
-  }
-  if (weights_are_buffer) {
-    c += ""  int fx_c = S * "" + weights_offset + "";\n"";
-  } else {
-    c += ""  int fx_c = 0;\n"";
+  if (!dynamic_weights) {
+    std::string weights_offset = ""args.kernel_size_x * args.kernel_size_y"";
+    if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
+      c += ""  int z_offseted = Z * args.stride_z + args.padding_z;\n"";
+      weights_offset += "" * args.kernel_size_z"";
+    }
+    if (weights_are_buffer) {
+      c += ""  int fx_c = S * "" + weights_offset + "";\n"";
+    } else {
+      c += ""  int fx_c = 0;\n"";
+    }
   }
+  std::string kernel_size_x =
+      dynamic_weights ? ""args.weights.Width()"" : ""args.kernel_size_x"";
+  std::string kernel_size_y =
+      dynamic_weights ? ""args.weights.Height()"" : ""args.kernel_size_y"";
+  std::string kernel_size_z =
+      dynamic_weights ? ""args.weights.Depth()"" : ""args.kernel_size_z"";
 
   std::string flat_coords = ""x_c, y_c"";
   if (manual_clamp) {
@@ -139,29 +148,35 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
     if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
       check += "" && !outside_z"";
       flat_coords += "", z_c"";
-      c += ""  for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"";
+      c += ""  for (int kz = 0; kz < "" + kernel_size_z + ""; ++kz) {\n"";
       c += ""    int z_c = z_offseted + kz * args.dilation_z;\n"";
       c += ""    bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n"";
     }
-    c += ""  for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"";
+    c += ""  for (int ky = 0; ky < "" + kernel_size_y + ""; ++ky) {\n"";
     c += ""    int y_c = y_offseted + ky * args.dilation_y;\n"";
     c += ""    bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"";
-    c += ""    for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"";
+    c += ""    for (int kx = 0; kx < "" + kernel_size_x + ""; ++kx) {\n"";
     const std::string dilation_x =
         op_def.IsBatchSupported() ? ""args.dilation_x * args.src_tensor.Batch()""
                                   : ""args.dilation_x"";
     c += ""      int x_c = x_offseted + kx * "" + dilation_x + "";\n"";
     c += ""      bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n"";
     c += ""      if ("" + check + "") {\n"";
-    if (weights_are_buffer) {
-      c += ""        FLT4 f = args.weights.Read(fx_c);\n"";
+    if (dynamic_weights) {
+      c += ""        FLT4 f = args.weights.Read(kx, ky, S);\n"";
     } else {
-      c += ""        FLT4 f = args.weights.Read(fx_c, S);\n"";
+      if (weights_are_buffer) {
+        c += ""        FLT4 f = args.weights.Read(fx_c);\n"";
+      } else {
+        c += ""        FLT4 f = args.weights.Read(fx_c, S);\n"";
+      }
     }
     c += GetSrcValue(channel_multiplier, flat_coords);
     c += ""        r += TO_ACCUM_TYPE(src_final * f);\n"";
     c += ""      };\n"";
-    c += ""      fx_c++;\n"";
+    if (!dynamic_weights) {
+      c += ""      fx_c++;\n"";
+    }
     c += ""    }\n"";
     c += ""  }\n"";
     if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
@@ -170,7 +185,7 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
   } else {  // Texture types with ZERO clamping
     if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) {
       flat_coords += "", z_c"";
-      c += ""  for (int kz = 0; kz < args.kernel_size_z; ++kz) {\n"";
+      c += ""  for (int kz = 0; kz < "" + kernel_size_z + ""; ++kz) {\n"";
       c += ""    int z_c = z_offseted + kz * args.dilation_z;\n"";
       if (src_tensor_type !=
           TensorStorageType::TEXTURE_3D) {  // Only TEXTURE_3D supports clamping
@@ -181,20 +196,24 @@ std::string GenerateDepthwiseConvolutionCode(const OperationDef& op_def,
         c += ""    }\n"";
       }
     }
-    c += ""  for (int ky = 0; ky < args.kernel_size_y; ++ky) {\n"";
+    c += ""  for (int ky = 0; ky < "" + kernel_size_y + ""; ++ky) {\n"";
     c += ""    int y_c = y_offseted + ky * args.dilation_y;\n"";
-    c += ""    for (int kx = 0; kx < args.kernel_size_x; ++kx) {\n"";
+    c += ""    for (int kx = 0; kx < "" + kernel_size_x + ""; ++kx) {\n"";
     const std::string dilation_x =
         op_def.IsBatchSupported() ? ""args.dilation_x * args.src_tensor.Batch()""
                                   : ""args.dilation_x"";
     c += ""      int x_c = x_offseted + kx * "" + dilation_x + "";\n"";
     c += GetSrcValue(channel_multiplier, flat_coords);
-    if (weights_are_buffer) {
-      c += ""      FLT4 f = args.weights.Read(fx_c);\n"";
+    if (dynamic_weights) {
+      c += ""      FLT4 f = args.weights.Read(kx, ky, S);\n"";
     } else {
-      c += ""      FLT4 f = args.weights.Read(fx_c, S);\n"";
+      if (weights_are_buffer) {
+        c += ""      FLT4 f = args.weights.Read(fx_c);\n"";
+      } else {
+        c += ""      FLT4 f = args.weights.Read(fx_c, S);\n"";
+      }
+      c += ""      fx_c++;\n"";
     }
-    c += ""      fx_c++;\n"";
     c += ""      r += TO_ACCUM_TYPE(src_final * f);\n"";
     c += ""    }\n"";
     c += ""  }\n"";
@@ -234,7 +253,7 @@ GPUOperation CreateDepthwiseConvolution2D(
       definition.IsBatchSupported() && attr.strides.w != 1;
   op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction,
                                               attr.weights.shape.o,
-                                              weights_are_buffer, &op);
+                                              weights_are_buffer, false, &op);
   UploadWeightsForDWConv2D(attr.weights, weights_are_buffer,
                            definition.precision, &op);
   op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
@@ -249,6 +268,32 @@ GPUOperation CreateDepthwiseConvolution2D(
   return op;
 }
 
+GPUOperation CreateDepthwiseConvolution2DDynamicWeights(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const DepthwiseConvolution2DAttributes& attr) {
+  GPUOperation op(definition);
+  op.args_.AddInt(""stride_x"", attr.strides.w);
+  op.args_.AddInt(""padding_x"", -attr.padding.prepended.w);
+  op.args_.AddInt(""dilation_x"", attr.dilations.w);
+  op.args_.AddInt(""stride_y"", attr.strides.h);
+  op.args_.AddInt(""padding_y"", -attr.padding.prepended.h);
+  op.args_.AddInt(""dilation_y"", attr.dilations.h);
+  const bool stride_correction =
+      definition.IsBatchSupported() && attr.strides.w != 1;
+  op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction, 1,
+                                              false, true, &op);
+  op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
+
+  TensorLinearDescriptor desc;
+  desc.storage_type = device_info.IsMali() ? LinearStorageType::BUFFER
+                                           : LinearStorageType::TEXTURE_2D;
+  desc.element_type = definition.GetDataType();
+  desc.UploadLinearData(attr.bias);
+  op.args_.AddObject(
+      ""biases"", absl::make_unique<TensorLinearDescriptor>(std::move(desc)));
+  return op;
+}
+
 GPUOperation CreateDepthwiseConvolution3D(
     const DeviceInfo& device_info, const OperationDef& definition,
     const DepthwiseConvolution3DAttributes& attr) {
@@ -273,7 +318,7 @@ GPUOperation CreateDepthwiseConvolution3D(
       definition.IsBatchSupported() && attr.strides.w != 1;
   op.code_ = GenerateDepthwiseConvolutionCode(definition, stride_correction,
                                               attr.weights.shape.o,
-                                              weights_are_buffer, &op);
+                                              weights_are_buffer, false, &op);
   UploadWeightsForDWConv3D(attr.weights, weights_are_buffer,
                            definition.precision, &op);
   op.tensor_to_grid_ = TensorToGrid::kWBToX_HDToY_SToZ;
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.h,"@@ -186,6 +186,10 @@ GPUOperation CreateDepthwiseConvolution2D(
     const DeviceInfo& device_info, const OperationDef& definition,
     const DepthwiseConvolution2DAttributes& attr);
 
+GPUOperation CreateDepthwiseConvolution2DDynamicWeights(
+    const DeviceInfo& device_info, const OperationDef& definition,
+    const DepthwiseConvolution2DAttributes& attr);
+
 GPUOperation CreateDepthwiseConvolution3D(
     const DeviceInfo& device_info, const OperationDef& definition,
     const DepthwiseConvolution3DAttributes& attr);
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",operation_selector.cc,"@@ -315,7 +315,16 @@ absl::Status GPUOperationFromNode(const DeviceInfo& device_info,
     case OperationType::DEPTHWISE_CONVOLUTION: {
       auto attr = absl::any_cast<DepthwiseConvolution2DAttributes>(
           node.operation.attributes);
-      *gpu_op = SelectDWConvolution(attr, device_info, op_def);
+      if (inputs.size() == 1) {
+        *gpu_op = SelectDWConvolution(attr, device_info, op_def);
+      } else {
+        if (inputs[1]->tensor.shape.b != 1) {
+          return absl::UnimplementedError(
+              ""No support of depthwise runtime weights with channel multiplier ""
+              ""!= 1"");
+        }
+        *gpu_op = SelectDWConvolutionDynamicWeights(attr, device_info, op_def);
+      }
       return absl::OkStatus();
     }
     case OperationType::FULLY_CONNECTED: {
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",simple_selectors.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/add.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/concat_xy.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/concat_z.h""
+#include ""tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/lstm.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/max_unpooling.h""
 #include ""tensorflow/lite/delegates/gpu/cl/kernels/mean.h""
@@ -110,6 +111,13 @@ absl::Status SelectConcat(const ConcatAttributes& attr,
   }
 }
 
+std::unique_ptr<GPUOperation> SelectDWConvolutionDynamicWeights(
+    const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def) {
+  return absl::make_unique<GPUOperation>(
+      CreateDepthwiseConvolution2DDynamicWeights(device_info, op_def, attr));
+}
+
 void SelectReshape(int src_channels, int dst_channels,
                    const OperationDef& op_def,
                    std::unique_ptr<GPUOperation>* ptr) {
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",simple_selectors.h,"@@ -57,6 +57,10 @@ absl::Status SelectConcat(const ConcatAttributes& attr,
                           const DeviceInfo& device_info,
                           std::unique_ptr<GPUOperation>* ptr);
 
+std::unique_ptr<GPUOperation> SelectDWConvolutionDynamicWeights(
+    const DepthwiseConvolution2DAttributes& attr, const DeviceInfo& device_info,
+    const OperationDef& op_def);
+
 void SelectReshape(int src_channels, int dst_channels,
                    const OperationDef& op_def,
                    std::unique_ptr<GPUOperation>* ptr);
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",special_selector.cc,"@@ -40,6 +40,10 @@ absl::Status TryDepthwiseConvPlus1x1Conv(
       OperationType::DEPTHWISE_CONVOLUTION) {
     return absl::NotFoundError(""DepthwiseConvPlus1x1Conv not suitable."");
   }
+  auto dw_inputs = graph.FindInputs(dw_node->id);
+  if (dw_inputs.size() != 1) {
+    return absl::NotFoundError(""DepthwiseConvPlus1x1Conv not suitable."");
+  }
   auto dw_outputs = graph.FindOutputs(dw_node->id);
   auto consumers = graph.FindConsumers(dw_outputs[0]->id);
   if (consumers.size() != 1) {
@@ -60,7 +64,6 @@ absl::Status TryDepthwiseConvPlus1x1Conv(
       dw_node->operation.attributes);
   auto conv_attr =
       absl::any_cast<Convolution2DAttributes>(conv_node->operation.attributes);
-  auto dw_inputs = graph.FindInputs(dw_node->id);
   auto conv_outputs = graph.FindOutputs(conv_node->id);
   OperationDef op_def;
   op_def.precision = precision;
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",model_builder.cc,"@@ -511,9 +511,22 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser {
                            const TfLiteNode* tflite_node,
                            const TfLiteRegistration* registration) final {
     RETURN_IF_ERROR(CheckMaxSupportedOpVersion(registration, 6));
-    RETURN_IF_ERROR(CheckInputsOutputs(context, tflite_node,
-                                       /*runtime_inputs=*/1, /*outputs=*/1));
-    RETURN_IF_ERROR(CheckTensorIsAvailable(context, tflite_node, 1));
+    const int runtime_inputs =
+        GetNumberOfRuntimeInputsForNode(context, tflite_node);
+    if (runtime_inputs > 2) {
+      return absl::InternalError(
+          absl::StrCat(""Expected 1 or 2 input tensor(s), but node has "",
+                       runtime_inputs, "" runtime inputs.""));
+    }
+    const int runtime_outputs = NumOutputs(tflite_node);
+    if (runtime_outputs != 1) {
+      return absl::InternalError(
+          absl::StrCat(""Expected 1 output tensor(s), but node has "",
+                       runtime_outputs, "" runtime outputs.""));
+    }
+    if (runtime_inputs == 1) {
+      RETURN_IF_ERROR(CheckTensorIsAvailable(context, tflite_node, 1));
+    }
     const TfLiteDepthwiseConvParams* tf_options;
     RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
     RETURN_IF_ERROR(CheckStridesAndDilation(
@@ -567,7 +580,12 @@ class DepthwiseConvolutionOperationParser : public TFLiteOperationParser {
     RETURN_IF_ERROR(reader->AddOutputs(node));
 
     DepthwiseConvolution2DAttributes attr;
-    RETURN_IF_ERROR(reader->ReadTensor(1, &attr.weights));
+    const int runtime_inputs = reader->GetNumberOfRuntimeInputs();
+    if (runtime_inputs == 2) {
+      RETURN_IF_ERROR(reader->AddInput(node, 1));
+    } else {  // runtime_inputs == 1;
+      RETURN_IF_ERROR(reader->ReadTensor(1, &attr.weights));
+    }
     reader->ReadTensor(2, &attr.bias).IgnoreError();  // bias is optional
     const TfLiteDepthwiseConvParams* tf_options;
     RETURN_IF_ERROR(RetrieveBuiltinData(tflite_node, &tf_options));
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",add_bias.cc,"@@ -70,6 +70,12 @@ class AddBias : public NodeTransformation {
     }
     if (node->operation.type ==
         ToString(OperationType::DEPTHWISE_CONVOLUTION)) {
+      if (graph->FindInputs(node->id).size() != 1) {
+        return {TransformStatus::DECLINED,
+                ""This transformation is only applicable to depthwise conv ""
+                ""with one ""
+                ""runtime input.""};
+      }
       auto& attr = absl::any_cast<DepthwiseConvolution2DAttributes&>(
           node->operation.attributes);
       return FillBias(attr.weights.shape.o * attr.weights.shape.i, &attr.bias);
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",fuse_add_to_conv.cc,"@@ -54,6 +54,10 @@ class MergeConvolutionWithAdd : public SequenceTransformation {
   TransformResult ApplyToNodesSequence(const std::vector<Node*>& sequence,
                                        GraphFloat32* graph) final {
     auto& conv_node = *sequence[0];
+    if (graph->FindInputs(conv_node.id).size() != 1) {
+      return {TransformStatus::DECLINED,
+              ""This fusion is only applicable to ops with one runtime input.""};
+    }
     auto& add_node = *sequence[1];
     if (add_node.operation.type != ToString(OperationType::ADD)) {
       return {TransformStatus::SKIPPED, """"};
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",depthwise_conv.cc,"@@ -38,6 +38,10 @@ class DepthwiseConvolution : public NodeShader {
  public:
   absl::Status GenerateCode(const GenerationContext& ctx,
                             GeneratedCode* generated_code) const final {
+    if (ctx.input_shapes.size() != 1) {
+      return absl::UnimplementedError(
+          ""DepthWise Convolution does not support more than 1 runtime tensor"");
+    }
     const auto& attr =
         absl::any_cast<const DepthwiseConvolution2DAttributes&>(ctx.op_attr);
     auto weights = attr.weights.shape;
",0,train
a2e00ba6742265ff828759f107c592bc9956cf3c,tensorflow/tensorflow,"Added support of dynamic weights for DepthWise Convolution in OpenCL.

PiperOrigin-RevId: 335916814
Change-Id: I95d93791293291e3499cf99e58bbc404119d88f8",api.cc,"@@ -267,6 +267,11 @@ absl::Status RegisterPrimaryOps(const GraphFloat32& graph, const Node* node,
           device_info, options);
       break;
     case OperationType::DEPTHWISE_CONVOLUTION:
+      if (graph.FindInputs(node->id).size() != 1) {
+        return absl::UnimplementedError(
+            ""DepthWise Convolution does not support more than 1 runtime ""
+            ""tensor"");
+      }
       *tasks =
           SelectDepthWiseConv(node_id, inputs[0], outputs[0],
                               absl::any_cast<DepthwiseConvolution2DAttributes>(
",0,train
1e1beefce2f40dc13f3374fdff4a83b63196d070,tensorflow/tensorflow,"250% GPU speed up of the convolution gradient computation wrt the weights
for Eigen.
Change: 118408303",eigen_backward_spatial_convolutions.h,"@@ -239,15 +239,43 @@ SpatialConvolutionBackwardInput(
   * It is possible to swap the order of the width and height dimensions provided that the same order is used in the input, the kernel, and the output.
   *
   */
-// TODO(gpapan): Resolve a bug in TensorContractionInputMapper at SpatialConvolutions.h that yangke circumvented by using .reshape().reshape().
-// This can significantly accelerate SpatialConvolutionBackwardKernel.
 
 template <typename OutputBackward, typename Input>
-EIGEN_ALWAYS_INLINE
-static const typename internal::conditional<
+EIGEN_ALWAYS_INLINE static const typename internal::conditional<
   internal::traits<OutputBackward>::Layout == ColMajor,
-  const TensorShufflingOp<const array<typename internal::traits<OutputBackward>::Index, 4>, const TensorReverseOp<const array<bool, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 3>, const Input>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > > > > > >,
-  const TensorShufflingOp<const array<typename internal::traits<OutputBackward>::Index, 4>, const TensorReverseOp<const array<bool, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorContractionOp<const array<IndexPair<typename internal::traits<Input>::Index>, 2>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorReshapingOp<const DSizes<typename internal::traits<OutputBackward>::Index, 4>, const TensorImagePatchOp<Dynamic, Dynamic, const OutputBackward> > >, const TensorReshapingOp<const DSizes<typename internal::traits<Input>::Index, 3>, const Input> > > > > >::type
+  TensorReshapingOp<
+    const DSizes<typename internal::traits<Input>::Index, 4>,
+    const TensorContractionOp<
+      const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+      const TensorReshapingOp<
+        const DSizes<typename internal::traits<Input>::Index, 2>,
+        const OutputBackward>,
+      const TensorShufflingOp<
+        const array<typename internal::traits<OutputBackward>::Index, 2>,
+        const TensorReshapingOp<
+          const DSizes<typename internal::traits<Input>::Index, 2>,
+          const TensorImagePatchOp<Dynamic, Dynamic, const Input>
+          >
+        >
+      >
+    >,
+  TensorReshapingOp<
+    const DSizes<typename internal::traits<Input>::Index, 4>,
+    const TensorContractionOp<
+      const array<IndexPair<typename internal::traits<Input>::Index>, 1>,
+      const TensorShufflingOp<
+        const array<typename internal::traits<OutputBackward>::Index, 2>,
+        const TensorReshapingOp<
+          const DSizes<typename internal::traits<Input>::Index, 2>,
+          const TensorImagePatchOp<Dynamic, Dynamic, const Input>
+          >
+        >,
+      const TensorReshapingOp<
+        const DSizes<typename internal::traits<Input>::Index, 2>,
+        const OutputBackward>
+      >
+    >
+  >::type
 SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& output_backward, typename internal::traits<Input>::Index kernelRows, typename internal::traits<Input>::Index kernelCols, const DenseIndex stride = 1, const DenseIndex in_stride = 1) {
 
   typedef typename internal::traits<Input>::Index TensorIndex;
@@ -283,127 +311,93 @@ SpatialConvolutionBackwardKernel(const Input& input, const OutputBackward& outpu
   const TensorIndex kernelColsEff = kernelCols + (kernelCols - 1) * (in_stride - 1);
 
   // Computing the forward padding
-  const TensorIndex forward_pad_top = ((outputRows - 1) * stride + kernelRowsEff - inputRows) / 2;
-  const TensorIndex forward_pad_left = ((outputCols - 1) * stride + kernelColsEff - inputCols) / 2;
-
-  // TODO: factor out the padding computation.
-  const TensorIndex padding_top = kernelRowsEff - 1 - forward_pad_top;
-  const TensorIndex padding_left = kernelColsEff - 1 - forward_pad_left;
-  const TensorIndex padding_bottom = inputRows + kernelRowsEff - 1 - (outputRows - 1) * stride - 1 - padding_top;
-  const TensorIndex padding_right = inputCols + kernelColsEff - 1 - (outputCols - 1) * stride - 1 - padding_left;
-
-  eigen_assert(padding_top >= 0);
-  eigen_assert(padding_left >= 0);
-  eigen_assert(padding_bottom >= 0);
-  eigen_assert(padding_right >= 0);
-
-  // The output_backward has dimensions out_depth X out_rows X out_cols X OTHERS
-  // When we extract the image patches from output_backward (with input as the
-  // kernel), it will have dimensions
-  //  (out_depth) X (input_rows * input_cols) X (kernel_rows * kernel_cols) X OTHERS
-  DSizes<TensorIndex, 4> pre_contract_dims;
+  const TensorIndex padRows = numext::maxi<Index>(
+      0, (outputRows - 1) * stride + kernelRowsEff - inputRows);
+  const TensorIndex padCols = numext::maxi<Index>(
+      0, (outputCols - 1) * stride + kernelColsEff - inputCols);
+  const TensorIndex padding_top = padRows / 2;
+  const TensorIndex padding_bottom = padRows - padding_top;
+  const TensorIndex padding_left = padCols / 2;
+  const TensorIndex padding_right = padCols - padding_left;
+
+  // Reshaped out
+  DSizes<TensorIndex, 2> output_dims;
   if (isColMajor) {
-    pre_contract_dims[0] = kernelFilters;
-    pre_contract_dims[1] = inputRows * inputCols;
-    pre_contract_dims[2] = kernelRows * kernelCols;
-    pre_contract_dims[3] = 1;
+    output_dims[0] = kernelFilters;
+    output_dims[1] = outputRows * outputCols;
     for (int i = 3; i < NumDims; ++i) {
-      pre_contract_dims[3] *= out.dimension(i);
+      output_dims[1] *= out.dimension(i);
     }
   } else {
-    pre_contract_dims[3] = kernelFilters;
-    pre_contract_dims[2] = inputRows * inputCols;
-    pre_contract_dims[1] = kernelRows * kernelCols;
-    pre_contract_dims[0] = 1;
+    output_dims[1] = kernelFilters;
+    output_dims[0] = outputCols * outputRows;
     for (int i = 0; i < NumDims - 3; ++i) {
-      pre_contract_dims[0] *= out.dimension(i);
+      output_dims[0] *= out.dimension(i);
     }
   }
 
-  // The input has dimensions in_depth X (input_rows * input_cols) X OTHERS
-  DSizes<TensorIndex, 3> input_dims;
+  // Reshaped extract_image_patches(in)
+  DSizes<TensorIndex, 2> pre_contract_dims;
   if (isColMajor) {
-    input_dims[0] = kernelChannels;
-    input_dims[1] = inputRows * inputCols;
-    input_dims[2] = 1;
+    pre_contract_dims[0] = kernelChannels * kernelRows * kernelCols;
+    pre_contract_dims[1] = outputRows * outputCols;
     for (int i = 3; i < NumDims; ++i) {
-      input_dims[2] *= in.dimension(i);
+      pre_contract_dims[1] *= in.dimension(i);
     }
-    eigen_assert(input_dims[2] == pre_contract_dims[3]);
+    eigen_assert(output_dims[1] == pre_contract_dims[1]);
   } else {
-    input_dims[2] = kernelChannels;
-    input_dims[1] = inputRows * inputCols;
-    input_dims[0] = 1;
+    pre_contract_dims[1] = kernelCols * kernelRows * kernelChannels;
+    pre_contract_dims[0] = outputRows * outputCols;
     for (int i = 0; i < NumDims - 3; ++i) {
-      input_dims[0] *= in.dimension(i);
+      pre_contract_dims[0] *= in.dimension(i);
     }
-    eigen_assert(input_dims[0] == pre_contract_dims[0]);
+    eigen_assert(output_dims[0] == pre_contract_dims[0]);
   }
 
-  // We will contract along dimensions (1, 2) in in and (1, 3) in out, if
-  // this is col-major.
-  // For row-major, it's dimensions (0, 1) in in and (0, 2) in out.
-  array<IndexPair<TensorIndex>, 2> contract_dims;
-  if (isColMajor) {
-    // col-major: in.contract(output.patches)
-    contract_dims[0] = IndexPair<TensorIndex>(1, 1);
-    contract_dims[1] = IndexPair<TensorIndex>(2, 3);
-  } else {
-    // row-major: output.patches.contract(in)
-    contract_dims[0] = IndexPair<TensorIndex>(0, 0);
-    contract_dims[1] = IndexPair<TensorIndex>(2, 1);
-  }
+  array<TensorIndex, 2> shuffle_dims;
+  shuffle_dims[0] = 1;
+  shuffle_dims[1] = 0;
 
-  // After the contraction, the kernel will have dimension
-  // in_depth X out_depth X kernel_rows X kernel_cols
-  // We will need to shuffle the first two dimensions and reverse the latter
-  // two dimensions.
-  // The end shape is
-  // out_depth X in_shape X kernel_rows X kernel_cols
+  array<IndexPair<TensorIndex>, 1> contract_dims;
+  contract_dims[0] = IndexPair<TensorIndex>(1, 0);
 
-  // This is the shape of the kernel *before* the shuffling.
+  // After the contraction, the kernel will have the desired shape
+  // out_depth X in_shape X kernel_rows X kernel_cols
   DSizes<TensorIndex, 4> kernel_dims;
   if (isColMajor) {
-    kernel_dims[0] = kernelChannels;
-    kernel_dims[1] = kernelFilters;
+    kernel_dims[0] = kernelFilters;
+    kernel_dims[1] = kernelChannels;
     kernel_dims[2] = kernelRows;
     kernel_dims[3] = kernelCols;
   } else {
-    kernel_dims[0] = kernelCols;
+    kernel_dims[3] = kernelFilters;
+    kernel_dims[2] = kernelChannels;
     kernel_dims[1] = kernelRows;
-    kernel_dims[2] = kernelFilters;
-    kernel_dims[3] = kernelChannels;
-  }
-
-  array<TensorIndex, 4> kernel_shuffle;
-  if (isColMajor) {
-    kernel_shuffle[0] = 1;
-    kernel_shuffle[1] = 0;
-    kernel_shuffle[2] = 2;
-    kernel_shuffle[3] = 3;
-  } else {
-    kernel_shuffle[0] = 0;
-    kernel_shuffle[1] = 1;
-    kernel_shuffle[2] = 3;
-    kernel_shuffle[3] = 2;
-  }
-
-  array<bool, 4> kernel_reverse;
-  if (isColMajor) {
-    kernel_reverse[0] = false;
-    kernel_reverse[1] = false;
-    kernel_reverse[2] = true;
-    kernel_reverse[3] = true;
-  } else {
-    kernel_reverse[0] = true;
-    kernel_reverse[1] = true;
-    kernel_reverse[2] = false;
-    kernel_reverse[3] = false;
+    kernel_dims[0] = kernelCols;
   }
 
-  return choose(Cond<internal::traits<Input>::Layout == ColMajor>(),
-                input.reshape(input_dims).contract(output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)).reshape(pre_contract_dims).reshape(pre_contract_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle),
-                output_backward.extract_image_patches(inputRows, inputCols, in_stride, in_stride, 1, 1, stride, stride, padding_top, padding_bottom, padding_left, padding_right, OutScalar(0)).reshape(pre_contract_dims).reshape(pre_contract_dims).contract(input.reshape(input_dims), contract_dims).reshape(kernel_dims).reverse(kernel_reverse).shuffle(kernel_shuffle));
+  return choose(
+      Cond<internal::traits<Input>::Layout == ColMajor>(),
+      output_backward.reshape(output_dims)
+      .contract(
+          input.extract_image_patches(
+              kernelRows, kernelCols, stride, stride,
+              in_stride, in_stride, 1, 1, padding_top, padding_bottom,
+              padding_left, padding_right, OutScalar(0))
+          .reshape(pre_contract_dims)
+          .shuffle(shuffle_dims),
+          contract_dims)
+      .reshape(kernel_dims),
+      input.extract_image_patches(
+          kernelRows, kernelCols, stride, stride,
+          in_stride, in_stride, 1, 1, padding_top, padding_bottom,
+          padding_left, padding_right, OutScalar(0))
+      .reshape(pre_contract_dims)
+      .shuffle(shuffle_dims)
+      .contract(
+          output_backward.reshape(output_dims),
+          contract_dims)
+      .reshape(kernel_dims));
 }
 
 } // end namespace Eigen
",0,train
cd964065fb03e0b74f338dd7d9a499d1e7544ffb,tensorflow/tensorflow,"Fix a subtle bug where we unsafely modify the list while iterating it.

PiperOrigin-RevId: 293933459
Change-Id: I0230df64b5dbfd03e941a0d19bd5d339b414cfff",cluster_formation.cc,"@@ -100,7 +100,8 @@ void ReplaceLiveOutExternalUses(llvm::ArrayRef<Value> live_outs,
   Region* launch_op_region = &launch_op.body();
   for (const auto& p : llvm::zip(live_outs, launch_op.getResults())) {
     Value from = std::get<0>(p);
-    for (auto& use : from.getUses()) {
+    // TODO(jingpu): move this to RegionUtils.h in MLIR core.
+    for (auto& use : llvm::make_early_inc_range(from.getUses())) {
       if (launch_op_region->isAncestor(use.getOwner()->getParentRegion()))
         continue;
       use.set(std::get<1>(p));
",0,test
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",process_function_library_runtime_test.cc,"@@ -764,8 +764,8 @@ Tensor GetResourceHandle(const string& var_name, const string& container,
   handle.set_device(device_name);
   handle.set_container(container);
   handle.set_name(var_name);
-  handle.set_hash_code(MakeTypeIndex<Var>().hash_code());
-  handle.set_maybe_type_name(MakeTypeIndex<Var>().name());
+  handle.set_hash_code(TypeIndex::Make<Var>().hash_code());
+  handle.set_maybe_type_name(TypeIndex::Make<Var>().name());
   Tensor tensor(DT_RESOURCE, TensorShape({}));
   tensor.scalar<ResourceHandle>()() = handle;
   return tensor;
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",resource_mgr.h,"@@ -301,7 +301,7 @@ ResourceHandle MakeResourceHandle(
   return MakeResourceHandle(
       container.empty() ? ctx->resource_manager()->default_container()
                         : container,
-      name, *ctx->device(), MakeTypeIndex<T>(), dtypes_and_shapes);
+      name, *ctx->device(), TypeIndex::Make<T>(), dtypes_and_shapes);
 }
 
 template <typename T>
@@ -311,7 +311,7 @@ ResourceHandle MakeResourceHandle(
   return MakeResourceHandle(
       container.empty() ? ctx->resource_manager()->default_container()
                         : container,
-      name, *ctx->device(), MakeTypeIndex<T>(), dtypes_and_shapes);
+      name, *ctx->device(), TypeIndex::Make<T>(), dtypes_and_shapes);
 }
 
 Status MakeResourceHandleToOutput(OpKernelContext* context, int output_index,
@@ -589,7 +589,7 @@ Status ResourceMgr::Create(const string& container, const string& name,
   CheckDeriveFromResourceBase<T>();
   CHECK(resource != nullptr);
   mutex_lock l(mu_);
-  return DoCreate(container, MakeTypeIndex<T>(), name, resource);
+  return DoCreate(container, TypeIndex::Make<T>(), name, resource);
 }
 
 template <typename T, bool use_dynamic_cast>
@@ -635,7 +635,7 @@ template <typename T, bool use_dynamic_cast>
 Status ResourceMgr::LookupInternal(const string& container, const string& name,
                                    T** resource) const {
   ResourceBase* found = nullptr;
-  Status s = DoLookup(container, MakeTypeIndex<T>(), name, &found);
+  Status s = DoLookup(container, TypeIndex::Make<T>(), name, &found);
   if (s.ok()) {
     // It's safe to down cast 'found' to T* since
     // typeid(T).hash_code() is part of the map key.
@@ -660,7 +660,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name,
   s = LookupInternal<T, use_dynamic_cast>(container, name, resource);
   if (s.ok()) return s;
   TF_RETURN_IF_ERROR(creator(resource));
-  s = DoCreate(container, MakeTypeIndex<T>(), name, *resource);
+  s = DoCreate(container, TypeIndex::Make<T>(), name, *resource);
   if (!s.ok()) {
     return errors::Internal(""LookupOrCreate failed unexpectedly"");
   }
@@ -671,7 +671,7 @@ Status ResourceMgr::LookupOrCreate(const string& container, const string& name,
 template <typename T>
 Status ResourceMgr::Delete(const string& container, const string& name) {
   CheckDeriveFromResourceBase<T>();
-  return DoDelete(container, MakeTypeIndex<T>(), name);
+  return DoDelete(container, TypeIndex::Make<T>(), name);
 }
 
 template <typename T>
@@ -710,7 +710,7 @@ Status ValidateDevice(OpKernelContext* ctx, const ResourceHandle& p);
 template <typename T>
 Status ValidateDeviceAndType(OpKernelContext* ctx, const ResourceHandle& p) {
   TF_RETURN_IF_ERROR(internal::ValidateDevice(ctx, p));
-  auto type_index = MakeTypeIndex<T>();
+  auto type_index = TypeIndex::Make<T>();
   if (type_index.hash_code() != p.hash_code()) {
     return errors::InvalidArgument(
         ""Trying to access resource using the wrong type. Expected "",
@@ -883,7 +883,7 @@ ResourceHandle ScopedStepContainer::MakeResourceHandle(
   mutex_lock ml(mu_);
   dirty_ = true;
   return tensorflow::MakeResourceHandle(container_, name, device,
-                                        MakeTypeIndex<T>(), {});
+                                        TypeIndex::Make<T>(), {});
 }
 
 template <typename T>
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",resource_op_kernel.h,"@@ -105,7 +105,7 @@ class ResourceOpKernel : public OpKernel {
     if (has_resource_type_) {
       OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
                                   context, 0, cinfo_.container(), cinfo_.name(),
-                                  MakeTypeIndex<T>()));
+                                  TypeIndex::Make<T>()));
     } else {
       context->set_output_ref(0, &mu_, handle_.AccessTensor(context));
     }
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant.h,"@@ -144,7 +144,7 @@ void EncodeVariant(const T& value, string* buf);
 //   Variant y_type_unknown = serialized_proto_f;  // Store serialized Variant.
 //
 //   EXPECT_EQ(x.TypeName(), y_type_unknown.TypeName());  // Looks like Foo.
-//   EXPECT_EQ(MakeTypeIndex<VariantTensorDataProto>(),
+//   EXPECT_EQ(TypeIndex::Make<VariantTensorDataProto>(),
 //             y_type_unknown.TypeId());
 //
 class Variant {
@@ -227,7 +227,7 @@ class Variant {
   // of the original type when a TensorValueDataProto is stored as the
   // value.  In this case, it returns the TypeIndex of TensorValueDataProto.
   TypeIndex TypeId() const {
-    const TypeIndex VoidTypeIndex = MakeTypeIndex<void>();
+    const TypeIndex VoidTypeIndex = TypeIndex::Make<void>();
     if (is_empty()) {
       return VoidTypeIndex;
     }
@@ -244,7 +244,7 @@ class Variant {
   // otherwise.
   template <typename T>
   T* get() {
-    const TypeIndex TTypeIndex = MakeTypeIndex<T>();
+    const TypeIndex TTypeIndex = TypeIndex::Make<T>();
     if (is_empty() || (TTypeIndex != TypeId())) return nullptr;
     return std::addressof(static_cast<Variant::Value<T>*>(GetValue())->value);
   }
@@ -253,7 +253,7 @@ class Variant {
   // otherwise.
   template <typename T>
   const T* get() const {
-    const TypeIndex TTypeIndex = MakeTypeIndex<T>();
+    const TypeIndex TTypeIndex = TypeIndex::Make<T>();
     if (is_empty() || (TTypeIndex != TypeId())) return nullptr;
     return std::addressof(
         static_cast<const Variant::Value<T>*>(GetValue())->value);
@@ -333,7 +333,7 @@ class Variant {
 
     TypeIndex TypeId() const final {
       const TypeIndex value_type_index =
-          MakeTypeIndex<typename std::decay<T>::type>();
+          TypeIndex::Make<typename std::decay<T>::type>();
       return value_type_index;
     }
 
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_encode_decode.h,"@@ -160,7 +160,7 @@ string TypeNameVariantImpl(
     const T& value,
     TypeNameResolver<T, false /* has_type_name */, false /* Tensor */,
                      false /* protobuf */>) {
-  return port::MaybeAbiDemangle(MakeTypeIndex<T>().name());
+  return port::MaybeAbiDemangle(TypeIndex::Make<T>().name());
 }
 
 template <typename T>
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_op_registry.h,"@@ -521,7 +521,7 @@ class UnaryVariantBinaryOpRegistration {
 #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION(T, direction,   \
                                                              device_copy_fn) \
   INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER(          \
-      __COUNTER__, T, direction, MakeTypeIndex<T>(), device_copy_fn)
+      __COUNTER__, T, direction, TypeIndex::Make<T>(), device_copy_fn)
 
 #define INTERNAL_REGISTER_UNARY_VARIANT_DEVICE_COPY_FUNCTION_UNIQ_HELPER( \
     ctr, T, direction, type_index, device_copy_fn)                        \
@@ -542,7 +542,7 @@ class UnaryVariantBinaryOpRegistration {
 #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION(op, device, T,     \
                                                  unary_op_function) \
   REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER(             \
-      __COUNTER__, op, device, T, MakeTypeIndex<T>(), unary_op_function)
+      __COUNTER__, op, device, T, TypeIndex::Make<T>(), unary_op_function)
 
 #define REGISTER_UNARY_VARIANT_UNARY_OP_FUNCTION_UNIQ_HELPER(       \
     ctr, op, device, T, type_index, unary_op_function)              \
@@ -563,7 +563,7 @@ class UnaryVariantBinaryOpRegistration {
 #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION(op, device, T,      \
                                                   binary_op_function) \
   REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER(              \
-      __COUNTER__, op, device, T, MakeTypeIndex<T>(), binary_op_function)
+      __COUNTER__, op, device, T, TypeIndex::Make<T>(), binary_op_function)
 
 #define REGISTER_UNARY_VARIANT_BINARY_OP_FUNCTION_UNIQ_HELPER( \
     ctr, op, device, T, type_index, binary_op_function)        \
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_op_registry_test.cc,"@@ -155,12 +155,12 @@ TEST(VariantOpCopyToGPURegistryTest, TestBasic) {
   // No registered copy fn for GPU<->GPU.
   EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetDeviceCopyFn(
                 VariantDeviceCopyDirection::DEVICE_TO_DEVICE,
-                MakeTypeIndex<VariantValue>()),
+                TypeIndex::Make<VariantValue>()),
             nullptr);
 
   auto* copy_to_gpu_fn = UnaryVariantOpRegistry::Global()->GetDeviceCopyFn(
       VariantDeviceCopyDirection::HOST_TO_DEVICE,
-      MakeTypeIndex<VariantValue>());
+      TypeIndex::Make<VariantValue>());
   EXPECT_NE(copy_to_gpu_fn, nullptr);
 
   VariantValue vv{true /* early_exit */};
@@ -183,7 +183,7 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) {
   UnaryVariantOpRegistry registry;
   UnaryVariantOpRegistry::AsyncVariantDeviceCopyFn f;
   class FjFjFj {};
-  const auto kTypeIndex = MakeTypeIndex<FjFjFj>();
+  const auto kTypeIndex = TypeIndex::Make<FjFjFj>();
   registry.RegisterDeviceCopyFn(VariantDeviceCopyDirection::HOST_TO_DEVICE,
                                 kTypeIndex, f);
   EXPECT_DEATH(registry.RegisterDeviceCopyFn(
@@ -193,9 +193,10 @@ TEST(VariantOpCopyToGPURegistryTest, TestDuplicate) {
 
 TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) {
   class Blah {};
-  EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn(
-                ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, MakeTypeIndex<Blah>()),
-            nullptr);
+  EXPECT_EQ(
+      UnaryVariantOpRegistry::Global()->GetUnaryOpFn(
+          ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU, TypeIndex::Make<Blah>()),
+      nullptr);
 
   VariantValue vv_early_exit{true /* early_exit */, 0 /* value */};
   Variant v = vv_early_exit;
@@ -218,9 +219,10 @@ TEST(VariantOpZerosLikeRegistryTest, TestBasicCPU) {
 #if GOOGLE_CUDA || TENSORFLOW_USE_ROCM
 TEST(VariantOpUnaryOpRegistryTest, TestBasicGPU) {
   class Blah {};
-  EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetUnaryOpFn(
-                ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, MakeTypeIndex<Blah>()),
-            nullptr);
+  EXPECT_EQ(
+      UnaryVariantOpRegistry::Global()->GetUnaryOpFn(
+          ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_GPU, TypeIndex::Make<Blah>()),
+      nullptr);
 
   VariantValue vv_early_exit{true /* early_exit */, 0 /* value */};
   Variant v = vv_early_exit;
@@ -245,7 +247,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) {
   UnaryVariantOpRegistry registry;
   UnaryVariantOpRegistry::VariantUnaryOpFn f;
   class FjFjFj {};
-  const auto kTypeIndex = MakeTypeIndex<FjFjFj>();
+  const auto kTypeIndex = TypeIndex::Make<FjFjFj>();
 
   registry.RegisterUnaryOpFn(ZEROS_LIKE_VARIANT_UNARY_OP, DEVICE_CPU,
                              kTypeIndex, f);
@@ -263,7 +265,7 @@ TEST(VariantOpUnaryOpRegistryTest, TestDuplicate) {
 TEST(VariantOpAddRegistryTest, TestBasicCPU) {
   class Blah {};
   EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn(
-                ADD_VARIANT_BINARY_OP, DEVICE_CPU, MakeTypeIndex<Blah>()),
+                ADD_VARIANT_BINARY_OP, DEVICE_CPU, TypeIndex::Make<Blah>()),
             nullptr);
 
   VariantValue vv_early_exit{true /* early_exit */, 3 /* value */};
@@ -290,7 +292,7 @@ TEST(VariantOpAddRegistryTest, TestBasicCPU) {
 TEST(VariantOpAddRegistryTest, TestBasicGPU) {
   class Blah {};
   EXPECT_EQ(UnaryVariantOpRegistry::Global()->GetBinaryOpFn(
-                ADD_VARIANT_BINARY_OP, DEVICE_GPU, MakeTypeIndex<Blah>()),
+                ADD_VARIANT_BINARY_OP, DEVICE_GPU, TypeIndex::Make<Blah>()),
             nullptr);
 
   VariantValue vv_early_exit{true /* early_exit */, 3 /* value */};
@@ -318,7 +320,7 @@ TEST(VariantOpAddRegistryTest, TestDuplicate) {
   UnaryVariantOpRegistry registry;
   UnaryVariantOpRegistry::VariantBinaryOpFn f;
   class FjFjFj {};
-  const auto kTypeIndex = MakeTypeIndex<FjFjFj>();
+  const auto kTypeIndex = TypeIndex::Make<FjFjFj>();
 
   registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU, kTypeIndex, f);
   EXPECT_DEATH(registry.RegisterBinaryOpFn(ADD_VARIANT_BINARY_OP, DEVICE_CPU,
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",variant_test.cc,"@@ -589,7 +589,7 @@ TEST(VariantTest, TensorListTest) {
   serialized.ToProto(&data);
   const Variant y_unknown = data;
   EXPECT_EQ(y_unknown.TypeName(), ""TensorList"");
-  EXPECT_EQ(y_unknown.TypeId(), MakeTypeIndex<VariantTensorDataProto>());
+  EXPECT_EQ(y_unknown.TypeId(), TypeIndex::Make<VariantTensorDataProto>());
   EXPECT_EQ(y_unknown.DebugString(),
             strings::StrCat(
                 ""Variant<type: TensorList value: "", data.DebugString(), "">""));
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",conditional_accumulator_op.cc,"@@ -90,7 +90,7 @@ class ResourceConditionalAccumulatorOp : public ConditionalAccumulatorBaseOp {
     h(1) = cinfo_.name();
     OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput(
                             ctx, 0, cinfo_.container(), cinfo_.name(),
-                            MakeTypeIndex<ConditionalAccumulatorBase>()));
+                            TypeIndex::Make<ConditionalAccumulatorBase>()));
   }
 
   TF_DISALLOW_COPY_AND_ASSIGN(ResourceConditionalAccumulatorOp);
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",dataset_utils.h,"@@ -35,7 +35,7 @@ Status CreateHandle(OpKernelContext* ctx, T* resource,
   TF_RETURN_IF_ERROR(mgr->Create<T>(container_name, unique_name, resource));
 
   *handle = MakeResourceHandle(container_name, unique_name, *ctx->device(),
-                               MakeTypeIndex<T>());
+                               TypeIndex::Make<T>());
   return Status::OK();
 }
 
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",threadpool_dataset_op.cc,"@@ -111,7 +111,7 @@ class ThreadPoolHandleOp : public OpKernel {
     }
     OP_REQUIRES_OK(ctx, MakeResourceHandleToOutput(
                             ctx, 0, cinfo_.container(), cinfo_.name(),
-                            MakeTypeIndex<ThreadPoolResource>()));
+                            TypeIndex::Make<ThreadPoolResource>()));
   }
 
  private:
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",iterator_ops.cc,"@@ -443,7 +443,7 @@ void IteratorHandleOp::Compute(OpKernelContext* context)
   }
   OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
                               context, 0, cinfo_.container(), cinfo_.name(),
-                              MakeTypeIndex<IteratorResource>()));
+                              TypeIndex::Make<IteratorResource>()));
 }
 
 Status IteratorHandleOp::VerifyResource(IteratorResource* resource) {
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",multi_device_iterator_ops.cc,"@@ -475,7 +475,7 @@ class MultiDeviceIteratorHandleOp : public OpKernel {
     }
     OP_REQUIRES_OK(context, MakeResourceHandleToOutput(
                                 context, 0, container_name, unique_name,
-                                MakeTypeIndex<MultiDeviceIterator>()));
+                                TypeIndex::Make<MultiDeviceIterator>()));
   }
 
  private:
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",ops_testutil.h,"@@ -126,7 +126,7 @@ class OpsTestBase : public ::testing::Test {
     std::string container_name =
         container.empty() ? rm->default_container() : container;
     EXPECT_TRUE(rm->Create(container_name, name, resource).ok());
-    AddResourceInputInternal(container_name, name, MakeTypeIndex<T>());
+    AddResourceInputInternal(container_name, name, TypeIndex::Make<T>());
   }
 
   // Runs an operation producing 'num_outputs' outputs.
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",tile_ops.cc,"@@ -554,7 +554,7 @@ inline void TileGradientOp<Device, Tmultiples>::HandleCase(
     OpKernelContext* context, const std::vector<Tmultiples>& input_dims,
     const gtl::ArraySlice<Tmultiples>& multiples_array, Tensor* result) {
   LOG(FATAL) << ""TileGradientOp: Invalid combination of Device, DT and NDIM: ""
-             << MakeTypeIndex<Device>().name() << "", "" << DataTypeString(DT)
+             << TypeIndex::Make<Device>().name() << "", "" << DataTypeString(DT)
              << "", "" << NDIM;
 }
 
",0,train
e8c972652ad77076faf464df4f59240a2dd1548a,tensorflow/tensorflow,"Move uses of `tensorflow::MakeTypeIndex()` to `tensorflow::TypeIndex::Make`.

PiperOrigin-RevId: 317920618
Change-Id: I7af52fdf92c77858ffa897f6d5449bfb0213f4e5",abi_test.cc,"@@ -23,14 +23,14 @@ namespace tensorflow {
 struct MyRandomPODType {};
 
 TEST(AbiTest, AbiDemangleTest) {
-  EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex<int>().name()), ""int"");
+  EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make<int>().name()), ""int"");
 
 #ifdef PLATFORM_WINDOWS
   const char pod_type_name[] = ""struct tensorflow::MyRandomPODType"";
 #else
   const char pod_type_name[] = ""tensorflow::MyRandomPODType"";
 #endif
-  EXPECT_EQ(port::MaybeAbiDemangle(MakeTypeIndex<MyRandomPODType>().name()),
+  EXPECT_EQ(port::MaybeAbiDemangle(TypeIndex::Make<MyRandomPODType>().name()),
             pod_type_name);
 
   EXPECT_EQ(
",0,train
c07b18684c3b20dd91911a31bbd6169ad9cc1617,tensorflow/tensorflow,Fix set_difference doc,sets_impl.py,"@@ -247,7 +247,7 @@ def set_difference(a, b, aminusb=True, validate_indices=True):
     #
     # collections.OrderedDict([
     #     ((0, 0, 0), 2),
-    #     ((0, 0, 1), 3),
+    #     ((0, 1, 0), 3),
     # ])
   ```
 
",0,train
7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook.
Change: 134278810",basic_session_run_hooks.py,"@@ -51,7 +51,12 @@ class LoggingTensorHook(session_run_hook.SessionRunHook):
       tensors: `dict` of tag to tensors/names or
           `iterable` of tensors/names.
       every_n_iter: `int`, print every N iteration.
+
+    Raises:
+     ValueError: if `every_n_iter` is non-positive.
     """"""
+    if every_n_iter <= 0:
+      raise ValueError(""Invalid every_n_iter=%s."" % every_n_iter)
     if not isinstance(tensors, dict):
       tensors = {item: item for item in tensors}
     self._tensors = tensors
@@ -147,7 +152,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
     Raises:
       ValueError: One of `save_steps` or `save_secs` should be set.
     """"""
-    logging.info(""Create CheckpointSaverHook"")
+    logging.info(""Create CheckpointSaverHook."")
     self._saver = saver
     self._checkpoint_dir = checkpoint_dir
     self._summary_writer = SummaryWriterCache.get(checkpoint_dir)
@@ -173,7 +178,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
 
   def before_run(self, run_context):  # pylint: disable=unused-argument
     if self._last_saved_time is None:
-      # Write graph in the first call
+      # Write graph in the first call.
       training_util.write_graph(
           ops.get_default_graph().as_graph_def(add_shapes=True),
           self._checkpoint_dir,
",0,train
7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook.
Change: 134278810",graph_actions.py,"@@ -162,7 +162,7 @@ def _monitored_train(graph,
       This feed dictionary will be used when `init_op` is evaluated.
     init_fn: Optional callable passed to Supervisor to initialize the model.
     log_every_steps: Output logs regularly. The logs contain timing data and the
-      current loss.
+      current loss. A `0` or negative value disables logging.
     supervisor_is_chief: Whether the current process is the chief supervisor in
       charge of restoring the model and running standard services.
     supervisor_master: The master string to use when preparing the session.
@@ -231,14 +231,13 @@ def _monitored_train(graph,
   # (such as ExportMonitor). Appending them after the basic_session_run_hooks.
   all_hooks = []
   with graph.as_default():
-    all_hooks.extend([
-        basic_session_run_hooks.NanTensorHook(
-            loss_op, fail_on_nan_loss=fail_on_nan_loss),
-        basic_session_run_hooks.LoggingTensorHook({
-            'loss': loss_op.name,
-            'step': global_step_tensor.name
-        }, every_n_iter=log_every_steps),
-    ])
+    all_hooks.append(basic_session_run_hooks.NanTensorHook(
+        loss_op, fail_on_nan_loss=fail_on_nan_loss))
+    if log_every_steps > 0:
+      all_hooks.append(basic_session_run_hooks.LoggingTensorHook({
+          'loss': loss_op.name,
+          'step': global_step_tensor.name
+      }, every_n_iter=log_every_steps))
 
     scaffold = monitored_session.Scaffold(
         init_op=init_op,
",0,train
7b4389140094231ecf8c7491e3bb490a86ef1dd7,tensorflow/tensorflow,"Let log_every_steps <=0 disable logging hook.
Change: 134278810",basic_session_run_hooks_test.py,"@@ -100,6 +100,12 @@ class LoggingTensorHookTest(tf.test.TestCase):
   def tearDown(self):
     tf.logging.info = self._actual_log
 
+  def test_illegal_args(self):
+    with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'):
+      basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=0)
+    with self.assertRaisesRegexp(ValueError, 'nvalid every_n_iter'):
+      basic_session_run_hooks.LoggingTensorHook(tensors=['t'], every_n_iter=-10)
+
   def test_print(self):
     with tf.Graph().as_default(), tf.Session() as sess:
       t = tf.constant(42.0, name='foo')
",0,train
088bd27daba96e3905c00dc075a7b42e055345a6,tensorflow/tensorflow,"Add a note that the replica_id_in_sync_group might not correspond to XLA replica ID.

PiperOrigin-RevId: 292609147
Change-Id: I6052a297a50e213471ee8d3a62a4d0964affd9e1",distribute_lib.py,"@@ -2030,6 +2030,9 @@ class ReplicaContext(object):
     This identifies the replica that is part of a sync group. Currently we
     assume that all sync groups contain the same number of replicas. The value
     of the replica id can range from 0 to `num_replica_in_sync` - 1.
+
+    NOTE: This is not guaranteed to be the same ID as the XLA replica ID used
+    for low-level operations such as collective_permute.
     """"""
     require_replica_context(self)
     return self._replica_id_in_sync_group
",0,test
ec0e105c6fe537969a736ddb546c277ae18b9282,tensorflow/tensorflow,"Fix build failure of list_flex_ops_main in OSS

The cc_binary required --config=monolithic which can't be passed
into a native.genrule. Using tf_cc_binary solves the build failure.

PiperOrigin-RevId: 316631689
Change-Id: Ia706d532578ccbf5bc8f172f6344f166d05531fb",list_flex_ops_test.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""flatbuffers/flexbuffers.h""  // from @flatbuffers
 #include ""tensorflow/core/framework/node_def.pb.h""
 #include ""tensorflow/core/platform/protobuf.h""
+#include ""tensorflow/core/platform/resource_loader.h""
 #include ""tensorflow/lite/kernels/test_util.h""
 
 namespace tflite {
@@ -31,8 +32,9 @@ class FlexOpsListTest : public ::testing::Test {
  protected:
   FlexOpsListTest() {}
 
-  void ReadOps(const string& model_path) {
-    auto model = FlatBufferModel::BuildFromFile(model_path.data());
+  void ReadOps(const string& path) {
+    std::string full_path = tensorflow::GetDataDependencyFilepath(path);
+    auto model = FlatBufferModel::BuildFromFile(full_path.data());
     AddFlexOpsFromModel(model->GetModel(), &flex_ops_);
     output_text_ = OpListToJSONString(flex_ops_);
   }
@@ -84,30 +86,29 @@ class FlexOpModel : public SingleOpModel {
 };
 
 TEST_F(FlexOpsListTest, TestModelsNoFlex) {
-  ReadOps(""third_party/tensorflow/lite/testdata/test_model.bin"");
+  ReadOps(""tensorflow/lite/testdata/test_model.bin"");
   EXPECT_EQ(output_text_, ""[]"");
 }
 
 TEST_F(FlexOpsListTest, TestBrokenModel) {
   EXPECT_DEATH_IF_SUPPORTED(
-      ReadOps(""third_party/tensorflow/lite/testdata/test_model_broken.bin""),
-      """");
+      ReadOps(""tensorflow/lite/testdata/test_model_broken.bin""), """");
 }
 
 TEST_F(FlexOpsListTest, TestZeroSubgraphs) {
-  ReadOps(""third_party/tensorflow/lite/testdata/0_subgraphs.bin"");
+  ReadOps(""tensorflow/lite/testdata/0_subgraphs.bin"");
   EXPECT_EQ(output_text_, ""[]"");
 }
 
 TEST_F(FlexOpsListTest, TestFlexAdd) {
-  ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin"");
+  ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin"");
   EXPECT_EQ(output_text_,
             ""[[\""Add\"", \""BinaryOp<CPUDevice, functor::add<float>>\""]]"");
 }
 
 TEST_F(FlexOpsListTest, TestTwoModel) {
-  ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin"");
-  ReadOps(""third_party/tensorflow/lite/testdata/softplus_flex.bin"");
+  ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin"");
+  ReadOps(""tensorflow/lite/testdata/softplus_flex.bin"");
   EXPECT_EQ(output_text_,
             ""[[\""Add\"", \""BinaryOp<CPUDevice, ""
             ""functor::add<float>>\""],\n[\""Softplus\"", \""SoftplusOp<CPUDevice, ""
@@ -115,8 +116,8 @@ TEST_F(FlexOpsListTest, TestTwoModel) {
 }
 
 TEST_F(FlexOpsListTest, TestDuplicatedOp) {
-  ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin"");
-  ReadOps(""third_party/tensorflow/lite/testdata/multi_add_flex.bin"");
+  ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin"");
+  ReadOps(""tensorflow/lite/testdata/multi_add_flex.bin"");
   EXPECT_EQ(output_text_,
             ""[[\""Add\"", \""BinaryOp<CPUDevice, functor::add<float>>\""]]"");
 }
",0,train
dd934175ecaa6d52d8a297144215acfa650360ac,tensorflow/tensorflow,"Avoid compiler crash on aggregate initialization of flexible array member

PiperOrigin-RevId: 335754239
Change-Id: Ibc812c55e7e64739a030a6f03976c9c73d799ad2",micro_allocator.cc,"@@ -59,7 +59,7 @@ struct AllocationInfo {
 // requirement for SIMD extensions.
 constexpr int kBufferAlignment = 16;
 constexpr char kOfflineMemAllocMetadata[] = ""OfflineMemoryAllocation"";
-const TfLiteIntArray kZeroLengthIntArray = {0, {}};
+const TfLiteIntArray kZeroLengthIntArray = {};
 
 class MicroBuiltinDataAllocator : public BuiltinDataAllocator {
  public:
",0,train
105174318a9e152a50f26bff60e29b1217371d93,tensorflow/tensorflow,Example code uses `tf.image` for image ops instead of `tf`.,image_ops.py,"@@ -109,9 +109,9 @@ Example:
 
 ```python
 # Decode an image and convert it to HSV.
-rgb_image = tf.decode_png(...,  channels=3)
-rgb_image_float = tf.convert_image_dtype(rgb_image, tf.float32)
-hsv_image = tf.rgb_to_hsv(rgb_image)
+rgb_image = tf.image.decode_png(...,  channels=3)
+rgb_image_float = tf.image.convert_image_dtype(rgb_image, tf.float32)
+hsv_image = tf.image.rgb_to_hsv(rgb_image)
 ```
 
 @@rgb_to_grayscale
@@ -776,7 +776,7 @@ def adjust_contrast(images, contrast_factor):
     contrast_factor: A float multiplier for adjusting contrast.
 
   Returns:
-    The constrast-adjusted image or images.
+    The contrast-adjusted image or images.
   """"""
   with ops.op_scope([images, contrast_factor], None, 'adjust_contrast') as name:
     # Remember original dtype to so we can convert back if needed
",0,train
ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests

PiperOrigin-RevId: 305160089
Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",acceleration_test_list.cc,"@@ -349,6 +349,7 @@ SVDFOpTest/BlackBoxTestRank2
 # tile_test
 -TileTest/TileTest/Int64.+/.+
 -TileTest/TileTest/Boolean.+/.+
+-TileTest/TileTest/String.+/.+
 # Const tensor only
 TileTest/TileTest/.+/0,29
 
",0,train
ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests

PiperOrigin-RevId: 305160089
Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",mul_test.cc,"@@ -291,12 +291,6 @@ void NoActivation() {
 
 template <TensorType tensor_type, typename integer_dtype>
 void NoActivationLargeMultiplier() {
-  // TODO(b/138722124): Remove this after setting the appropriate op version (3)
-  // for dependent tests.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    // NNAPI doesn't currently support Mul with multiplier>1.
-    return;
-  }
   // Intentionally pathological output range much narrower than needed
   // to represent input values to exercise the multiplier>1 case.
   QuantizedMulOpModel m({tensor_type, {1, 2, 2, 1}, -100, 100},
",0,train
ae9ff37386c0c5cf40d8877cc911394e90cbd7bd,tensorflow/tensorflow,"Enable NNAPI tests

PiperOrigin-RevId: 305160089
Change-Id: I446eb5481ca6adc76e258b25d41dd8406421d74b",tile_test.cc,"@@ -203,10 +203,6 @@ TEST_P(TileTest, Int64Matrix64Multipliers) {
 }
 
 TEST_P(TileTest, StringMatrix) {
-  // TODO(b/138722124): Enable these tests on NNAPI.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    return;
-  }
   Check<std::string>(
       /*input_shape=*/{2, 3},
       /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""},
@@ -218,10 +214,6 @@ TEST_P(TileTest, StringMatrix) {
 }
 
 TEST_P(TileTest, StringMatrix64Multipliers) {
-  // TODO(b/138722124): Enable these tests on NNAPI.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    return;
-  }
   Check<std::string, int64_t>(
       /*input_shape=*/{2, 3},
       /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""},
@@ -233,10 +225,6 @@ TEST_P(TileTest, StringMatrix64Multipliers) {
 }
 
 TEST_P(TileTest, StringMatrix2) {
-  // TODO(b/138722124): Enable these tests on NNAPI.
-  if (SingleOpModel::GetForceUseNnapi()) {
-    return;
-  }
   Check<std::string>(
       /*input_shape=*/{3, 2, 1},
       /*input_data=*/{""AA"", ""AB"", ""AC"", ""BA"", ""BB"", ""BC""},
",0,train
8ccb3cf1b88a2c5d3431b333dd5d6b2215de4bed,tensorflow/tensorflow,"Add unit tests to assert that the strings ""true""/""True""/""false""/""0""/""1"" are rejected by set_hparam() on boolean hyperparameters.

PiperOrigin-RevId: 242492963",hparam_test.py,"@@ -491,6 +491,26 @@ class HParamsTest(test.TestCase):
     with self.assertRaises(ValueError):
       hparams.set_hparam('bool_', 1)
 
+    # Unfortunately there is no automagic conversion of bool-like strings to
+    # bool.
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', 'true')
+
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', 'True')
+
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', 'false')
+
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', 'False')
+
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', '0')
+
+    with self.assertRaises(ValueError):
+      hparams.set_hparam('bool_', '1')
+
     with self.assertRaises(ValueError):
       hparams.set_hparam('int_', 2.2)
 
",0,test
9b6fd34a0850939fca054098b97b097c1039405a,tensorflow/tensorflow,"ConcatZ reverted and rewritten to support batch implicitly.

PiperOrigin-RevId: 272790908",concat_z.cc,"@@ -36,22 +36,27 @@ bool IsAllChannelsX4(const std::vector<int>& channels) {
   return true;
 }
 
+std::string GetSrcDepthSizeVar(int src_index) {
+  return ""src_size_"" + std::to_string(src_index) + ""_depth"";
+}
+
 std::string GetConcatKernelCode(
     const OperationDef& op_def, const std::vector<int>& channels,
     const std::vector<ElementwiseOperation*>& linked_operations) {
   std::vector<TensorCodeGenerator> srcs(channels.size());
   for (int i = 0; i < channels.size(); ++i) {
     const std::string tensor_name = ""src_data_"" + std::to_string(i);
-    const std::string uniform_name = ""src_size_"" + std::to_string(i);
-    srcs[i] =
-        TensorCodeGenerator(tensor_name, uniform_name, op_def.src_tensors[i]);
+    srcs[i] = TensorCodeGenerator(
+        tensor_name, {""dst_size.x"", ""dst_size.y"", GetSrcDepthSizeVar(i)},
+        op_def.src_tensors[i]);
   }
-  TensorCodeGenerator dst(""dst_data"", ""dst_size"", op_def.dst_tensors[0]);
+  TensorCodeGenerator dst(""dst_data"",
+                          {""dst_size.x"", ""dst_size.y"", ""dst_size.z""},
+                          op_def.dst_tensors[0]);
 
   std::string c = GetCommonDefines(op_def.precision);
   const std::string postfix[] = {"".x"", "".y"", "".z"", "".w""};
 
-  const std::string batch_id = op_def.batch_support ? ""batch_id"" : """";
   c += ""__kernel void main_function(\n"";
   for (const auto& src : srcs) {
     c += src.GetDeclaration(AccessType::READ) + "",\n"";
@@ -59,21 +64,13 @@ std::string GetConcatKernelCode(
   c += dst.GetDeclaration(AccessType::WRITE);
   c += GetArgsDeclaration(linked_operations);
   for (int i = 0; i < channels.size(); ++i) {
-    const std::string uniform_name = ""src_size_"" + std::to_string(i);
-    c += ""    int4 "" + uniform_name + "",\n"";
-  }
-  if (op_def.batch_support) {
-    c += ""    int BATCH_SIZE,  \n"";
+    c += ""    int "" + GetSrcDepthSizeVar(i) + "",\n"";
   }
   c += ""    int4 dst_size\n"";
   c += "") {\n"";
   c += ""  int X = get_global_id(0);\n"";
   c += ""  int Y = get_global_id(1);\n"";
-  c += ""  if (X >= dst_size.x || Y >= dst_size.y) return;\n"";
-  if (op_def.batch_support) {
-    c += ""  int batch_id = get_global_id(2);\n"";
-    c += ""  if (batch_id >= BATCH_SIZE) return;\n"";
-  }
+  c += ""  if (X >= dst_size.x || Y >= dst_size.y) return; \n"";
 
   if (IsAllChannelsX4(channels)) {
     // When all channels % 4 == 0 we can read/assign/write FLT4 elements easily.
@@ -81,37 +78,35 @@ std::string GetConcatKernelCode(
     // generation.
     c += ""  int Z = 0;\n"";
     for (int i = 0; i < channels.size(); ++i) {
-      const std::string uniform_name = ""src_size_"" + std::to_string(i);
       const int depth = IntegralDivideRoundUp(channels[i], 4);
       if (depth % 2 == 0) {
         // We can read more at once inside of loop in case depth % 2 == 0
         // it should be better for reading latency hiding
-        c += ""  for (int i = 0; i < "" + uniform_name + "".w; i += 2) {\n"";
+        c += ""  for (int i = 0; i < "" + GetSrcDepthSizeVar(i) + ""; i += 2) {\n"";
         c += ""    FLT4 result0 = "" +
-             srcs[i].Read4D(""X"", ""Y"", ""i"", batch_id,
-                            TextureAddressMode::DONT_CARE) +
+             srcs[i].Read3D(""X"", ""Y"", ""i"", TextureAddressMode::DONT_CARE) +
              "";\n"";
         c += ""    FLT4 result1 = "" +
-             srcs[i].Read4D(""X"", ""Y"", ""i + 1"", batch_id,
-                            TextureAddressMode::DONT_CARE) +
+             srcs[i].Read3D(""X"", ""Y"", ""i + 1"", TextureAddressMode::DONT_CARE) +
              "";\n"";
+        c += ""    "" + dst.GetAddress(""dst_adr0"", ""X"", ""Y"", ""Z"") + ""\n"";
+        c += ""    "" + dst.GetAddress(""dst_adr1"", ""X"", ""Y"", ""Z + 1"") + ""\n"";
         const LinkingContext context_0{""result0"", ""X"", ""Y"", ""Z""};
         const LinkingContext context_1{""result1"", ""X"", ""Y"", ""Z + 1""};
         c += PostProcess(linked_operations, context_0);
         c += PostProcess(linked_operations, context_1);
-        c += ""    "" + dst.Write4D(""result0"", ""X"", ""Y"", ""Z"", batch_id);
-        c += ""    "" + dst.Write4D(""result1"", ""X"", ""Y"", ""Z + 1"", batch_id);
+        c += ""    "" + dst.Write3D(""result0"", ""X"", ""Y"", ""Z"");
+        c += ""    "" + dst.Write3D(""result1"", ""X"", ""Y"", ""Z + 1"");
         c += ""    Z += 2;\n"";
         c += ""  }\n"";
       } else {
-        c += ""  for (int i = 0; i < "" + uniform_name + "".w; ++i) {\n"";
+        c += ""  for (int i = 0; i < "" + GetSrcDepthSizeVar(i) + ""; ++i) {\n"";
         c += ""    FLT4 result = "" +
-             srcs[i].Read4D(""X"", ""Y"", ""i"", batch_id,
-                            TextureAddressMode::DONT_CARE) +
+             srcs[i].Read3D(""X"", ""Y"", ""i"", TextureAddressMode::DONT_CARE) +
              "";\n"";
         const LinkingContext context{""result"", ""X"", ""Y"", ""Z""};
         c += PostProcess(linked_operations, context);
-        c += ""    "" + dst.Write4D(""result"", ""X"", ""Y"", ""Z"", batch_id);
+        c += ""    "" + dst.Write3D(""result"", ""X"", ""Y"", ""Z"");
         c += ""    Z++;\n"";
         c += ""  }\n"";
       }
@@ -126,8 +121,8 @@ std::string GetConcatKernelCode(
       for (int d = 0; d < depth; ++d) {
         const int channels_in_group = std::min(4, channels[i] - d * 4);
         const std::string temp_name = ""t"" + std::to_string(read_index);
-        c += ""  FLT4 "" + temp_name + "" = "" +
-             srcs[i].Read4D(""X"", ""Y"", std::to_string(d), batch_id,
+        c += ""  FLT4 "" + temp_name + "" = "";
+        c += srcs[i].Read3D(""X"", ""Y"", std::to_string(d),
                             TextureAddressMode::DONT_CARE) +
              "";\n"";
         for (int ch = 0; ch < channels_in_group; ++ch) {
@@ -139,8 +134,7 @@ std::string GetConcatKernelCode(
             c += ""  {\n"";
             const LinkingContext context{""result"", ""X"", ""Y"", std::to_string(z)};
             c += PostProcess(linked_operations, context);
-            c += ""  "" +
-                 dst.Write4D(""result"", ""X"", ""Y"", std::to_string(z), batch_id);
+            c += ""  "" + dst.Write3D(""result"", ""X"", ""Y"", std::to_string(z));
             c += ""  }\n"";
             z++;
           }
@@ -152,7 +146,7 @@ std::string GetConcatKernelCode(
       c += ""  {\n"";
       const LinkingContext context{""result"", ""X"", ""Y"", std::to_string(z)};
       c += PostProcess(linked_operations, context);
-      c += ""  "" + dst.Write4D(""result"", ""X"", ""Y"", ""Z"", std::to_string(z));
+      c += ""  "" + dst.Write3D(""result"", ""X"", ""Y"", std::to_string(z));
       c += ""  }\n"";
     }
   }
@@ -199,21 +193,16 @@ Status ConcatZ::BindArguments() {
   RETURN_IF_ERROR(kernel_.SetMemoryAuto(dst_[0]->GetMemoryPtrForWriting()));
   RETURN_IF_ERROR(BindArgs(&kernel_, linked_operations_));
   for (int i = 0; i < channels_.size(); ++i) {
-    int4 size(src_[i]->Width(), src_[i]->Height(), channels_[i],
-              IntegralDivideRoundUp(channels_[i], 4));
-    RETURN_IF_ERROR(kernel_.SetBytesAuto(size));
-  }
-  if (definition_.batch_support) {
-    RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->Batch()));
+    RETURN_IF_ERROR(kernel_.SetBytesAuto(src_[i]->Depth()));
   }
-  RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetSizeWithDepth()));
+  RETURN_IF_ERROR(kernel_.SetBytesAuto(dst_[0]->GetWBatchedHDB()));
   return OkStatus();
 }
 
 int3 ConcatZ::GetGridSize() const {
-  const int grid_x = dst_[0]->Width();
+  const int grid_x = dst_[0]->Width() * dst_[0]->Batch();
   const int grid_y = dst_[0]->Height();
-  const int grid_z = dst_[0]->Batch();
+  const int grid_z = 1;
   return int3(grid_x, grid_y, grid_z);
 }
 
",0,train
6a8e5328c68b037a741b40bc538fecfb72980953,tensorflow/tensorflow,"Update tensorflow/core/grappler/graph_analyzer/graph_analyzer.cc

Co-authored-by: Mihai Maruseac <mihai.maruseac@gmail.com>",graph_analyzer.cc,"@@ -92,8 +92,8 @@ void GraphAnalyzer::FindSubgraphs() {
 }
 
 void GraphAnalyzer::ExtendSubgraph(Subgraph* parent) {
-  const int parent_id_size_plus_one = parent->id().size() + 1;
-  bool will_complete = (parent_id_size_plus_one == subgraph_size_);
+  const int next_parent_id = parent->id().size() + 1;
+  bool will_complete = (next_parent_id == subgraph_size_);
   SubgraphPtrSet& sg_set = will_complete ? result_ : partial_;
 
   const GenNode* last_all_or_none_node = nullptr;
",0,train
14f00bd6d9f7e6c1df6b14f3d2553121ae515e74,tensorflow/tensorflow,"[NNAPI] Check for optional tensor when handling FP16 weights.

PiperOrigin-RevId: 427808358
Change-Id: I4a7e429c61c315a93825a596a1c8f81c97e1dd49",nnapi_delegate.cc,"@@ -5451,7 +5451,8 @@ TfLiteStatus NNAPIDelegateKernel::AddOpsAndTensors(
     }
     // Map inputs to NN API tensor indices.
     for (int input_pos = 0; input_pos < node->inputs->size; ++input_pos) {
-      if (context->tensors[node->inputs->data[input_pos]].type ==
+      if (node->inputs->data[input_pos] != kTfLiteOptionalTensor &&
+          context->tensors[node->inputs->data[input_pos]].type ==
               kTfLiteFloat16 &&
           IsConstantTensor(&context->tensors[node->inputs->data[input_pos]])) {
         input_tensor_flags |= NN_TENSOR_FLAG_HALF_TO_FLOAT_CONVERSION;
",0,train
025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras.

* Allow sparse labels to work with Datasets.
* Allow sample_weights to be passed as the third output of a Dataset (like how
generator input is treated).

PiperOrigin-RevId: 211834259",keras_test.py,"@@ -446,8 +446,7 @@ class TestWithDistributionStrategy(test.TestCase):
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
       dataset = dataset.repeat(100)
 
-      with self.assertRaisesRegexp(ValueError,
-                                   'expected input to have 2 dimensions'):
+      with self.assertRaisesRegexp(ValueError, 'expected input to have shape'):
         model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=0)
 
       # Wrong input shape
",0,train
025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras.

* Allow sparse labels to work with Datasets.
* Allow sample_weights to be passed as the third output of a Dataset (like how
generator input is treated).

PiperOrigin-RevId: 211834259",training.py,"@@ -928,11 +928,16 @@ class Model(Network):
                            'Make sure that your dataset can generate '
                            'required number of samples.')
 
-      if not isinstance(next_element, (list, tuple)) or len(next_element) != 2:
-        raise ValueError('Please provide model inputs as a list or tuple of 2 '
-                         'elements: input and target pair. '
-                         'Received %s' % next_element)
-      x, y = next_element
+      if (not isinstance(next_element, (list, tuple)) or
+          len(next_element) not in [2, 3]):
+        raise ValueError(
+            'Please provide model inputs as a list or tuple of 2  or 3'
+            'elements: (input, target) or (input, target, sample_weights)'
+            'Received %s' % next_element)
+      if len(next_element) == 2:
+        x, y = next_element
+      else:
+        x, y, sample_weight = next_element
     x, y, sample_weights = self._standardize_weights(x, y, sample_weight,
                                                      class_weight, batch_size)
     return x, y, sample_weights
@@ -1331,7 +1336,8 @@ class Model(Network):
             (in case the model has multiple inputs).
           - A dict mapping input names to the corresponding array/tensors,
             if the model has named inputs.
-          - A `tf.data` dataset or a dataset iterator.
+          - A `tf.data` dataset or a dataset iterator. Should return a tuple
+            of either (inputs, targets) or (inputs, targets, sample_weights).
         y: Target data. Like the input data `x`,
           it could be either Numpy array(s) or TensorFlow tensor(s).
           It should be consistent with `x` (you cannot have Numpy inputs and
@@ -1396,7 +1402,8 @@ class Model(Network):
             to apply a different weight to every timestep of every sample.
             In this case you should make sure to specify
             `sample_weight_mode=""temporal""` in `compile()`. This argument is not
-            supported when `x` is a dataset or a dataset iterator.
+            supported when `x` is a dataset or a dataset iterator, instead
+            provide the sample_weights as the third element of `x`.
         initial_epoch: Integer.
             Epoch at which to start training
             (useful for resuming a previous training run).
",0,train
025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras.

* Allow sparse labels to work with Datasets.
* Allow sample_weights to be passed as the third output of a Dataset (like how
generator input is treated).

PiperOrigin-RevId: 211834259",training_eager.py,"@@ -417,11 +417,12 @@ def iterator_predict_loop(model, inputs, steps, verbose=0):
   """"""
   assert isinstance(inputs, iterator_ops.EagerIterator)
   if not isinstance(inputs.output_shapes,
-                    (list, tuple)) or len(inputs.output_shapes) > 2:
+                    (list, tuple)) or len(inputs.output_shapes) > 3:
     raise ValueError(
-        'Please provide data as a list or tuple of 1 or 2 elements '
-        ' - input or input and target pair. Received %s. We do not use the '
-        '`target` value here.' % inputs.output_shapes)
+        'Please provide data as a list or tuple of 1, 2, or 3 elements '
+        ' - `(input)`, or `(input, target)`, or `(input, target,'
+        'sample_weights)`. Received %s. We do not use the `target` or'
+        '`sample_weights` value here.' % inputs.output_shapes)
   outs = []
   if verbose == 1:
     progbar = generic_utils.Progbar(target=steps)
",0,train
025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras.

* Allow sparse labels to work with Datasets.
* Allow sample_weights to be passed as the third output of a Dataset (like how
generator input is treated).

PiperOrigin-RevId: 211834259",training_test.py,"@@ -2097,6 +2097,43 @@ class TestTrainingWithDataset(test.TestCase):
                                  'you should specify the `steps` argument'):
       model.predict(dataset, verbose=0)
 
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_dataset_with_sample_weights(self):
+    model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3)
+    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    loss = 'mse'
+    metrics = ['mae', metrics_module.CategoricalAccuracy()]
+    model.compile(optimizer, loss, metrics=metrics)
+
+    inputs = np.zeros((10, 3), np.float32)
+    targets = np.zeros((10, 4), np.float32)
+    sample_weights = np.ones((10), np.float32)
+    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets,
+                                                      sample_weights))
+    dataset = dataset.repeat(100)
+    dataset = dataset.batch(10)
+
+    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+    model.evaluate(dataset, steps=2, verbose=1)
+    model.predict(dataset, steps=2)
+    model.train_on_batch(dataset)
+    model.predict_on_batch(dataset)
+
+  @tf_test_util.run_in_graph_and_eager_modes
+  def test_dataset_with_sparse_labels(self):
+    model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3)
+    optimizer = RMSPropOptimizer(learning_rate=0.001)
+    loss = 'sparse_categorical_crossentropy'
+    model.compile(optimizer, loss)
+
+    inputs = np.zeros((10, 3))
+    targets = np.random.randint(0, 4, size=10, dtype=np.int32)
+    dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
+    dataset = dataset.repeat(100)
+    dataset = dataset.batch(10)
+
+    model.fit(dataset, epochs=1, steps_per_epoch=2, verbose=1)
+
   def test_dataset_input_shape_validation(self):
     with self.test_session():
       model = testing_utils.get_small_functional_mlp(1, 4, input_dim=3)
@@ -2108,8 +2145,10 @@ class TestTrainingWithDataset(test.TestCase):
       dataset = dataset_ops.Dataset.from_tensor_slices((inputs, targets))
       dataset = dataset.repeat(100)
 
-      with self.assertRaisesRegexp(ValueError,
-                                   r'expected (.*?) to have 2 dimensions'):
+      with self.assertRaisesRegexp(
+          ValueError,
+          r'expected (.*?) to have shape \(3,\) but got array with shape \(1,\)'
+      ):
         model.train_on_batch(dataset)
 
       # Wrong input shape
",0,train
025277a1598fa227b53ddc4e316a7a953b2006c8,tensorflow/tensorflow,"Small improvements to handling of Datasets in Keras.

* Allow sparse labels to work with Datasets.
* Allow sample_weights to be passed as the third output of a Dataset (like how
generator input is treated).

PiperOrigin-RevId: 211834259",training_utils.py,"@@ -210,10 +210,11 @@ def check_num_samples(ins,
 def standardize_single_array(x):
   if x is None:
     return None
-  elif tensor_util.is_tensor(x):
-    return x
-  elif x.ndim == 1:
-    x = np.expand_dims(x, 1)
+  if x.shape is not None and len(x.shape) == 1:
+    if tensor_util.is_tensor(x):
+      return array_ops.expand_dims(x, axis=1)
+    else:
+      return np.expand_dims(x, 1)
   return x
 
 
@@ -341,7 +342,7 @@ def standardize_sample_or_class_weights(x_weight, output_names, weight_type):
   Raises:
       ValueError: In case of invalid user-provided argument.
   """"""
-  if x_weight is None or len(x_weight) == 0:  # pylint: disable=g-explicit-length-test
+  if x_weight is None or (isinstance(x_weight, list) and len(x_weight) == 0):  # pylint: disable=g-explicit-length-test
     return [None for _ in output_names]
   if len(output_names) == 1:
     if isinstance(x_weight, list) and len(x_weight) == 1:
@@ -675,7 +676,8 @@ def standardize_weights(y,
           'Expected sample_weight with rank '
           'less than or equal to ' + str(len(y.shape)))
 
-    if y.shape[:sample_weight.ndim] != sample_weight.shape:
+    if (not tensor_util.is_tensor(sample_weight) and
+        y.shape[:sample_weight.ndim] != sample_weight.shape):
       raise ValueError(
           'Found a sample_weight array with shape ' + str(sample_weight.shape) +
           ' for an input with shape ' + str(y.shape) + '. '
@@ -777,7 +779,9 @@ def validate_iterator_input(x, y, sample_weight, validation_split=None):
                      'Received: %s' % (x, y))
   if sample_weight is not None:
     raise ValueError('`sample_weight` argument is not supported when input '
-                     '`x` is a dataset or a dataset iterator. '
+                     '`x` is a dataset or a dataset iterator. Instead, you'
+                     'can provide sample_weight as the third element  of your'
+                     'dataset, i.e. (inputs, targets, sample_weight). '
                      'Received: x=%s, sample_weight=%s' % (x, sample_weight))
   if validation_split is not None and validation_split != 0.0:
     raise ValueError(
",0,train
8bb742049234d72c28ea22ed86f67f40b288aae8,tensorflow/tensorflow,"Use Env::LocalTempFilename for a temp filename.

This function works both in and outside of tests. Additionally,
LocalTempFilename works well on Windows, whereas TmpDir is a little problematic
because of bazel oddities.

PiperOrigin-RevId: 296250888
Change-Id: I2a8bc52ad784eda4d00f63c91eec681cc91e16e7",inputbuffer_test.cc,"@@ -16,7 +16,6 @@ limitations under the License.
 #include ""tensorflow/core/lib/io/inputbuffer.h""
 
 #include <vector>
-#include ""tensorflow/core/platform/env.h""
 
 #include ""tensorflow/core/lib/core/coding.h""
 #include ""tensorflow/core/lib/core/errors.h""
@@ -24,6 +23,7 @@ limitations under the License.
 #include ""tensorflow/core/lib/core/status_test_util.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
+#include ""tensorflow/core/platform/env.h""
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/core/platform/test.h""
 
@@ -37,7 +37,8 @@ static std::vector<int> BufferSizes() {
 
 TEST(InputBuffer, ReadLine_Empty) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname, """"));
 
   for (auto buf_size : BufferSizes()) {
@@ -51,7 +52,8 @@ TEST(InputBuffer, ReadLine_Empty) {
 
 TEST(InputBuffer, ReadLine1) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_CHECK_OK(
       WriteStringToFile(env, fname, ""line one\nline two\nline three\n""));
 
@@ -74,7 +76,8 @@ TEST(InputBuffer, ReadLine1) {
 
 TEST(InputBuffer, ReadLine_NoTrailingNewLine) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname, ""line one\nline two\nline three""));
 
   for (auto buf_size : BufferSizes()) {
@@ -96,7 +99,8 @@ TEST(InputBuffer, ReadLine_NoTrailingNewLine) {
 
 TEST(InputBuffer, ReadLine_EmptyLines) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_CHECK_OK(
       WriteStringToFile(env, fname, ""line one\n\n\nline two\nline three""));
 
@@ -123,7 +127,8 @@ TEST(InputBuffer, ReadLine_EmptyLines) {
 
 TEST(InputBuffer, ReadLine_CRLF) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname,
                                  ""line one\r\n\r\n\r\nline two\r\nline three""));
 
@@ -150,7 +155,8 @@ TEST(InputBuffer, ReadLine_CRLF) {
 
 TEST(InputBuffer, ReadNBytes) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789""));
 
   // ReadNBytes(int64, string*).
@@ -223,7 +229,8 @@ TEST(InputBuffer, ReadNBytes) {
 
 TEST(InputBuffer, SkipNBytes) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789""));
 
   for (auto buf_size : BufferSizes()) {
@@ -258,7 +265,8 @@ TEST(InputBuffer, SkipNBytes) {
 
 TEST(InputBuffer, Seek) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
   TF_ASSERT_OK(WriteStringToFile(env, fname, ""0123456789""));
 
   for (auto buf_size : BufferSizes()) {
@@ -293,7 +301,8 @@ TEST(InputBuffer, Seek) {
 
 TEST(InputBuffer, ReadVarint32) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
 
   // Generates data.
   std::vector<uint32> data;
@@ -331,7 +340,8 @@ TEST(InputBuffer, ReadVarint32) {
 
 TEST(InputBuffer, ReadVarint64) {
   Env* env = Env::Default();
-  string fname = testing::TmpDir() + ""/inputbuffer_test"";
+  string fname;
+  ASSERT_TRUE(env->LocalTempFilename(&fname));
 
   // Generates data.
   std::vector<uint64> data;
",0,train
fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code.

tensorflow::string is sometimes ::string and sometimes std::string, which
makes code that uses both subtly dangerous. For example,
FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had
many callsites passing a std::string, causing incorrect behavior on
the google platform.
PiperOrigin-RevId: 208244169",mkl_fused_batch_norm_op.cc,"@@ -899,8 +899,8 @@ class MklFusedBatchNormFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   MklFusedBatchNormFwdPrimitiveFactory() {}
   ~MklFusedBatchNormFwdPrimitiveFactory() {}
 
-  static std::string CreateKey(const MklBatchNormFwdParams& fwdParams) {
-    std::string prefix = ""bn_fwd"";
+  static string CreateKey(const MklBatchNormFwdParams& fwdParams) {
+    string prefix = ""bn_fwd"";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
     key_creator.AddAsKey(fwdParams.src_dims);
@@ -911,13 +911,13 @@ class MklFusedBatchNormFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 
   MklPrimitive* GetBatchNormFwd(const MklBatchNormFwdParams& fwdParams) {
-    std::string key = CreateKey(fwdParams);
+    string key = CreateKey(fwdParams);
     return this->GetOp(key);
   }
 
   void SetBatchNormFwd(const MklBatchNormFwdParams& fwdParams,
                        MklPrimitive* op) {
-    std::string key = CreateKey(fwdParams);
+    string key = CreateKey(fwdParams);
     this->SetOp(key, op);
   }
 };
@@ -1122,8 +1122,8 @@ class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   MklFusedBatchNormBwdPrimitiveFactory() {}
   ~MklFusedBatchNormBwdPrimitiveFactory() {}
 
-  static std::string CreateKey(const MklBatchNormBwdParams& bwdParams) {
-    std::string prefix = ""bn_bwd"";
+  static string CreateKey(const MklBatchNormBwdParams& bwdParams) {
+    string prefix = ""bn_bwd"";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
     key_creator.AddAsKey(bwdParams.src_dims);
@@ -1135,13 +1135,13 @@ class MklFusedBatchNormBwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 
   MklPrimitive* GetBatchNormBwd(const MklBatchNormBwdParams& bwdParams) {
-    std::string key = CreateKey(bwdParams);
+    string key = CreateKey(bwdParams);
     return this->GetOp(key);
   }
 
   void SetBatchNormBwd(const MklBatchNormBwdParams& bwdParams,
                        MklPrimitive* op) {
-    std::string key = CreateKey(bwdParams);
+    string key = CreateKey(bwdParams);
     this->SetOp(key, op);
   }
 };
",0,train
fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code.

tensorflow::string is sometimes ::string and sometimes std::string, which
makes code that uses both subtly dangerous. For example,
FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had
many callsites passing a std::string, causing incorrect behavior on
the google platform.
PiperOrigin-RevId: 208244169",mkl_pooling_ops_common.h,"@@ -175,8 +175,8 @@ class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   // primitive op from reuse perspective.
   // A pooling key is a string which concates key parameters
   // as well as algorithm kind (max versus avg).
-  static std::string CreateKey(const MklPoolingParams& fwdParams) {
-    std::string prefix = ""pooling_fwd"";
+  static string CreateKey(const MklPoolingParams& fwdParams) {
+    string prefix = ""pooling_fwd"";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
     key_creator.AddAsKey(fwdParams.src_dims);
@@ -190,12 +190,12 @@ class MklPoolingFwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 
   MklPrimitive* GetPoolingFwd(const MklPoolingParams& fwdParams) {
-    std::string key = CreateKey(fwdParams);
+    string key = CreateKey(fwdParams);
     return this->GetOp(key);
   }
 
   void SetPoolingFwd(const MklPoolingParams& fwdParams, MklPrimitive* op) {
-    std::string key = CreateKey(fwdParams);
+    string key = CreateKey(fwdParams);
     this->SetOp(key, op);
   }
 };
@@ -326,8 +326,8 @@ class MklPoolingBwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   // primitive op from reuse perspective.
   // A pooling key is a string which concates key parameters
   // as well as algorithm kind (max versus avg).
-  static std::string CreateKey(const MklPoolingParams& bwdParams) {
-    std::string prefix = ""pooling_bwd"";
+  static string CreateKey(const MklPoolingParams& bwdParams) {
+    string prefix = ""pooling_bwd"";
     FactoryKeyCreator key_creator;
     key_creator.AddAsKey(prefix);
     key_creator.AddAsKey(bwdParams.src_dims);
@@ -341,12 +341,12 @@ class MklPoolingBwdPrimitiveFactory : public MklPrimitiveFactory<T> {
   }
 
   MklPrimitive* GetPoolingBwd(const MklPoolingParams& bwdParams) {
-    std::string key = CreateKey(bwdParams);
+    string key = CreateKey(bwdParams);
     return this->GetOp(key);
   }
 
   void SetPoolingBwd(const MklPoolingParams& bwdParams, MklPrimitive* op) {
-    std::string key = CreateKey(bwdParams);
+    string key = CreateKey(bwdParams);
     this->SetOp(key, op);
   }
 };
",0,train
fea9d07d1e34d5330a13024cb42d9bc460869905,tensorflow/tensorflow,"Remove references to std::string in MKL-related code.

tensorflow::string is sometimes ::string and sometimes std::string, which
makes code that uses both subtly dangerous. For example,
FactoryKeyCreator::AddAsKey() has an overload for tensorflow::string but had
many callsites passing a std::string, causing incorrect behavior on
the google platform.
PiperOrigin-RevId: 208244169",mkl_tfconv_op.h,"@@ -118,12 +118,11 @@ class MklToTfOp : public OpKernel {
         CHECK(output_tensor->CopyFrom(input_tensor, output_shape));
       }
     } catch (mkldnn::error& e) {
-      string error_msg = ""Status: "" + std::to_string(e.status) +
-                         "", message: "" + std::string(e.message) + "", in file "" +
-                         std::string(__FILE__) + "":"" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
-          errors::Aborted(""Operation received an exception:"", error_msg));
+          errors::Aborted(""Operation received an exception: Status: "", e.status,
+                          "", message: "", StringPiece(e.message), "", in file "",
+                          __FILE__, "":"", __LINE__));
     }
   }
 #else
",0,train
d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change.

PiperOrigin-RevId: 377532643
Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",saved_model_api.h,"@@ -24,6 +24,7 @@ limitations under the License.
 #include ""absl/container/flat_hash_map.h""
 #include ""tensorflow/c/experimental/saved_model/core/concrete_function.h""
 #include ""tensorflow/c/experimental/saved_model/core/signature_def_function.h""
+#include ""tensorflow/cc/saved_model/bundle_v2.h""
 #include ""tensorflow/core/platform/status.h""
 
 namespace tensorflow {
@@ -54,6 +55,8 @@ class SavedModelAPI {
   virtual Status GetSignatureDefFunction(const std::string& signature_def_key,
                                          SignatureDefFunction** function) = 0;
 
+  virtual SavedModelV2Bundle* GetBundle() = 0;
+
   virtual ~SavedModelAPI() = default;
 };
 
",0,train
d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change.

PiperOrigin-RevId: 377532643
Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",tf_saved_model_api.cc,"@@ -247,6 +247,8 @@ Status TFSavedModelAPI::GetVariable(const std::string& variable_path,
   return Status();
 }
 
+SavedModelV2Bundle* TFSavedModelAPI::GetBundle() { return &this->bundle_; }
+
 TFSavedModelAPI::TFSavedModelAPI(const std::string& directory,
                                  SavedModelV2Bundle bundle,
                                  RevivedObjects revived_objects)
",0,train
d8840c4872df6f452e0c358cd26352ac4ddb6245,tensorflow/tensorflow,"Internal experimental C++ API change.

PiperOrigin-RevId: 377532643
Change-Id: Ifd167c09366547924153cc24b1e01e3c3d0be548",tf_saved_model_api.h,"@@ -75,6 +75,8 @@ class TFSavedModelAPI : public SavedModelAPI {
 
   Status GetVariable(const std::string& variable_path, Variable** variable);
 
+  SavedModelV2Bundle* GetBundle() override;
+
  private:
   TFSavedModelAPI(const std::string& directory, SavedModelV2Bundle bundle,
                   RevivedObjects revived_objects);
",0,train
b3701aac80622dde6529486ad118008c626eed65,tensorflow/tensorflow,"Update GraphDef version to 411.

PiperOrigin-RevId: 312963337
Change-Id: I9b9db44aa0010e1dea95442a4e5ff0ae88aef128",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 410  // Updated: 2020/5/23
+#define TF_GRAPH_DEF_VERSION 411  // Updated: 2020/5/24
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
339fdd82490968b3314b8d58ad23cd4808b4e24b,tensorflow/tensorflow,"C++ API changes
- Marked control flow ops as hidden
Change: 146300232",mark_for_compilation_pass_test.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/compiler/jit/mark_for_compilation_pass.h""
 
 #include ""tensorflow/cc/framework/ops.h""
+#include ""tensorflow/cc/ops/control_flow_ops_internal.h""
 #include ""tensorflow/cc/ops/standard_ops.h""
 #include ""tensorflow/core/framework/node_def_util.h""
 #include ""tensorflow/core/framework/op.h""
@@ -337,9 +338,9 @@ TEST(XlaCompilationTest, Loops) {
   auto a = ops::Placeholder(root.WithOpName(""A""), DT_FLOAT);
   auto b = ops::Placeholder(root.WithOpName(""B""), DT_FLOAT);
   auto c = ops::Add(root.WithOpName(""C""), a, b);
-  auto enter = ops::Enter(root, c, ""aframe"");
+  auto enter = ops::internal::Enter(root, c, ""aframe"");
   auto next_iter = ops::NextIteration(root, enter);
-  auto exit = ops::Exit(root, next_iter);
+  auto exit = ops::internal::Exit(root, next_iter);
   auto d = ops::Add(root.WithOpName(""D""), c, exit);
 
   std::unique_ptr<Graph> graph(new Graph(OpRegistry::Global()));
",0,train
339fdd82490968b3314b8d58ad23cd4808b4e24b,tensorflow/tensorflow,"C++ API changes
- Marked control flow ops as hidden
Change: 146300232",graph_partition_test.cc,"@@ -20,6 +20,7 @@ limitations under the License.
 #include ""tensorflow/cc/ops/array_ops.h""
 #include ""tensorflow/cc/ops/const_op.h""
 #include ""tensorflow/cc/ops/control_flow_ops.h""
+#include ""tensorflow/cc/ops/control_flow_ops_internal.h""
 #include ""tensorflow/cc/ops/random_ops.h""
 #include ""tensorflow/cc/ops/sendrecv_ops.h""
 #include ""tensorflow/core/framework/op.h""
@@ -337,8 +338,10 @@ TEST_F(GraphPartitionTest, CrossDevice_DataControl) {
 TEST_F(GraphPartitionTest, CrossDeviceLoop) {
   using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = BoolInput(in_.WithOpName(""A1""));
-  auto a2 = Enter(in_.WithOpName(""A2""), a1, ""foo"");
-  auto a3 = Merge(in_.WithOpName(""A3""), {a2, Input(""A5"", 0, DT_BOOL)}).output;
+  auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName(""A2""), a1, ""foo"");
+  auto a3 = ::tensorflow::ops::internal::Merge(in_.WithOpName(""A3""),
+                                               {a2, Input(""A5"", 0, DT_BOOL)})
+                .output;
   LoopCond(in_.WithOpName(""A4""), a3);
   auto b1 = Identity(in_.WithOpName(""B1""), a3);
   NextIteration(in_.WithOpName(""A5""), b1);
@@ -349,8 +352,10 @@ TEST_F(GraphPartitionTest, CrossDeviceLoop) {
 TEST_F(GraphPartitionTest, CrossDeviceLoop1) {
   using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
   auto a1 = BoolInput(in_.WithOpName(""A1""));
-  auto a2 = Enter(in_.WithOpName(""B2""), a1, ""foo"");
-  auto a3 = Merge(in_.WithOpName(""A3""), {a2, Input(""B5"", 0, DT_BOOL)}).output;
+  auto a2 = ::tensorflow::ops::internal::Enter(in_.WithOpName(""B2""), a1, ""foo"");
+  auto a3 = ::tensorflow::ops::internal::Merge(in_.WithOpName(""A3""),
+                                               {a2, Input(""B5"", 0, DT_BOOL)})
+                .output;
   LoopCond(in_.WithOpName(""A4""), a3);
   auto b1 = Identity(in_.WithOpName(""B1""), a3);
   NextIteration(in_.WithOpName(""B5""), b1);
",0,train
1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182

PiperOrigin-RevId: 264970919",api_template.__init__.py,"@@ -78,7 +78,7 @@ except ImportError:
   pass
 
 try:
-  from tensorflow.python.keras.api._v2 import keras
+  from .python.keras.api._v2 import keras
   _current_module.__path__ = (
       [_module_util.get_parent_dir(keras)] + _current_module.__path__)
   setattr(_current_module, ""keras"", keras)
",0,train
1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182

PiperOrigin-RevId: 264970919",api_template_v1.__init__.py,"@@ -69,7 +69,7 @@ except ImportError:
   pass
 
 try:
-  from tensorflow.python.keras.api._v1 import keras
+  from .python.keras.api._v1 import keras
   _current_module.__path__ = (
       [_module_util.get_parent_dir(keras)] + _current_module.__path__)
   setattr(_current_module, ""keras"", keras)
",0,train
1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182

PiperOrigin-RevId: 264970919",create_python_api.py,"@@ -195,8 +195,7 @@ class _ModuleInitCodeBuilder(object):
               dest_module_name=parent_module,
               dest_name=module_split[submodule_index])
         else:
-          if submodule_index > 0:
-            import_from += '.' + '.'.join(module_split[:submodule_index])
+          import_from = '.'
           self.add_import(
               symbol=None,
               source_module_name=import_from,
",0,train
1171258036e73c911d0487a3c2db8272fd9dc6be,tensorflow/tensorflow,"Automated rollback of commit d9e313d10790ae17d0eabbf6e63463510388e182

PiperOrigin-RevId: 264970919",module_test.py,"@@ -23,6 +23,7 @@ import pkgutil
 
 import tensorflow as tf
 
+from tensorflow.python import tf2
 from tensorflow.python.platform import test
 
 
@@ -50,6 +51,18 @@ class ModuleTest(test.TestCase):
   def testName(self):
     self.assertEqual('tensorflow', tf.__name__)
 
+  def testBuiltInName(self):
+    # range is a built-in name in Python. Just checking that
+    # tf.range works fine.
+    if tf2.enabled():
+      self.assertEqual(
+          'tf.Tensor([1 2 3 4 5 6 7 8 9], shape=(9,), dtype=int32)',
+          str(tf.range(1, 10)))
+    else:
+      self.assertEqual(
+          'Tensor(""range:0"", shape=(9,), dtype=int32)',
+          str(tf.range(1, 10)))
+
 
 if __name__ == '__main__':
   test.main()
",0,train
859cd49b628bb430a721ba89883c3a0efbbbdbbc,tensorflow/tensorflow,"Fix breakage: conversion of tf.data was allowed too soon and broke the autograph notebook.

PiperOrigin-RevId: 250764059",config.py,"@@ -28,8 +28,6 @@ DoNotConvert = config_lib.DoNotConvert
 # This list is evaluated in order and stops at the first rule that tests True
 # for a definitely_convert of definitely_bypass call.
 CONVERSION_RULES = (
-    Convert('tensorflow.python.data.ops'),
-
     DoNotConvert('tensorflow'),
 
     # TODO(b/133417201): Remove.
",0,test
cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes.
Add unit test for deduping Assert and CheckNumerics.

PiperOrigin-RevId: 176680534",op_types.cc,"@@ -24,64 +24,40 @@ limitations under the License.
 namespace tensorflow {
 namespace grappler {
 
-bool IsAdd(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Add"";
-}
+bool IsAdd(const NodeDef& node) { return node.op() == ""Add""; }
 
-bool IsAddN(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""AddN"";
-}
+bool IsAddN(const NodeDef& node) { return node.op() == ""AddN""; }
 
-bool IsAvgPoolGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""AvgPoolGrad"";
-}
+bool IsAvgPoolGrad(const NodeDef& node) { return node.op() == ""AvgPoolGrad""; }
 
-bool IsBiasAddGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""BiasAddGrad"";
-}
+bool IsAssert(const NodeDef& node) { return node.op() == ""Assert""; }
 
-bool IsConcatOffset(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""ConcatOffset"";
-}
+bool IsBiasAddGrad(const NodeDef& node) { return node.op() == ""BiasAddGrad""; }
 
-bool IsConstant(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Const"";
-}
+bool IsConcatOffset(const NodeDef& node) { return node.op() == ""ConcatOffset""; }
 
-bool IsConv2D(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Conv2D"";
-}
+bool IsConstant(const NodeDef& node) { return node.op() == ""Const""; }
+
+bool IsConv2D(const NodeDef& node) { return node.op() == ""Conv2D""; }
 
 bool IsConv2DBackpropFilter(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Conv2DBackpropFilter"";
+  return node.op() == ""Conv2DBackpropFilter"";
 }
 
 bool IsConv2DBackpropInput(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Conv2DBackpropInput"";
+  return node.op() == ""Conv2DBackpropInput"";
 }
 
 bool IsDepthwiseConv2dNative(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""DepthwiseConv2dNative"";
+  return node.op() == ""DepthwiseConv2dNative"";
 }
 
 bool IsDepthwiseConv2dNativeBackpropFilter(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""DepthwiseConv2dNativeBackpropFilter"";
+  return node.op() == ""DepthwiseConv2dNativeBackpropFilter"";
 }
 
 bool IsDepthwiseConv2dNativeBackpropInput(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""DepthwiseConv2dNativeBackpropInput"";
+  return node.op() == ""DepthwiseConv2dNativeBackpropInput"";
 }
 
 bool IsDequeueOp(const NodeDef& node) {
@@ -101,14 +77,10 @@ bool IsExit(const NodeDef& node) {
   return op == ""Exit"" || op == ""RefExit"";
 }
 
-bool IsFloorMod(const NodeDef& node) {
-  const auto& op = node.op();
-  return op == ""FloorMod"";
-}
+bool IsFloorMod(const NodeDef& node) { return node.op() == ""FloorMod""; }
 
 bool IsFusedBatchNormGradV1(const NodeDef& node) {
-  const auto& op = node.op();
-  return op == ""FusedBatchNormGrad"";
+  return node.op() == ""FusedBatchNormGrad"";
 }
 
 bool IsIdentity(const NodeDef& node) {
@@ -121,25 +93,16 @@ bool IsMerge(const NodeDef& node) {
   return op == ""Merge"" || op == ""RefMerge"";
 }
 
-bool IsMul(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Mul"";
-}
+bool IsMul(const NodeDef& node) { return node.op() == ""Mul""; }
 
-bool IsNoOp(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""NoOp"";
-}
+bool IsNoOp(const NodeDef& node) { return node.op() == ""NoOp""; }
 
 bool IsNextIteration(const NodeDef& node) {
   const auto& op = node.op();
   return op == ""NextIteration"" || op == ""RefNextIteration"";
 }
 
-bool IsPad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Pad"";
-}
+bool IsPad(const NodeDef& node) { return node.op() == ""Pad""; }
 
 bool IsPlaceholder(const NodeDef& node) {
   const auto op = node.op();
@@ -147,20 +110,11 @@ bool IsPlaceholder(const NodeDef& node) {
          op == ""PlaceholderWithDefault"";
 }
 
-bool IsRealDiv(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""RealDiv"";
-}
+bool IsRealDiv(const NodeDef& node) { return node.op() == ""RealDiv""; }
 
-bool IsReluGrad(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""ReluGrad"";
-}
+bool IsReluGrad(const NodeDef& node) { return node.op() == ""ReluGrad""; }
 
-bool IsRecv(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""_Recv"";
-}
+bool IsRecv(const NodeDef& node) { return node.op() == ""_Recv""; }
 
 bool IsReduction(const NodeDef& node) {
   const auto& op = node.op();
@@ -175,53 +129,34 @@ bool IsRestore(const NodeDef& node) {
           node.op() == ""RestoreSlice"");
 }
 
-bool IsSend(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""_Send"";
-}
+bool IsSend(const NodeDef& node) { return node.op() == ""_Send""; }
 
-bool IsSlice(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Slice"";
-}
+bool IsSlice(const NodeDef& node) { return node.op() == ""Slice""; }
 
 bool IsSquaredDifference(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""SquaredDifference"";
+  return node.op() == ""SquaredDifference"";
 }
 
-bool IsSqueeze(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Squeeze"";
-}
+bool IsSqueeze(const NodeDef& node) { return node.op() == ""Squeeze""; }
 
 bool IsStopGradient(const NodeDef& node) {
   const auto& op = node.op();
   return op == ""StopGradient"" || op == ""PreventGradient"";
 }
 
-bool IsSub(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Sub"";
-}
+bool IsSub(const NodeDef& node) { return node.op() == ""Sub""; }
 
-bool IsSum(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Sum"";
-}
+bool IsSum(const NodeDef& node) { return node.op() == ""Sum""; }
 
 bool IsSwitch(const NodeDef& node) {
   const auto& op = node.op();
   return op == ""Switch"" || op == ""RefSwitch"";
 }
 
-bool IsTranspose(const NodeDef& node) {
-  const auto op = node.op();
-  return op == ""Transpose"";
-}
+bool IsTranspose(const NodeDef& node) { return node.op() == ""Transpose""; }
 
 bool IsVariable(const NodeDef& node) {
-  const auto op = node.op();
+  const auto& op = node.op();
   return op == ""Variable"" || op == ""VariableV2"" || op == ""AutoReloadVariable"" ||
          op == ""VarHandleOp"" || op == ""ReadVariableOp"";
 }
",0,train
cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes.
Add unit test for deduping Assert and CheckNumerics.

PiperOrigin-RevId: 176680534",op_types.h,"@@ -25,6 +25,7 @@ namespace grappler {
 bool IsAdd(const NodeDef& node);
 bool IsAddN(const NodeDef& node);
 bool IsAvgPoolGrad(const NodeDef& node);
+bool IsAssert(const NodeDef& node);
 bool IsBiasAddGrad(const NodeDef& node);
 bool IsConcatOffset(const NodeDef& node);
 bool IsConstant(const NodeDef& node);
",0,train
cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes.
Add unit test for deduping Assert and CheckNumerics.

PiperOrigin-RevId: 176680534",arithmetic_optimizer.cc,"@@ -449,6 +449,10 @@ bool ArithmeticOptimizer::CanDedup(const NodeDef& node) const {
   if (node.device().find(""SPU"") != string::npos) {
     return false;
   }
+  // Workaround for Assert mistakenly being labeled as stateful.
+  if (IsAssert(node)) {
+    return true;
+  }
   return IsFreeOfSideEffect(node);
 }
 
",0,train
cd8ced7a2d48574908d2c9b7127960078cf41690,tensorflow/tensorflow,"Enable deduping of Assert nodes.
Add unit test for deduping Assert and CheckNumerics.

PiperOrigin-RevId: 176680534",arithmetic_optimizer_test.cc,"@@ -81,6 +81,38 @@ TEST_F(ArithmeticOptimizerTest, OpDedupping) {
   EXPECT_EQ(""c1"", new_mul.input(1));
 }
 
+TEST_F(ArithmeticOptimizerTest, OpDeduppingAssertAndCheckNumerics) {
+  tensorflow::Scope s = tensorflow::Scope::NewRootScope();
+  Output p = ops::Placeholder(s, DT_BOOL, ops::Placeholder::Shape({}));
+  Output c = ops::Const(s.WithOpName(""c""), {3.14, 2.7}, {1, 2});
+  auto check1 = ops::CheckNumerics(s.WithOpName(""check1""), c, ""foo"");
+  auto check2 = ops::CheckNumerics(s.WithOpName(""check2""), c, ""foo"");
+  auto assert1 = ops::Assert(s.WithOpName(""assert1""), p, {c});
+  auto assert2 = ops::Assert(s.WithOpName(""assert2""), p, {c});
+  Output mul = ops::Multiply(s.WithOpName(""mul"").WithControlDependencies(
+                                 {assert1.operation, assert2.operation}),
+                             check1, check2);
+  GrapplerItem item;
+  TF_CHECK_OK(s.ToGraphDef(&item.graph));
+
+  ArithmeticOptimizer optimizer;
+  GraphDef output;
+  Status status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+  // Run the optimizer twice to make sure the rewrite is idempotent.
+  item.graph.Swap(&output);
+  status = optimizer.Optimize(nullptr, item, &output);
+  TF_EXPECT_OK(status);
+
+  EXPECT_EQ(5, output.node_size());
+  const NodeDef& new_mul = output.node(3);
+  EXPECT_EQ(4, new_mul.input_size());
+  EXPECT_EQ(""check1"", new_mul.input(0));
+  EXPECT_EQ(""check1"", new_mul.input(1));
+  EXPECT_EQ(""^assert1"", new_mul.input(2));
+  EXPECT_EQ(""^assert1"", new_mul.input(3));
+}
+
 TEST_F(ArithmeticOptimizerTest, OpDedupCommutative) {
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
   Output c1 = ops::Const(s.WithOpName(""c1""), {1.0f, 2.0f}, {1, 2});
",0,train
8c51ff3258eb89dfe02f5aec8c4705033b30684f,tensorflow/tensorflow,"Add an explanation when cublas fails to initialize that this may be due to OOM.

I hit this recently with JAX.  JAX allocates 90% of your GPU's total memory by
default, and it turned out that after doing this, I didn't have enough memory
free to initialize cublas!  But unfortunately, cublas didn't give a useful
error message.

PiperOrigin-RevId: 414144717
Change-Id: I05ecaa512bfd49211d26ecc4f09ee11386d12ec9",cuda_blas.cc,"@@ -219,11 +219,20 @@ class ScopedCublasMathMode {
 };
 #endif  // CUDA_VERSION >= 9000
 
+static const char *const kCublasNotInitializedExplanation =
+    ""Failure to initialize cublas may be due to OOM (cublas needs some free ""
+    ""memory when you initialize it, and your deep-learning framework may have ""
+    ""preallocated more than its fair share), or may be because this binary was ""
+    ""not built with support for the GPU in your machine."";
+
 bool CUDABlas::Init() {
   gpu::ScopedActivateExecutorContext sac{parent_};
   cublasStatus_t ret = cublasCreate(&blas_);
   if (ret != CUBLAS_STATUS_SUCCESS) {
     LOG(ERROR) << ""failed to create cublas handle: "" << ToString(ret);
+    if (ret == CUBLAS_STATUS_NOT_INITIALIZED) {
+      LOG(ERROR) << kCublasNotInitializedExplanation;
+    }
     return false;
   }
 
@@ -231,6 +240,9 @@ bool CUDABlas::Init() {
   ret = cublasLtCreate(&blasLt_);
   if (ret != CUBLAS_STATUS_SUCCESS) {
     LOG(ERROR) << ""failed to create cublasLt handle: "" << ToString(ret);
+    if (ret == CUBLAS_STATUS_NOT_INITIALIZED) {
+      LOG(ERROR) << kCublasNotInitializedExplanation;
+    }
     return false;
   }
 #endif  // CUDA_VERSION >= 11000
",0,train
a0991e859fe45ddb04d8b618e6b602684726b2e5,tensorflow/tensorflow,"Update GraphDef version to 789.

PiperOrigin-RevId: 377755989
Change-Id: I38fae40ca0dc9cebe45233c59769500674d7a966",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 788  // Updated: 2021/6/5
+#define TF_GRAPH_DEF_VERSION 789  // Updated: 2021/6/6
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
e2d0c2cb30f7f1472935350fd392ef473d07d089,tensorflow/tensorflow,Link to tf.ConfigProto,test_util.py,"@@ -2717,7 +2717,7 @@ def create_local_cluster(num_workers,
 
   TODO: image from https://www.tensorflow.org/images/diag1.svg
 
-  Figure 2 illustrates the interaction of these components.
+  Figure illustrates the interaction of these components.
   ""/job:worker/task:0"" and ""/job:ps/task:0"" are both tasks with worker services.
 
 
@@ -2744,9 +2744,9 @@ def create_local_cluster(num_workers,
     num_ps: Number of PS servers to start.
     protocol: Communication protocol.  Allowed values are documented in the
       documentation of `tf.train.Server`.
-    worker_config: (optional) ConfigProto to initialize workers. Can be used to
+    worker_config: (optional) `tf.ConfigProto` to initialize workers. Can be used to
       instantiate multiple devices etc.
-    ps_config: (optional) ConfigProto to initialize PS servers.
+    ps_config: (optional) `tf.ConfigProto` to initialize PS servers.
 
   Returns:
     A tuple `(worker_servers, ps_servers)`.  `worker_servers` is a list
",0,train
42579858f9cda701c7c69d4a1f89035f0a68b258,tensorflow/tensorflow,[MLIR][XLA] Add GatherOp to HLO to LHLO converters,hlo_legalize_to_lhlo.cc,"@@ -453,6 +453,7 @@ void populateHLOToLHLOConversionPattern(
       HloToLhloOpConverter<xla_hlo::DivOp>,
       HloToLhloOpConverter<xla_hlo::DotOp>,
       HloToLhloOpConverter<xla_hlo::ExpOp>,
+      HloToLhloOpConverter<xla_hlo::GatherOp>,
       HloToLhloOpConverter<xla_hlo::ImagOp>,
       HloToLhloOpConverter<xla_hlo::IotaOp>,
       HloToLhloOpConverter<xla_hlo::LogOp>,
",0,train
42579858f9cda701c7c69d4a1f89035f0a68b258,tensorflow/tensorflow,[MLIR][XLA] Add GatherOp to HLO to LHLO converters,map_hlo_to_lhlo_op.h,"@@ -52,6 +52,7 @@ MAP_HLO_TO_LHLO(CosOp);
 MAP_HLO_TO_LHLO(DivOp);
 MAP_HLO_TO_LHLO(DotOp);
 MAP_HLO_TO_LHLO(ExpOp);
+MAP_HLO_TO_LHLO(GatherOp);
 MAP_HLO_TO_LHLO(ImagOp);
 MAP_HLO_TO_LHLO(IotaOp);
 MAP_HLO_TO_LHLO(LogOp);
",0,train
76dca033bc9e17ba5f74c22a301d513afa4d5790,tensorflow/tensorflow,Update nn_ops.py,nn_ops.py,"@@ -1853,9 +1853,9 @@ def conv2d_v2(input,  # pylint: disable=redefined-builtin
   Usage Example:
   
   >>> kernel_in = np.array([
-  ...  [ [[2, 0.1]],[[3, 0.2]] ],
+  ...  [ [[2, 0.1]], [[3, 0.2]] ],
   ...  [ [[0, 0.3]],[[1, 0.4]] ], ])
-  >>> x = tf.placeholder(tf.float32, shape=[1, 5, 5, 1])
+  >>> x = tf.Variable(shape=tf.TensorShape(1, 5, 5, 1))
   >>> kernel = tf.constant(kernel_in, dtype=tf.float32)
   <tf.Tensor 'Conv2D_1:0' shape=(1, 4, 4, 2) dtype=float32>
 
",0,train
edbb66a83fd6070bfc9509caae15b46b7c7a2261,tensorflow/tensorflow,"Remove usage of internal composite_tensor_utils.get_shape from Keras. (By forking the one usage)

PiperOrigin-RevId: 342130856
Change-Id: I8e67dd09a1c4e7326dd50573b58091c0d1338dc9",training_utils_v1.py,"@@ -425,6 +425,15 @@ def standardize_single_array(x, expected_shape=None):
   return x
 
 
+def get_composite_shape(tensor):
+  """"""Returns the shape of the passed composite tensor.""""""
+  if isinstance(tensor, sparse_tensor.SparseTensorValue):
+    # SparseTensorValues use a 'dense_shape' attribute
+    return tensor.dense_shape
+  else:
+    return tensor.shape
+
+
 def standardize_input_data(data,
                            names,
                            shapes=None,
@@ -528,7 +537,7 @@ def standardize_input_data(data,
             continue
           data_shape = tuple(tensorshape.as_list())
         elif is_composite_or_composite_value(data[i]):
-          tensorshape = composite_tensor_utils.get_shape(data[i])
+          tensorshape = get_composite_shape(data[i])
           data_shape = tuple(tensorshape.as_list())
         else:
           data_shape = data[i].shape
",0,train
8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change

PiperOrigin-RevId: 366510368
Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",sparse_tensor.py,"@@ -343,11 +343,7 @@ class SparseTensorSpec(type_spec.BatchableTypeSpec):
         not tf2.enabled()):
       return SparseTensorValue(*tensor_list)
     else:
-      result = SparseTensor(*tensor_list)
-      # Augment the static dense shape with the shape carried by the spec.
-      result._dense_shape_default = result._dense_shape_default.merge_with(  # pylint: disable=protected-access
-          self._shape)
-      return result
+      return SparseTensor(*tensor_list)
 
   # The SparseTensorSpec tensor_list encoding uses (de)serialize_sparse ops
   # to (un)box the component tensors in a way that allows for batching &
",0,train
8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change

PiperOrigin-RevId: 366510368
Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",sparse_tensor_test.py,"@@ -290,16 +290,6 @@ class SparseTensorSpecTest(test_util.TensorFlowTestCase,
     self.assertAllEqual(st.values, st_reconstructed.values)
     self.assertAllEqual(st.dense_shape, st_reconstructed.dense_shape)
 
-  def testFromComponentsDynamicDenseShapeTensor(self):
-    @def_function.function(input_signature=[
-        sparse_tensor.SparseTensorSpec([None, 10, 100])])
-    def sparse_fun(st):
-      self.assertEqual(st.get_shape().as_list(), [None, 10, 100])
-      return st.dense_shape
-
-    # Force tracing the TF function.
-    _ = sparse_fun.get_concrete_function()
-
   @test_util.run_v1_only(""SparseTensorValue is deprecated in v2"")
   def testFromNumpyComponents(self):
     indices = np.array([[0], [8]])
",0,train
8edf5c9e4cd68b81e7660ed44c7852485aebae14,tensorflow/tensorflow,"Reverted a change

PiperOrigin-RevId: 366510368
Change-Id: I87c5bc3734986ab815d48bb244f90d54bda95df9",control_flow_ops_test.py,"@@ -762,7 +762,7 @@ class DataTypesTest(test_util.TensorFlowTestCase):
 
   @test_util.run_deprecated_v1
   def test_sparse_tensors(self):
-    shape = tensor_shape.TensorShape([3, 4])
+    shape = tensor_shape.TensorShape([None, None])
 
     def true_fn():
       return [
",0,train
dd410bc164d4026a2feb5baae26e6df7a2005d89,tensorflow/tensorflow,"Backported some changes to the reduction code from upstream Eigen to keep the code in sync.
Change: 127477364",eigen_pooling.h,"@@ -376,6 +376,24 @@ struct AvgPoolMeanReducer {
   Packet packetCount_;
 };
 
+template <typename Device>
+struct reducer_traits<AvgPoolMeanReducer<float>, Device> {
+  enum {
+    Cost = 1,
+#if (EIGEN_ARCH_i386 || EIGEN_ARCH_x86_64) && !defined(__CUDACC__)
+    // We only support packet access for floats.
+    PacketAccess = true
+#else
+    PacketAccess = false
+#endif
+  };
+};
+
+template <>
+struct reducer_traits<AvgPoolMeanReducer<float>, GpuDevice> {
+  enum { Cost = 1, PacketAccess = false };
+};
+
 }  // namespace internal
 
 #if !defined(EIGEN_HAS_INDEX_LIST)
",0,train
de336139770c6e9e035c0e402375efda48d75301,tensorflow/tensorflow,"[rollforward] Guess test binary path from TEST_TARGET env var

PiperOrigin-RevId: 343132917
Change-Id: I6af62b595875070dac9e47f22564852dd4976252",multi_process_lib.py,"@@ -23,6 +23,7 @@ import platform
 import sys
 import unittest
 from absl import app
+from absl import logging
 
 from tensorflow.python.eager import test
 
@@ -97,31 +98,28 @@ def _set_spawn_exe_path():
   """"""
   # TODO(b/150264776): This does not work with Windows. Find a solution.
   if sys.argv[0].endswith('.py'):
+    path = None
     # If all we have is a python module path, we'll need to make a guess for
-    # the actual executable path. Since the binary path may correspond to the
-    # parent's path of the python module, we are making guesses by reducing
-    # directories one at a time. E.g.,
-    # tensorflow/python/some/path/my_test.py
-    # -> tensorflow/python/some/path/my_test
-    # -> tensorflow/python/some/my_test
-    # -> tensorflow/python/my_test
-    path_to_use = None
-    guess_path = sys.argv[0][:-3]
-    guess_path = guess_path.split(os.sep)
-    for path_reduction in range(-1, -len(guess_path), -1):
-      possible_path = os.sep.join(guess_path[:path_reduction] +
-                                  [guess_path[-1]])
+    # the actual executable path.
+    if 'bazel-out' in sys.argv[0]:
+      # Guess the binary path under bazel. For target
+      # //tensorflow/python/distribute:input_lib_test_multiworker_gpu, the
+      # argv[0] is in the form of
+      # /.../tensorflow/python/distribute/input_lib_test.py
+      # and the binary is
+      # /.../tensorflow/python/distribute/input_lib_test_multiworker_gpu
+      org_tensorflow_path = sys.argv[0][:sys.argv[0].rfind('/tensorflow')]
+      binary = os.environ['TEST_TARGET'][2:].replace(':', '/', 1)
+      possible_path = os.path.join(org_tensorflow_path, binary)
+      logging.info('Guessed test binary path: %s', possible_path)
       if os.access(possible_path, os.X_OK):
-        path_to_use = possible_path
-        break
-      # The binary can possibly have _gpu suffix.
-      possible_path += '_gpu'
-      if os.access(possible_path, os.X_OK):
-        path_to_use = possible_path
-        break
-    if path_to_use is None:
+        path = possible_path
+    if path is None:
+      logging.error(
+          'Cannot determine binary path. sys.argv[0]=%s os.environ=%s',
+          sys.argv[0], os.environ)
       raise RuntimeError('Cannot determine binary path')
-    sys.argv[0] = path_to_use
+    sys.argv[0] = path
   # Note that this sets the executable for *all* contexts.
   multiprocessing.get_context().set_executable(sys.argv[0])
 
",0,train
573c6f40a90ace2bc921738937fea32fdf724f7b,tensorflow/tensorflow,Bump the required numpy version in r1.6,setup.py,"@@ -36,7 +36,7 @@ REQUIRED_PACKAGES = [
     'astor >= 0.6.0',
     'gast >= 0.2.0',
     'grpcio >= 1.8.6',
-    'numpy >= 1.12.1',
+    'numpy >= 1.13.3',
     'six >= 1.10.0',
     'protobuf >= 3.4.0',
     'tensorflow-tensorboard >= 1.5.0, < 1.6.0',
",0,test
ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin.

PiperOrigin-RevId: 203698572",arg_min_max.cc,"@@ -23,7 +23,7 @@ limitations under the License.
 namespace tflite {
 namespace ops {
 namespace builtin {
-namespace arg_max {
+namespace arg_min_max {
 
 constexpr int kInputTensor = 0;
 constexpr int kAxis = 1;
@@ -80,30 +80,39 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   return context->ResizeTensor(context, output, output_size);
 }
 
+template <typename T>
+std::function<bool(T, T)> GetComparefunction(bool is_arg_max) {
+  if (is_arg_max) {
+    return std::greater<T>();
+  } else {
+    return std::less<T>();
+  }
+}
+
 // The current impl actually ignores the axis argument.
 // Only determine the index of the maximum value in the last dimension.
-TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
+TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node, bool is_arg_max) {
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* axis = GetInput(context, node, kAxis);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
-#define TF_LITE_ARG_MAX(data_type, axis_type, output_type)                     \
-  optimized_ops::ArgMax(GetTensorData<axis_type>(axis),                        \
-                        GetTensorData<data_type>(input), GetTensorDims(input), \
-                        GetTensorData<output_type>(output),                    \
-                        GetTensorDims(output))
+#define TF_LITE_ARG_MIN_MAX(data_type, axis_type, output_type)         \
+  optimized_ops::ArgMinMax(                                            \
+      GetTensorData<axis_type>(axis), GetTensorData<data_type>(input), \
+      GetTensorDims(input), GetTensorData<output_type>(output),        \
+      GetTensorDims(output), GetComparefunction<data_type>(is_arg_max))
   if (axis->type == kTfLiteInt32) {
     switch (output->type) {
       case kTfLiteInt32: {
         switch (input->type) {
           case kTfLiteFloat32:
-            TF_LITE_ARG_MAX(float, int32_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(float, int32_t, int32_t);
             break;
           case kTfLiteUInt8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int32_t);
             break;
           case kTfLiteInt32:
-            TF_LITE_ARG_MAX(int32_t, int32_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int32_t);
             break;
           default:
             return kTfLiteError;
@@ -112,13 +121,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       case kTfLiteInt64: {
         switch (input->type) {
           case kTfLiteFloat32:
-            TF_LITE_ARG_MAX(float, int32_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(float, int32_t, int64_t);
             break;
           case kTfLiteUInt8:
-            TF_LITE_ARG_MAX(uint8_t, int32_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(uint8_t, int32_t, int64_t);
             break;
           case kTfLiteInt32:
-            TF_LITE_ARG_MAX(int32_t, int32_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(int32_t, int32_t, int64_t);
             break;
           default:
             return kTfLiteError;
@@ -132,13 +141,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       case kTfLiteInt32: {
         switch (input->type) {
           case kTfLiteFloat32:
-            TF_LITE_ARG_MAX(float, int64_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(float, int64_t, int32_t);
             break;
           case kTfLiteUInt8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(uint8_t, int64_t, int32_t);
             break;
           case kTfLiteInt32:
-            TF_LITE_ARG_MAX(int32_t, int64_t, int32_t);
+            TF_LITE_ARG_MIN_MAX(int32_t, int64_t, int32_t);
             break;
           default:
             return kTfLiteError;
@@ -147,13 +156,13 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
       case kTfLiteInt64: {
         switch (input->type) {
           case kTfLiteFloat32:
-            TF_LITE_ARG_MAX(float, int64_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(float, int64_t, int64_t);
             break;
           case kTfLiteUInt8:
-            TF_LITE_ARG_MAX(uint8_t, int64_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(uint8_t, int64_t, int64_t);
             break;
           case kTfLiteInt32:
-            TF_LITE_ARG_MAX(int32_t, int64_t, int64_t);
+            TF_LITE_ARG_MIN_MAX(int32_t, int64_t, int64_t);
             break;
           default:
             return kTfLiteError;
@@ -163,16 +172,20 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         return kTfLiteError;
     }
   }
-#undef TF_LITE_ARG_MAX
+#undef TF_LITE_ARG_MIN_MAX
 
   return kTfLiteOk;
 }
 
-}  // namespace arg_max
+TfLiteStatus ArgMaxEval(TfLiteContext* context, TfLiteNode* node) {
+  return Eval(context, node, true);
+}
+
+}  // namespace arg_min_max
 
 TfLiteRegistration* Register_ARG_MAX() {
-  static TfLiteRegistration r = {nullptr, nullptr, arg_max::Prepare,
-                                 arg_max::Eval};
+  static TfLiteRegistration r = {nullptr, nullptr, arg_min_max::Prepare,
+                                 arg_min_max::ArgMaxEval};
   return &r;
 }
 
",0,train
ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin.

PiperOrigin-RevId: 203698572",arg_min_max_test.cc,,0,train
ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin.

PiperOrigin-RevId: 203698572",optimized_ops.h,"@@ -41,6 +41,7 @@ namespace optimized_ops {
 
 // Unoptimized reference ops:
 using reference_ops::ArgMax;
+using reference_ops::ArgMinMax;
 using reference_ops::BroadcastGreater;
 using reference_ops::BroadcastGreaterEqual;
 using reference_ops::BroadcastLess;
",0,train
ff83809afa9062e77809d2b65ffbaee3c0045241,tensorflow/tensorflow,"Refactoring ArgMax implementation in preparation for ArgMin.

PiperOrigin-RevId: 203698572",reference_ops.h,"@@ -3717,9 +3717,9 @@ void TensorFlowMaximumMinimum(const T* input1_data, const Dims<4>& input1_dims,
   }
 }
 
-template <typename T1, typename T2, typename T3>
-void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
-            T2* output_data, const Dims<4>& output_dims) {
+template <typename T1, typename T2, typename T3, typename Cmp>
+void ArgMinMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
+               T2* output_data, const Dims<4>& output_dims, const Cmp& cmp) {
   // The current ArgMax implemention can only determine the index of the maximum
   // value in the last dimension. So the axis argument is ignored.
 
@@ -3732,19 +3732,28 @@ void ArgMax(const T3* axis, const T1* input_data, const Dims<4>& input_dims,
   const int depth = ArraySize(input_dims, 0);
 
   for (int i = 0; i < outer_size; ++i) {
-    auto max_value = input_data[i * depth];
-    int max_index = 0;
+    auto min_max_value = input_data[i * depth];
+    int min_max_index = 0;
     for (int d = 1; d < depth; ++d) {
       const auto& curr_value = input_data[i * depth + d];
-      if (curr_value > max_value) {
-        max_value = curr_value;
-        max_index = d;
+      if (cmp(curr_value, min_max_value)) {
+        min_max_value = curr_value;
+        min_max_index = d;
       }
     }
-    output_data[i] = max_index;
+    output_data[i] = min_max_index;
   }
 }
 
+// TODO(renjieliu): Remove this one.
+template <typename T1, typename T2, typename T3>
+void ArgMax(const T3* axis, const T1* input_data,
+            const tflite::Dims<4>& input_dims, T2* output_data,
+            const tflite::Dims<4>& output_dims) {
+  ArgMinMax(axis, input_data, input_dims, output_data, output_dims,
+            std::greater<T1>());
+}
+
 template <typename T>
 void Transpose(const T* input, const Dims<4>& input_dims, T* output,
                const Dims<4>& output_dims, const int* permuted_axes) {
",0,train
107416b863c7b362ec8c04006ce1aab5ab1699fd,tensorflow/tensorflow,"Fix `ResourceWarning: unclosed file` warnings in reader_ops_test (#3827)

Some file handles created in these test cases were not being closed.
This causes warnings as well as leaking the handles.

./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:245: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.0.txt'>
  f = open(fn, ""wb"")
/source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:276: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'>
  self._testOneEpoch(self._CreateFiles(crlf=True))
./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:273: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'>
  self._testOneEpoch(self._CreateFiles(crlf=False))
./source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:279: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/text_line.1.txt'>
  files = self._CreateFiles()
../source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.0.txt'>
  open(fn, ""wb"").write(c)
/source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.1.txt'>
  open(fn, ""wb"").write(c)
/source/tensorflow/tensorflow/python/kernel_tests/reader_ops_test.py:183: ResourceWarning: unclosed file <_io.BufferedWriter name='/var/folders/sq/vmncyd7506q_ch43llrwr8sn6zfknl/T/reader_ops_test/whole_file.2.txt'>
  open(fn, ""wb"").write(c)
...",reader_ops_test.py,"@@ -178,7 +178,8 @@ class WholeFileReaderTest(tf.test.TestCase):
                        for i in range(3)]
     self._content = [b""One\na\nb\n"", b""Two\nC\nD"", b""Three x, y, z""]
     for fn, c in zip(self._filenames, self._content):
-      open(fn, ""wb"").write(c)
+      with open(fn, ""wb"") as h:
+        h.write(c)
 
   def tearDown(self):
     super(WholeFileReaderTest, self).tearDown()
@@ -240,13 +241,13 @@ class TextLineReaderTest(tf.test.TestCase):
     for i in range(self._num_files):
       fn = os.path.join(self.get_temp_dir(), ""text_line.%d.txt"" % i)
       filenames.append(fn)
-      f = open(fn, ""wb"")
-      for j in range(self._num_lines):
-        f.write(self._LineText(i, j))
-        # Always include a newline after the record unless it is
-        # at the end of the file, in which case we include it sometimes.
-        if j + 1 != self._num_lines or i == 0:
-          f.write(b""\r\n"" if crlf else b""\n"")
+      with open(fn, ""wb"") as f:
+        for j in range(self._num_lines):
+          f.write(self._LineText(i, j))
+          # Always include a newline after the record unless it is
+          # at the end of the file, in which case we include it sometimes.
+          if j + 1 != self._num_lines or i == 0:
+            f.write(b""\r\n"" if crlf else b""\n"")
     return filenames
 
   def _testOneEpoch(self, files):
@@ -311,11 +312,11 @@ class FixedLengthRecordReaderTest(tf.test.TestCase):
     for i in range(self._num_files):
       fn = os.path.join(self.get_temp_dir(), ""fixed_length_record.%d.txt"" % i)
       filenames.append(fn)
-      f = open(fn, ""wb"")
-      f.write(b""H"" * self._header_bytes)
-      for j in range(self._num_records):
-        f.write(self._Record(i, j))
-      f.write(b""F"" * self._footer_bytes)
+      with open(fn, ""wb"") as f:
+        f.write(b""H"" * self._header_bytes)
+        for j in range(self._num_records):
+          f.write(self._Record(i, j))
+        f.write(b""F"" * self._footer_bytes)
     return filenames
 
   def testOneEpoch(self):
",0,test
fc0b63edc0116f2df9847e3083247a4613bc0f26,tensorflow/tensorflow,"Clean up RemoveTrivialPassthroughOp and fix an issue in an edge case
where we were not erasing the correct arrays.

PiperOrigin-RevId: 176784020",remove_trivial_passthrough.cc,"@@ -63,19 +63,28 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
       main_input_array_index = i;
     }
   }
-  CHECK_LE(count_nonconstant_input_arrays, 1);
 
   const string main_input_name = passthru_op->inputs[main_input_array_index];
   const string output_name = passthru_op->outputs[0];
+
+  // Build the list of all input and output arrays of the passthrough node
+  // that we are considering removing. Any of these arrays is a candidate
+  // for being removed as well, if nothing else references it. Doing that
+  // arrays-removal together with the passthrough-node-removal proved too
+  // error-prone.
+  std::vector<string> removal_candidates;
+  for (const string& input : passthru_op->inputs) {
+    removal_candidates.push_back(input);
+  }
+  removal_candidates.push_back(output_name);
+
   if (IsDiscardableArray(*model, output_name)) {
     transformation->AddMessageF(
         ""Removing %s, keeping its non-constant input array"",
         LogName(*passthru_op));
-    model->arrays.erase(output_name);
     for (const string& input : passthru_op->inputs) {
       if (IsDiscardableArray(*model, input) && input != main_input_name &&
           CountOpsWithInput(*model, input) == 1) {
-        model->arrays.erase(input);
       }
     }
     RerouteEdges(output_name, main_input_name, model);
@@ -85,13 +94,12 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
     for (const string& input : passthru_op->inputs) {
       if (IsDiscardableArray(*model, input) &&
           (input == main_input_name || CountOpsWithInput(*model, input) == 1)) {
-        model->arrays.erase(input);
       }
     }
     RerouteEdges(main_input_name, output_name, model);
   } else {
     transformation->AddMessageF(
-        ""Cannot remove %s, neither its nonconstant input nor its output may be ""
+        ""Cannot remove %s, neither its main input nor its output may be ""
         ""discarded"",
         LogName(*passthru_op));
     return false;
@@ -100,6 +108,26 @@ bool RemoveTrivialPassthroughOp(GraphTransformation* transformation,
   // Remove the pass-through node.
   model->operators.erase(passthru_it);
 
+  // Remove any array that is no longer used.
+  for (const string& removal_candidate : removal_candidates) {
+    bool is_referenced = false;
+    for (const auto& op : model->operators) {
+      for (const string& input : op->inputs) {
+        if (input == removal_candidate) {
+          is_referenced = true;
+        }
+      }
+      for (const string& output : op->outputs) {
+        if (output == removal_candidate) {
+          is_referenced = true;
+        }
+      }
+    }
+    if (!is_referenced) {
+      model->arrays.erase(removal_candidate);
+    }
+  }
+
   return true;
 }
 
",0,train
fc0b63edc0116f2df9847e3083247a4613bc0f26,tensorflow/tensorflow,"Clean up RemoveTrivialPassthroughOp and fix an issue in an edge case
where we were not erasing the correct arrays.

PiperOrigin-RevId: 176784020",remove_trivial_passthrough.h,"@@ -21,10 +21,12 @@ limitations under the License.
 namespace toco {
 
 // A ""passthrough op"" is an op that satisfies the following conditions:
-//   1. It has at most one non-constant input (it may have other constant
-//   inputs).
+//   1. One of its inputs is (per the semantics of that op) its ""main input""
+//      for some notion of ""main input"" that is operator-specific; for example,
+//      for a Reshape op, the main input is the array being reshaped, not the
+//      other input which gives the new shape.
 //   2. It has exactly one output.
-//   3. It forwards exactly its single non-constant input to its single output.
+//   3. It forwards exactly its main input to its single output.
 //
 // Examples include:
 //   1. TensorFlow Identity ops. (Have one input).
@@ -34,7 +36,7 @@ namespace toco {
 //      where one of its inputs is a constant array filled with zeros.
 //
 // A passthrough op is ""trivial"" and can be removed when it is possible to
-// discard either its single non-constant input or output array, rerouting any
+// discard either its main input or output array, rerouting any
 // edge involving it to the other of these two arrays.
 //
 // It is only possible to discard such an array if it is not explicitly
",0,train
33febd68b5b5a198ff613f72581ce20293ed07f3,tensorflow/tensorflow,"Convert input shape to TensorShape before building SeparableConv.

PiperOrigin-RevId: 261724310",convolutional.py,"@@ -1785,6 +1785,7 @@ class DepthwiseConv2D(Conv2D):
     if len(input_shape) < 4:
       raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. '
                        'Received input shape:', str(input_shape))
+    input_shape = tensor_shape.TensorShape(input_shape)
     if self.data_format == 'channels_first':
       channel_axis = 1
     else:
",0,train
d71be4d5febada6af32f3286ad2f4ec61cefb1b3,tensorflow/tensorflow,"[XLA:GPU] s/llvm_ir::IrArray/IrArray/ in ir_emitter_unnested.

Less visual noise.

PiperOrigin-RevId: 204139183",ir_emitter_unnested.cc,"@@ -595,7 +595,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
             BuildKernelThunk(fusion, /*implements_whole_instruction=*/false));
         thunk_sequence_->emplace_back(
             MakeUnique<SequentialThunk>(std::move(thunks), fusion));
-        std::vector<llvm_ir::IrArray> parameter_arrays;
+        std::vector<IrArray> parameter_arrays;
         for (HloInstruction* operand : fusion->operands()) {
           parameter_arrays.push_back(GetIrArray(*operand, *fusion));
         }
@@ -668,7 +668,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
     // Set up kernel thunk and fused ir emitter.
     thunk_sequence_->emplace_back(
         BuildKernelThunk(fusion, /*implements_whole_instruction=*/true));
-    std::vector<llvm_ir::IrArray> operand_arrays;
+    std::vector<IrArray> operand_arrays;
     for (HloInstruction* operand : fusion->operands()) {
       operand_arrays.push_back(GetIrArray(*operand, *fusion));
     }
@@ -681,7 +681,7 @@ Status IrEmitterUnnested::HandleFusion(HloInstruction* fusion) {
 
     // Array to write into.  Because this is an in-place operation, this is the
     // same as operand 0's array.
-    llvm_ir::IrArray output_array = GetIrArray(*fusion, *fusion);
+    IrArray output_array = GetIrArray(*fusion, *fusion);
 
     LaunchDimensions launch_dimensions = CalculateLaunchDimensions(
         update_shape, ir_emitter_context_->device_description());
@@ -732,7 +732,7 @@ Status IrEmitterUnnested::HandleCopy(HloInstruction* copy) {
 }
 
 Status IrEmitterUnnested::EmitExtraOutputsForReduce(
-    const HloInstruction* reduce, const llvm_ir::IrArray::Index& index,
+    const HloInstruction* reduce, const IrArray::Index& index,
     tensorflow::gtl::ArraySlice<
         std::pair<llvm_ir::ElementGenerator, ShapeIndex>>
         extra_output_gens) {
@@ -819,8 +819,7 @@ Status IrEmitterUnnested::EmitReductionToScalar(
   // // and threads_per_block is a multiple of warpSize.
   // reduce_kernel<<<num_blocks, threads_per_block>>>();
   //
-  auto loop_body_emitter =
-      [=](const llvm_ir::IrArray::Index& tile_index) -> Status {
+  auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status {
     const int num_reduces = reducers.size();
     llvm::Type* element_ir_type =
         llvm_ir::PrimitiveTypeToIrType(input_shape.element_type(), module_);
@@ -829,9 +828,8 @@ Status IrEmitterUnnested::EmitReductionToScalar(
       llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca(
           element_ir_type, /*ArraySize=*/nullptr,
           ""partial_reduction_result."" + llvm::Twine(i));
-      TF_ASSIGN_OR_RETURN(
-          llvm::Value* const init_ir_value,
-          init_value_gens[i](llvm_ir::IrArray::Index(index_ty)));
+      TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
+                          init_value_gens[i](IrArray::Index(index_ty)));
       ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address);
       partial_reduction_result_addresses.push_back(
           partial_reduction_result_address);
@@ -866,7 +864,7 @@ Status IrEmitterUnnested::EmitReductionToScalar(
         llvm_ir::SetToFirstInsertPoint(if_data.true_block, &ir_builder_);
       }
 
-      llvm_ir::IrArray::Index input_index(
+      IrArray::Index input_index(
           /*linear=*/x, input_shape, &ir_builder_);
       llvm::Value* input_address = ir_builder_.CreateAlloca(element_ir_type);
       for (int i = 0; i != num_reduces; ++i) {
@@ -951,7 +949,7 @@ Status IrEmitterUnnested::EmitReductionToScalar(
       llvm::Value* output_address =
           GetIrArray(*output, *output, reduce_output_shapes[i])
               .EmitArrayElementAddress(
-                  llvm_ir::IrArray::Index(
+                  IrArray::Index(
                       /*linear=*/ir_builder_.getInt64(0),
                       ShapeUtil::GetSubshape(output->shape(),
                                              reduce_output_shapes[i]),
@@ -1037,8 +1035,7 @@ Status IrEmitterUnnested::EmitColumnReduction(
   //   }
   //   AtomicReducer(&output[x], partial_result);
   // }
-  auto loop_body_emitter =
-      [=](const llvm_ir::IrArray::Index& tile_index) -> Status {
+  auto loop_body_emitter = [=](const IrArray::Index& tile_index) -> Status {
     const int num_reduces = reducers.size();
     // Emit the loop body that reduces one tile.
     llvm::Type* element_ir_type =
@@ -1048,9 +1045,8 @@ Status IrEmitterUnnested::EmitColumnReduction(
       llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca(
           element_ir_type, /*ArraySize=*/nullptr,
           ""partial_reduction_result."" + llvm::Twine(i));
-      TF_ASSIGN_OR_RETURN(
-          llvm::Value* const init_ir_value,
-          init_value_gens[i](llvm_ir::IrArray::Index(index_ty)));
+      TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
+                          init_value_gens[i](IrArray::Index(index_ty)));
       ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address);
       partial_reduction_result_addresses.push_back(
           partial_reduction_result_address);
@@ -1106,9 +1102,9 @@ Status IrEmitterUnnested::EmitColumnReduction(
         const Shape input_matrix_shape =
             ShapeUtil::MakeShapeWithDescendingLayout(input_shape.element_type(),
                                                      {height, width});
-        const llvm_ir::IrArray::Index input_matrix_index(
-            {y, x}, input_matrix_shape, &ir_builder_);
-        const llvm_ir::IrArray::Index input_index =
+        const IrArray::Index input_matrix_index({y, x}, input_matrix_shape,
+                                                &ir_builder_);
+        const IrArray::Index input_index =
             input_matrix_index
                 .SourceIndexOfReshape(input_matrix_shape,
                                       normalized_input_shape, &ir_builder_)
@@ -1159,11 +1155,10 @@ Status IrEmitterUnnested::EmitColumnReduction(
       llvm::Value* output_address =
           GetIrArray(*output, *output, reduce_output_shapes[i])
               .EmitArrayElementAddress(
-                  llvm_ir::IrArray::Index(
-                      x,
-                      ShapeUtil::GetSubshape(output->shape(),
-                                             reduce_output_shapes[i]),
-                      &ir_builder_),
+                  IrArray::Index(x,
+                                 ShapeUtil::GetSubshape(
+                                     output->shape(), reduce_output_shapes[i]),
+                                 &ir_builder_),
                   &ir_builder_, ""output_element_address"");
       TF_RETURN_IF_ERROR(EmitAtomicOperationForNestedComputation(
           *reducers[i], output_address, partial_reduction_result_addresses[i]));
@@ -1335,7 +1330,7 @@ Status IrEmitterUnnested::EmitRowReduction(
     return llvm::ConstantInt::get(index_ty, c);
   };
 
-  auto loop_body_emitter = [=](const llvm_ir::IrArray::Index& tile_index) {
+  auto loop_body_emitter = [=](const IrArray::Index& tile_index) {
     const int num_reduces = reducers.size();
     llvm::Type* element_ir_type = llvm_ir::PrimitiveTypeToIrType(
         input_shape.element_type(), ir_emitter_context_->llvm_module());
@@ -1344,9 +1339,8 @@ Status IrEmitterUnnested::EmitRowReduction(
       llvm::Value* partial_reduction_result_address = ir_builder_.CreateAlloca(
           element_ir_type, /*ArraySize=*/nullptr,
           ""partial_reduction_result."" + llvm::Twine(i));
-      TF_ASSIGN_OR_RETURN(
-          llvm::Value* const init_ir_value,
-          init_value_gens[i](llvm_ir::IrArray::Index(index_ty)));
+      TF_ASSIGN_OR_RETURN(llvm::Value* const init_ir_value,
+                          init_value_gens[i](IrArray::Index(index_ty)));
       ir_builder_.CreateStore(init_ir_value, partial_reduction_result_address);
       partial_reduction_result_addresses.push_back(
           partial_reduction_result_address);
@@ -1435,9 +1429,9 @@ Status IrEmitterUnnested::EmitRowReduction(
                 const Shape input_3d_tensor_shape =
                     ShapeUtil::MakeShapeWithDescendingLayout(
                         input_shape.element_type(), {depth, height, width});
-                const llvm_ir::IrArray::Index input_3d_tensor_index(
+                const IrArray::Index input_3d_tensor_index(
                     {z, y, x}, input_3d_tensor_shape, &ir_builder_);
-                const llvm_ir::IrArray::Index input_index =
+                const IrArray::Index input_index =
                     input_3d_tensor_index
                         .SourceIndexOfReshape(input_3d_tensor_shape,
                                               normalized_input_shape,
@@ -1532,11 +1526,10 @@ Status IrEmitterUnnested::EmitRowReduction(
       llvm::Value* output_address =
           GetIrArray(*output, *output, reduce_output_shapes[i])
               .EmitArrayElementAddress(
-                  llvm_ir::IrArray::Index(
-                      y,
-                      ShapeUtil::GetSubshape(output->shape(),
-                                             reduce_output_shapes[i]),
-                      &ir_builder_),
+                  IrArray::Index(y,
+                                 ShapeUtil::GetSubshape(
+                                     output->shape(), reduce_output_shapes[i]),
+                                 &ir_builder_),
                   &ir_builder_, ""output_element_address"");
       // We don't need to emit atomic operations if there is only one tile of
       // results. 'depth' is the z dimension, 'width' is the x dimension.
@@ -1686,11 +1679,11 @@ Status IrEmitterUnnested::HandleReduce(HloInstruction* reduce) {
         MakeUnique<SequentialThunk>(std::move(thunks), reduce));
 
     return EmitReductionToVector(
-        reduce, input->shape(), {[&](const llvm_ir::IrArray::Index& index) {
+        reduce, input->shape(), {[&](const IrArray::Index& index) {
           return GetIrArray(*input, *reduce)
               .EmitReadArrayElement(index, &ir_builder_);
         }},
-        {[&](const llvm_ir::IrArray::Index& index) {
+        {[&](const IrArray::Index& index) {
           return GetIrArray(*init_value, *reduce)
               .EmitReadArrayElement(index, &ir_builder_);
         }},
@@ -1791,8 +1784,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
   //         selected_index = I
   //         initialized_flag = true
   //   output(selected_index) = scatter(output(selected_index), source(S))
-  auto loop_body_emitter =
-      [=](const llvm_ir::IrArray::Index& source_index) -> Status {
+  auto loop_body_emitter = [=](const IrArray::Index& source_index) -> Status {
     // Allocate space to keep the currently selected value, its index, and a
     // boolean flag if the value is initialized. The initialized_flag is set
     // false.
@@ -1817,7 +1809,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
       window_size.push_back(dim.size());
       CHECK_GT(dim.size(), 0);
     }
-    const llvm_ir::IrArray::Index window_index = window_loops.AddLoopsForShape(
+    const IrArray::Index window_index = window_loops.AddLoopsForShape(
         ShapeUtil::MakeShape(operand_element_type, window_size), ""window"");
     llvm_ir::SetToFirstInsertPoint(window_loops.GetInnerLoopBodyBasicBlock(),
                                    &ir_builder_);
@@ -1825,7 +1817,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
     // Compute the operand index to visit and evaluate the condition whether the
     // operand index is within the bounds. The unsigned comparison includes
     // checking whether the operand index >= 0.
-    llvm_ir::IrArray::Index operand_index(index_type, source_index.size());
+    IrArray::Index operand_index(index_type, source_index.size());
     llvm::Value* in_bounds_condition = ir_builder_.getInt1(true);
     for (int64 i = 0; i < rank; ++i) {
       llvm::Value* strided_index = ir_builder_.CreateNSWMul(
@@ -1853,8 +1845,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
     // If the initialized_flag is false, initialize the selected value and index
     // with the currently visiting operand.
     llvm_ir::SetToFirstInsertPoint(if_initialized.false_block, &ir_builder_);
-    const auto save_operand_index = [&](
-        const llvm_ir::IrArray::Index& operand_index) {
+    const auto save_operand_index = [&](const IrArray::Index& operand_index) {
       for (int64 i = 0; i < rank; ++i) {
         llvm::Value* selected_index_address_slot =
             ir_builder_.CreateInBoundsGEP(selected_index_address,
@@ -1862,7 +1853,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
         ir_builder_.CreateStore(operand_index[i], selected_index_address_slot);
       }
     };
-    llvm_ir::IrArray operand_array = GetIrArray(*operand, *select_and_scatter);
+    IrArray operand_array = GetIrArray(*operand, *select_and_scatter);
     llvm::Value* operand_data =
         operand_array.EmitReadArrayElement(operand_index, &ir_builder_);
     ir_builder_.CreateStore(operand_data, selected_value_address);
@@ -1907,7 +1898,7 @@ Status IrEmitterUnnested::HandleSelectAndScatter(
     // value and the current output value.
     llvm_ir::SetToFirstInsertPoint(window_loops.GetOuterLoopExitBasicBlock(),
                                    &ir_builder_);
-    llvm_ir::IrArray::Index selected_index(operand_index.GetType());
+    IrArray::Index selected_index(operand_index.GetType());
     for (int64 i = 0; i < rank; ++i) {
       llvm::Value* selected_index_address_slot = ir_builder_.CreateInBoundsGEP(
           selected_index_address, {ir_builder_.getInt32(i)});
@@ -2492,7 +2483,7 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildInitializerThunk(
     TF_RETURN_IF_ERROR(HandleConstant(const_cast<HloInstruction*>(init_value)));
   }
   TF_RETURN_IF_ERROR(ParallelLoopEmitter(
-                         [=](const llvm_ir::IrArray::Index& index) {
+                         [=](const IrArray::Index& index) {
                            return GetIrArray(*init_value, *hlo)
                                .EmitReadArrayElement(index, &ir_builder_);
                          },
@@ -2688,7 +2679,7 @@ Status IrEmitterUnnested::EmitTargetElementLoopInThunk(
   }
 
   // For multioutput fusion, we need to emit each operand and the root.
-  std::vector<llvm_ir::IrArray> output_arrays;
+  std::vector<IrArray> output_arrays;
   for (int64 i = 0; i < ShapeUtil::TupleElementCount(hlo.shape()); ++i) {
     output_arrays.push_back(GetIrArray(hlo, hlo, {i}));
   }
@@ -2718,7 +2709,7 @@ Status IrEmitterUnnested::EmitTargetElementLoop(
 }
 
 int IrEmitterUnnested::ConstructIrArrayForOutputs(
-    const HloInstruction& hlo, std::vector<llvm_ir::IrArray>* output_arrays) {
+    const HloInstruction& hlo, std::vector<IrArray>* output_arrays) {
   int64 num_outputs = 1;
   if (hlo.IsMultiOutputFusion()) {
     num_outputs = ShapeUtil::TupleElementCount(hlo.shape());
@@ -2733,7 +2724,7 @@ int IrEmitterUnnested::ConstructIrArrayForOutputs(
 }
 
 int IrEmitterUnnested::ConstructIrArrayForInputs(
-    const HloInstruction& hlo, std::vector<llvm_ir::IrArray>* param_arrays) {
+    const HloInstruction& hlo, std::vector<IrArray>* param_arrays) {
   int64 num_params = hlo.operands().size();
   param_arrays->reserve(num_params);
   for (const HloInstruction* param : hlo.operands()) {
@@ -2743,11 +2734,10 @@ int IrEmitterUnnested::ConstructIrArrayForInputs(
 }
 
 int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape(
-    const HloInstruction& hlo,
-    const std::vector<llvm_ir::IrArray>& output_arrays,
+    const HloInstruction& hlo, const std::vector<IrArray>& output_arrays,
     tensorflow::gtl::ArraySlice<int64> reduced_output_dims,
     std::vector<Shape>* output_reduced_shapes,
-    std::vector<llvm_ir::IrArray>* output_in_reduced_shape_arrays) {
+    std::vector<IrArray>* output_in_reduced_shape_arrays) {
   int64 num_outputs = 1;
   if (hlo.IsMultiOutputFusion()) {
     num_outputs = ShapeUtil::TupleElementCount(hlo.shape());
@@ -2770,19 +2760,18 @@ int IrEmitterUnnested::ConstructOutputReducedShapeAndCastOutputIrArrayToShape(
 }
 
 int IrEmitterUnnested::ConstructInputReducedShapeAndCastInputIrArrayToShape(
-    const HloInstruction& hlo,
-    const std::vector<llvm_ir::IrArray>& param_arrays,
+    const HloInstruction& hlo, const std::vector<IrArray>& param_arrays,
     const std::vector<llvm::Value*>& param_buffers,
     tensorflow::gtl::ArraySlice<int64> reduced_output_dims,
     std::vector<Shape>* param_reduced_shapes,
-    std::vector<llvm_ir::IrArray>* param_in_reduced_shape_arrays) {
+    std::vector<IrArray>* param_in_reduced_shape_arrays) {
   int64 num_params = hlo.operands().size();
   param_in_reduced_shape_arrays->reserve(num_params);
   param_reduced_shapes->reserve(num_params);
   for (int64 id = 0; id < num_params; ++id) {
     if (param_buffers[id] == nullptr) {
       param_reduced_shapes->push_back(Shape());
-      param_in_reduced_shape_arrays->push_back(llvm_ir::IrArray());
+      param_in_reduced_shape_arrays->push_back(IrArray());
       continue;
     }
     const HloInstruction* param = hlo.operand(id);
@@ -2835,11 +2824,11 @@ llvm::Value* GetBlockIdx(llvm::IRBuilder<>* builder, llvm::Type* index_ty,
 // processed element is within the boundary defined by `tile_width` and
 // `tile_height`.
 void EmitTiledElementalCodeWithBoundsCheck(
-    int64 tile_size, int64 num_rows, const llvm_ir::IrArray::Index& index,
+    int64 tile_size, int64 num_rows, const IrArray::Index& index,
     const string& loop_name, KernelSupportLibrary* ksl,
     llvm::IRBuilder<>* builder, llvm::Value* y, llvm::Value* x,
     llvm::Value* tile_width, llvm::Value* tile_height,
-    const std::function<void(const llvm_ir::IrArray::Index&, llvm::Value*)>&
+    const std::function<void(const IrArray::Index&, llvm::Value*)>&
         emit_elem_function) {
   llvm::Type* index_ty = tile_width->getType();
   // Emits a constant value with index type.
@@ -2847,8 +2836,7 @@ void EmitTiledElementalCodeWithBoundsCheck(
     return llvm::ConstantInt::get(index_ty, c);
   };
   // Adds `addend` to the given `dim` of `index`.
-  auto offset_dim = [&](llvm_ir::IrArray::Index index, llvm::Value* addend,
-                        int64 dim) {
+  auto offset_dim = [&](IrArray::Index index, llvm::Value* addend, int64 dim) {
     index[dim] = builder->CreateAdd(index[dim], addend);
     return index;
   };
@@ -3037,8 +3025,8 @@ LaunchDimensions IrEmitterUnnested::EmitHlo021Tile(
   auto emit_tiled_elemental_code_with_bounds_check =
       [&](const IrArray::Index& index, const string& loop_name,
           llvm::Value* tile_width, llvm::Value* tile_height,
-          const std::function<void(const llvm_ir::IrArray::Index&,
-                                   llvm::Value*)>& emit_elem_function) {
+          const std::function<void(const IrArray::Index&, llvm::Value*)>&
+              emit_elem_function) {
         EmitTiledElementalCodeWithBoundsCheck(
             kTileSize, kNumRows, index, loop_name, &ksl, &ir_builder_, y, x,
             tile_width, tile_height, emit_elem_function);
",0,train
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",flatbuffer_export.cc,"@@ -201,7 +201,7 @@ static StatusOr<tflite::TensorType> GetTFLiteType(Type type,
 static bool IsConst(Operation* op) {
   return isa<mlir::ConstantOp, mlir::arith::ConstantOp, mlir::TF::ConstOp,
              tfl::ConstOp, tfl::QConstOp, tfl::SparseConstOp,
-             tfl::SparseQConstOp>(op);
+             tfl::SparseQConstOp, mlir::TFL::NoValueOp>(op);
 }
 
 static bool IsTFResourceOp(Operation* op) {
",0,test
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",flatbuffer_import.cc,"@@ -853,8 +853,8 @@ StatusOr<Operation*> ConvertOp(
     // with `none` value,
     llvm::SmallVector<Value, 4> none_operands(
         input_max_num - op_input_num,
-        builder.create<mlir::ConstantOp>(loc, builder.getNoneType(),
-                                         builder.getUnitAttr()));
+        builder.create<mlir::TFL::NoValueOp>(loc, builder.getNoneType(),
+                                             builder.getUnitAttr()));
     op_state.addOperands(ArrayRef<Value>(none_operands));
   }
 
@@ -1305,8 +1305,8 @@ StatusOr<FuncOp> ConvertSubgraph(
         if (maybe_optional_arg_marker == nullptr) {
           maybe_optional_arg_marker =
               op_builder
-                  .create<mlir::ConstantOp>(base_loc, builder.getNoneType(),
-                                            builder.getUnitAttr())
+                  .create<mlir::TFL::NoValueOp>(base_loc, builder.getNoneType(),
+                                                builder.getUnitAttr())
                   .getResult();
         }
       } else if (!vals_map.at(input_num)) {
",0,test
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",tfl_ops.cc,"@@ -227,8 +227,9 @@ struct RemoveOptionalZeroBias : public OpRewritePattern<ConcreteOpType> {
   LogicalResult matchAndRewrite(ConcreteOpType op,
                                 PatternRewriter &rewriter) const override {
     if (EqualsZero(op.bias())) {
-      auto none_value = rewriter.create<mlir::ConstantOp>(
-          rewriter.getUnknownLoc(), rewriter.getUnitAttr());
+      auto none_value = rewriter.create<TFL::NoValueOp>(
+          rewriter.getUnknownLoc(), rewriter.getNoneType(),
+          rewriter.getUnitAttr());
       op.biasMutable().assign(none_value);
     }
 
@@ -2431,14 +2432,16 @@ struct RemoveLSTMOpZeroBias : public OpRewritePattern<LSTMOp> {
   LogicalResult matchAndRewrite(LSTMOp op,
                                 PatternRewriter &rewriter) const override {
     if (EqualsZero(op.input_gate_bias())) {
-      auto none_value = rewriter.create<mlir::ConstantOp>(
-          rewriter.getUnknownLoc(), rewriter.getUnitAttr());
+      auto none_value = rewriter.create<TFL::NoValueOp>(
+          rewriter.getUnknownLoc(), rewriter.getNoneType(),
+          rewriter.getUnitAttr());
       op.input_gate_biasMutable().assign(none_value);
     }
 
     if (EqualsZero(op.projection_bias())) {
-      auto none_value = rewriter.create<mlir::ConstantOp>(
-          rewriter.getUnknownLoc(), rewriter.getUnitAttr());
+      auto none_value = rewriter.create<TFL::NoValueOp>(
+          rewriter.getUnknownLoc(), rewriter.getNoneType(),
+          rewriter.getUnitAttr());
       op.projection_biasMutable().assign(none_value);
     }
 
@@ -2778,9 +2781,10 @@ struct FoldPseudoConstOp : public OpRewritePattern<ConstOp> {
       rewriter.replaceOpWithNewOp<arith::ConstantOp>(const_op,
                                                      const_op.value());
       return success();
-    } else if (ConstantOp::isBuildableWith(const_op.value(),
-                                           const_op.getType())) {
-      rewriter.replaceOpWithNewOp<ConstantOp>(const_op, const_op.value());
+    } else if (TFL::NoValueOp::isBuildableWith(const_op.value(),
+                                               const_op.getType())) {
+      rewriter.replaceOpWithNewOp<NoValueOp>(const_op, rewriter.getNoneType(),
+                                             const_op.value().cast<UnitAttr>());
       return success();
     }
     return failure();
@@ -3685,6 +3689,18 @@ OpFoldResult PadV2Op::fold(ArrayRef<Attribute> operands) {
   return {};
 }
 
+//===----------------------------------------------------------------------===//
+// NoValueOp
+//===----------------------------------------------------------------------===//
+
+OpFoldResult NoValueOp::fold(ArrayRef<Attribute> operands) {
+  return valueAttr();
+}
+
+bool NoValueOp::isBuildableWith(Attribute value, Type type) {
+  return value.isa<UnitAttr>() && type.isa<NoneType>();
+}
+
 //===----------------------------------------------------------------------===//
 // TableGen'd op method definitions
 //===----------------------------------------------------------------------===//
@@ -3712,8 +3728,8 @@ Operation *TensorFlowLiteDialect::materializeConstant(OpBuilder &builder,
     return builder.create<ConstOp>(loc, type, value.cast<ElementsAttr>());
   if (arith::ConstantOp::isBuildableWith(value, type))
     return builder.create<arith::ConstantOp>(loc, type, value);
-  if (ConstantOp::isBuildableWith(value, type))
-    return builder.create<ConstantOp>(loc, type, value);
+  if (NoValueOp::isBuildableWith(value, type))
+    return builder.create<NoValueOp>(loc, type, value.cast<UnitAttr>());
   return nullptr;
 }
 
",0,test
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",tfl_to_std.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 #include ""llvm/Support/Casting.h""
 #include ""mlir/Dialect/Quant/QuantOps.h""  // from @llvm-project
 #include ""mlir/Dialect/StandardOps/IR/Ops.h""  // from @llvm-project
+#include ""mlir/IR/BuiltinAttributes.h""  // from @llvm-project
 #include ""tensorflow/compiler/mlir/lite/ir/tfl_ops.h""
 #include ""tensorflow/compiler/mlir/lite/quantization/quantization_utils.h""
 
@@ -44,8 +45,8 @@ void ConvertTFLQuantOpsToMlirQuantOps(FuncOp func) {
         auto c = b.create<arith::ConstantOp>(q.getLoc(), q.value());
         q.output().replaceAllUsesWith(c);
         q.erase();
-      } else if (ConstantOp::isBuildableWith(value, type)) {
-        auto c = b.create<ConstantOp>(q.getLoc(), q.value());
+      } else if (TFL::NoValueOp::isBuildableWith(value, type)) {
+        auto c = b.create<TFL::NoValueOp>(q.getLoc(), type, mlir::UnitAttr());
         q.output().replaceAllUsesWith(c);
         q.erase();
       }
",0,test
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",legalize_tf.cc,"@@ -31,6 +31,7 @@ limitations under the License.
 #include ""llvm/ADT/Hashing.h""
 #include ""llvm/ADT/StringSwitch.h""
 #include ""llvm/Support/Threading.h""
+#include ""llvm/Support/raw_ostream.h""
 #include ""mlir/Dialect/Quant/FakeQuantSupport.h""  // from @llvm-project
 #include ""mlir/Dialect/Quant/QuantOps.h""  // from @llvm-project
 #include ""mlir/Dialect/Quant/UniformSupport.h""  // from @llvm-project
@@ -264,7 +265,7 @@ LogicalResult ConvertTFMatMulOp::matchAndRewrite(
   }
 
   Type output_type = tf_matmul_op.getResult().getType();
-  auto no_input = rewriter.create<ConstantOp>(
+  auto no_input = rewriter.create<TFL::NoValueOp>(
       op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr());
   auto fc_op = rewriter.create<FullyConnectedOp>(
       op->getLoc(), ArrayRef<Type>{output_type},
@@ -359,7 +360,7 @@ LogicalResult ConvertTFConv3DOp::matchAndRewrite(
 
   // TensorFlow Conv3D has no bias, optimization patterns will fuse Conv3D
   // with other ops can fill the bias.
-  Value none = rewriter.create<mlir::ConstantOp>(
+  Value none = rewriter.create<TFL::NoValueOp>(
       op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr());
 
   rewriter.replaceOpWithNewOp<TFL::Conv3DOp>(
@@ -399,7 +400,7 @@ LogicalResult ConvertTFConv3DBackpropInputV2Op::matchAndRewrite(
 
   // TensorFlow Conv3D has no bias, optimization patterns will fuse Conv3D
   // with other ops can fill the bias.
-  Value none = rewriter.create<mlir::ConstantOp>(
+  Value none = rewriter.create<TFL::NoValueOp>(
       op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr());
 
   Value output_shape =
@@ -518,7 +519,7 @@ struct LegalizeUnidirectionalSequenceLstm : public RewritePattern {
     }
 
     // Optional input placeholder.
-    Value none = rewriter.create<mlir::ConstantOp>(
+    Value none = rewriter.create<TFL::NoValueOp>(
         op->getLoc(), rewriter.getNoneType(), rewriter.getUnitAttr());
 
     // Populate inputs.
@@ -943,6 +944,7 @@ void LegalizeTF::runOnOperation() {
   // graph.
   target.addLegalOp<mlir::arith::ConstantOp>();
   target.addLegalOp<mlir::ConstantOp>();
+  target.addLegalOp<TFL::NoValueOp>();
   target.addLegalOp<ConstOp>();
   target.addLegalOp<DequantizeOp>();
   target.addLegalOp<QConstOp>();
",0,test
260cf9c59a104a252ffe4a299666a9efe437789a,tensorflow/tensorflow,"[lite] Add new NoValue op to tflite which is used to represent none values, which is used as optional tensors when the value is not present.

PiperOrigin-RevId: 425971937
Change-Id: I62bcffa6514cb08927e87bf013589328fdf49237",lstm_utils.cc,"@@ -84,8 +84,8 @@ Value CreateI32DenseConst(OpBuilder* builder, ArrayRef<int32_t> values,
 }
 
 Value CreateNoneValue(OpBuilder* builder, mlir::Location location) {
-  return builder->create<mlir::ConstantOp>(location, builder->getNoneType(),
-                                           builder->getUnitAttr());
+  return builder->create<TFL::NoValueOp>(location, builder->getNoneType(),
+                                         builder->getUnitAttr());
 }
 
 Value Transpose(OpBuilder* builder, Value value_to_transpose,
@@ -719,8 +719,7 @@ LogicalResult ConvertKerasLSTMLayer(mlir::FuncOp func_op, OpBuilder* builder) {
       output_shape,
       final_inputs.getType().cast<RankedTensorType>().getElementType());
 
-  Value none = builder->create<mlir::ConstantOp>(
-      func_op.getLoc(), builder->getNoneType(), builder->getUnitAttr());
+  Value none = CreateNoneValue(builder, func_op.getLoc());
   auto lstm = builder->create<mlir::TFL::UnidirectionalSequenceLSTMOp>(
       func_op.getLoc(), result_type, /*input=*/final_inputs,
       /*input_to_input_weights=*/weights_array->getResult(0),
",0,test
97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column

PiperOrigin-RevId: 272227486",feature_column_v2.py,"@@ -2265,8 +2265,7 @@ class FeatureColumn(object):
     """"""
     pass
 
-  @abc.abstractmethod
-  def _get_config(self):
+  def get_config(self):
     """"""Returns the config of the feature column.
 
     A FeatureColumn config is a Python dictionary (serializable) containing the
@@ -2283,7 +2282,7 @@ class FeatureColumn(object):
             'SerializationExampleFeatureColumn',
             ('dimension', 'parent', 'dtype', 'normalizer_fn'))):
 
-      def _get_config(self):
+      def get_config(self):
         # Create a dict from the namedtuple.
         # Python attribute literals can be directly copied from / to the config.
         # For example 'dimension', assuming it is an integer literal.
@@ -2304,8 +2303,8 @@ class FeatureColumn(object):
         return config
 
       @classmethod
-      def _from_config(cls, config, custom_objects=None, columns_by_name=None):
-        # This should do the inverse transform from `_get_config` and construct
+      def from_config(cls, config, custom_objects=None, columns_by_name=None):
+        # This should do the inverse transform from `get_config` and construct
         # the namedtuple.
         kwargs = config.copy()
         kwargs['parent'] = deserialize_feature_column(
@@ -2320,21 +2319,24 @@ class FeatureColumn(object):
       A serializable Dict that can be used to deserialize the object with
       from_config.
     """"""
-    pass
+    return self._get_config()
+
+  def _get_config(self):
+    raise NotImplementedError('Must be implemented in subclasses.')
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""Creates a FeatureColumn from its config.
 
-    This method should be the reverse of `_get_config`, capable of instantiating
-    the same FeatureColumn from the config dictionary. See `_get_config` for an
+    This method should be the reverse of `get_config`, capable of instantiating
+    the same FeatureColumn from the config dictionary. See `get_config` for an
     example of common (de)serialization practices followed in this file.
 
     TODO(b/118939620): This is a private method until consensus is reached on
     supporting object deserialization deduping within Keras.
 
     Args:
-      config: A Dict config acquired with `_get_config`.
+      config: A Dict config acquired with `get_config`.
       custom_objects: Optional dictionary mapping names (strings) to custom
         classes or functions to be considered during deserialization.
       columns_by_name: A Dict[String, FeatureColumn] of existing columns in
@@ -2344,7 +2346,11 @@ class FeatureColumn(object):
     Returns:
       A FeatureColumn for the input config.
     """"""
-    pass
+    return cls._from_config(config, custom_objects, columns_by_name)
+
+  @classmethod
+  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+    raise NotImplementedError('Must be implemented in subclasses.')
 
 
 class DenseColumn(FeatureColumn):
@@ -2857,7 +2863,7 @@ class NumericColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     config = dict(zip(self._fields, self))
     config['normalizer_fn'] = generic_utils.serialize_keras_object(
@@ -2866,7 +2872,7 @@ class NumericColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -3014,7 +3020,7 @@ class BucketizedColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.source_column]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -3022,7 +3028,7 @@ class BucketizedColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
@@ -3247,7 +3253,7 @@ class EmbeddingColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.categorical_column]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -3257,7 +3263,7 @@ class EmbeddingColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
@@ -3440,15 +3446,6 @@ class SharedEmbeddingColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.categorical_column]
 
-  def _get_config(self):
-    """"""See 'FeatureColumn` base class.""""""
-    raise NotImplementedError()
-
-  @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
-    """"""See 'FeatureColumn` base class.""""""
-    raise NotImplementedError()
-
 
 def _check_shape(shape, key):
   """"""Returns shape if it's valid, raises error otherwise.""""""
@@ -3559,14 +3556,14 @@ class HashedCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     config = dict(zip(self._fields, self))
     config['dtype'] = self.dtype.name
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -3673,14 +3670,14 @@ class VocabularyFileCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     config = dict(zip(self._fields, self))
     config['dtype'] = self.dtype.name
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -3787,14 +3784,14 @@ class VocabularyListCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     config = dict(zip(self._fields, self))
     config['dtype'] = self.dtype.name
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -3899,12 +3896,12 @@ class IdentityCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     return dict(zip(self._fields, self))
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     _check_config_keys(config, cls._fields)
     kwargs = _standardize_and_copy_config(config)
@@ -4013,7 +4010,7 @@ class WeightedCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.categorical_column, self.weight_feature_key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -4023,7 +4020,7 @@ class WeightedCategoricalColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
@@ -4157,7 +4154,7 @@ class CrossedColumn(
     """"""See 'FeatureColumn` base class.""""""
     return list(self.keys)
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -4165,7 +4162,7 @@ class CrossedColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
@@ -4427,7 +4424,7 @@ class IndicatorColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.categorical_column]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -4436,7 +4433,7 @@ class IndicatorColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
@@ -4573,7 +4570,7 @@ class SequenceCategoricalColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.categorical_column]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import serialize_feature_column  # pylint: disable=g-import-not-at-top
     config = dict(zip(self._fields, self))
@@ -4582,7 +4579,7 @@ class SequenceCategoricalColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     from tensorflow.python.feature_column.serialization import deserialize_feature_column  # pylint: disable=g-import-not-at-top
     _check_config_keys(config, cls._fields)
",0,train
97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column

PiperOrigin-RevId: 272227486",feature_column_v2_test.py,"@@ -81,10 +81,10 @@ class BaseFeatureColumnForTests(fc.FeatureColumn):
     raise ValueError('Should not use this method.')
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     raise ValueError('Should not use this method.')
 
-  def _get_config(self):
+  def get_config(self):
     raise ValueError('Should not use this method.')
 
 
@@ -478,7 +478,7 @@ class NumericColumnTest(test.TestCase):
     price = fc.numeric_column('price', normalizer_fn=_increment_two)
     self.assertEqual(['price'], price.parents)
 
-    config = price._get_config()
+    config = price.get_config()
     self.assertEqual({
         'key': 'price',
         'shape': (1,),
@@ -487,7 +487,7 @@ class NumericColumnTest(test.TestCase):
         'normalizer_fn': '_increment_two'
     }, config)
 
-    new_col = fc.NumericColumn._from_config(
+    new_col = fc.NumericColumn.from_config(
         config, custom_objects={'_increment_two': _increment_two})
     self.assertEqual(price, new_col)
     self.assertEqual(new_col.shape, (1,))
@@ -833,7 +833,7 @@ class BucketizedColumnTest(test.TestCase):
     bucketized_price = fc.bucketized_column(price, boundaries=[0, 2, 4, 6])
     self.assertEqual([price], bucketized_price.parents)
 
-    config = bucketized_price._get_config()
+    config = bucketized_price.get_config()
     self.assertEqual({
         'source_column': {
             'class_name': 'NumericColumn',
@@ -848,11 +848,11 @@ class BucketizedColumnTest(test.TestCase):
         'boundaries': (0, 2, 4, 6)
     }, config)
 
-    new_bucketized_price = fc.BucketizedColumn._from_config(config)
+    new_bucketized_price = fc.BucketizedColumn.from_config(config)
     self.assertEqual(bucketized_price, new_bucketized_price)
     self.assertIsNot(price, new_bucketized_price.source_column)
 
-    new_bucketized_price = fc.BucketizedColumn._from_config(
+    new_bucketized_price = fc.BucketizedColumn.from_config(
         config,
         columns_by_name={
             serialization._column_name_with_class_name(price): price
@@ -1106,7 +1106,7 @@ class HashedCategoricalColumnTest(test.TestCase):
     wire_column = fc.categorical_column_with_hash_bucket('wire', 4)
     self.assertEqual(['wire'], wire_column.parents)
 
-    config = wire_column._get_config()
+    config = wire_column.get_config()
     self.assertEqual({
         'key': 'wire',
         'hash_bucket_size': 4,
@@ -1114,7 +1114,7 @@ class HashedCategoricalColumnTest(test.TestCase):
     }, config)
 
     self.assertEqual(wire_column,
-                     fc.HashedCategoricalColumn._from_config(config))
+                     fc.HashedCategoricalColumn.from_config(config))
 
 
 class CrossedColumnTest(test.TestCase):
@@ -1588,7 +1588,7 @@ class CrossedColumnTest(test.TestCase):
 
     self.assertEqual([b, 'c'], crossed.parents)
 
-    config = crossed._get_config()
+    config = crossed.get_config()
     self.assertEqual({
         'hash_bucket_size':
             5,
@@ -1612,11 +1612,11 @@ class CrossedColumnTest(test.TestCase):
         }, 'c')
     }, config)
 
-    new_crossed = fc.CrossedColumn._from_config(config)
+    new_crossed = fc.CrossedColumn.from_config(config)
     self.assertEqual(crossed, new_crossed)
     self.assertIsNot(b, new_crossed.keys[0])
 
-    new_crossed = fc.CrossedColumn._from_config(
+    new_crossed = fc.CrossedColumn.from_config(
         config,
         columns_by_name={serialization._column_name_with_class_name(b): b})
     self.assertEqual(crossed, new_crossed)
@@ -4396,7 +4396,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
 
     self.assertEqual(['wire'], wire_column.parents)
 
-    config = wire_column._get_config()
+    config = wire_column.get_config()
     self.assertEqual({
         'default_value': -1,
         'dtype': 'string',
@@ -4407,7 +4407,7 @@ class VocabularyFileCategoricalColumnTest(test.TestCase):
     }, config)
 
     self.assertEqual(wire_column,
-                     fc.VocabularyFileCategoricalColumn._from_config(config))
+                     fc.VocabularyFileCategoricalColumn.from_config(config))
 
 
 class VocabularyListCategoricalColumnTest(test.TestCase):
@@ -4859,7 +4859,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
 
     self.assertEqual(['aaa'], wire_column.parents)
 
-    config = wire_column._get_config()
+    config = wire_column.get_config()
     self.assertEqual({
         'default_value': -1,
         'dtype': 'string',
@@ -4869,7 +4869,7 @@ class VocabularyListCategoricalColumnTest(test.TestCase):
     }, config)
 
     self.assertEqual(wire_column,
-                     fc.VocabularyListCategoricalColumn._from_config(config))
+                     fc.VocabularyListCategoricalColumn.from_config(config))
 
 
 class IdentityCategoricalColumnTest(test.TestCase):
@@ -5218,14 +5218,14 @@ class IdentityCategoricalColumnTest(test.TestCase):
 
     self.assertEqual(['aaa'], column.parents)
 
-    config = column._get_config()
+    config = column.get_config()
     self.assertEqual({
         'default_value': None,
         'key': 'aaa',
         'number_buckets': 3
     }, config)
 
-    self.assertEqual(column, fc.IdentityCategoricalColumn._from_config(config))
+    self.assertEqual(column, fc.IdentityCategoricalColumn.from_config(config))
 
 
 class TransformFeaturesTest(test.TestCase):
@@ -5600,7 +5600,7 @@ class IndicatorColumnTest(test.TestCase):
 
     self.assertEqual([parent], animal.parents)
 
-    config = animal._get_config()
+    config = animal.get_config()
     self.assertEqual({
         'categorical_column': {
             'class_name': 'IdentityCategoricalColumn',
@@ -5612,11 +5612,11 @@ class IndicatorColumnTest(test.TestCase):
         }
     }, config)
 
-    new_animal = fc.IndicatorColumn._from_config(config)
+    new_animal = fc.IndicatorColumn.from_config(config)
     self.assertEqual(animal, new_animal)
     self.assertIsNot(parent, new_animal.categorical_column)
 
-    new_animal = fc.IndicatorColumn._from_config(
+    new_animal = fc.IndicatorColumn.from_config(
         config,
         columns_by_name={
             serialization._column_name_with_class_name(parent): parent
@@ -6605,7 +6605,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     self.assertEqual([categorical_column], embedding_column.parents)
 
-    config = embedding_column._get_config()
+    config = embedding_column.get_config()
     self.assertEqual({
         'categorical_column': {
             'class_name': 'IdentityCategoricalColumn',
@@ -6633,22 +6633,22 @@ class EmbeddingColumnTest(test.TestCase):
     }, config)
 
     custom_objects = {'TruncatedNormal': init_ops.TruncatedNormal}
-    new_embedding_column = fc.EmbeddingColumn._from_config(
+    new_embedding_column = fc.EmbeddingColumn.from_config(
         config, custom_objects=custom_objects)
-    self.assertEqual(embedding_column._get_config(),
-                     new_embedding_column._get_config())
+    self.assertEqual(embedding_column.get_config(),
+                     new_embedding_column.get_config())
     self.assertIsNot(categorical_column,
                      new_embedding_column.categorical_column)
 
-    new_embedding_column = fc.EmbeddingColumn._from_config(
+    new_embedding_column = fc.EmbeddingColumn.from_config(
         config,
         custom_objects=custom_objects,
         columns_by_name={
             serialization._column_name_with_class_name(categorical_column):
                 categorical_column
         })
-    self.assertEqual(embedding_column._get_config(),
-                     new_embedding_column._get_config())
+    self.assertEqual(embedding_column.get_config(),
+                     new_embedding_column.get_config())
     self.assertIs(categorical_column, new_embedding_column.categorical_column)
 
   @test_util.run_deprecated_v1
@@ -6666,7 +6666,7 @@ class EmbeddingColumnTest(test.TestCase):
 
     self.assertEqual([categorical_column], embedding_column.parents)
 
-    config = embedding_column._get_config()
+    config = embedding_column.get_config()
     self.assertEqual({
         'categorical_column': {
             'class_name': 'IdentityCategoricalColumn',
@@ -6689,13 +6689,13 @@ class EmbeddingColumnTest(test.TestCase):
         '_initializer': _initializer,
     }
 
-    new_embedding_column = fc.EmbeddingColumn._from_config(
+    new_embedding_column = fc.EmbeddingColumn.from_config(
         config, custom_objects=custom_objects)
     self.assertEqual(embedding_column, new_embedding_column)
     self.assertIsNot(categorical_column,
                      new_embedding_column.categorical_column)
 
-    new_embedding_column = fc.EmbeddingColumn._from_config(
+    new_embedding_column = fc.EmbeddingColumn.from_config(
         config,
         custom_objects=custom_objects,
         columns_by_name={
@@ -7763,7 +7763,7 @@ class WeightedCategoricalColumnTest(test.TestCase):
 
     self.assertEqual([categorical_column, 'weight'], column.parents)
 
-    config = column._get_config()
+    config = column.get_config()
     self.assertEqual({
         'categorical_column': {
             'config': {
@@ -7777,9 +7777,9 @@ class WeightedCategoricalColumnTest(test.TestCase):
         'weight_feature_key': 'weight'
     }, config)
 
-    self.assertEqual(column, fc.WeightedCategoricalColumn._from_config(config))
+    self.assertEqual(column, fc.WeightedCategoricalColumn.from_config(config))
 
-    new_column = fc.WeightedCategoricalColumn._from_config(
+    new_column = fc.WeightedCategoricalColumn.from_config(
         config,
         columns_by_name={
             serialization._column_name_with_class_name(categorical_column):
",0,train
97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column

PiperOrigin-RevId: 272227486",sequence_feature_column.py,"@@ -582,7 +582,7 @@ class SequenceNumericColumn(
     """"""See 'FeatureColumn` base class.""""""
     return [self.key]
 
-  def _get_config(self):
+  def get_config(self):
     """"""See 'FeatureColumn` base class.""""""
     config = dict(zip(self._fields, self))
     config['normalizer_fn'] = utils.serialize_keras_object(self.normalizer_fn)
@@ -590,7 +590,7 @@ class SequenceNumericColumn(
     return config
 
   @classmethod
-  def _from_config(cls, config, custom_objects=None, columns_by_name=None):
+  def from_config(cls, config, custom_objects=None, columns_by_name=None):
     """"""See 'FeatureColumn` base class.""""""
     fc._check_config_keys(config, cls._fields)
     kwargs = fc._standardize_and_copy_config(config)
",0,train
97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column

PiperOrigin-RevId: 272227486",sequence_feature_column_test.py,"@@ -765,7 +765,7 @@ class SequenceCategoricalColumnWithIdentityTest(
         'animal', num_buckets=4)
     animal = fc.indicator_column(parent)
 
-    config = animal._get_config()
+    config = animal.get_config()
     self.assertEqual(
         {
             'categorical_column': {
@@ -783,11 +783,11 @@ class SequenceCategoricalColumnWithIdentityTest(
             }
         }, config)
 
-    new_animal = fc.IndicatorColumn._from_config(config)
+    new_animal = fc.IndicatorColumn.from_config(config)
     self.assertEqual(animal, new_animal)
     self.assertIsNot(parent, new_animal.categorical_column)
 
-    new_animal = fc.IndicatorColumn._from_config(
+    new_animal = fc.IndicatorColumn.from_config(
         config,
         columns_by_name={
             serialization._column_name_with_class_name(parent): parent
",0,train
97757f34d2124c2341111344bef469501e789c04,tensorflow/tensorflow,"make get_config from config public for feature column

PiperOrigin-RevId: 272227486",serialization.py,"@@ -45,14 +45,14 @@ def serialize_feature_column(fc):
   """"""Serializes a FeatureColumn or a raw string key.
 
   This method should only be used to serialize parent FeatureColumns when
-  implementing FeatureColumn._get_config(), else serialize_feature_columns()
+  implementing FeatureColumn.get_config(), else serialize_feature_columns()
   is preferable.
 
   This serialization also keeps information of the FeatureColumn class, so
   deserialization is possible without knowing the class type. For example:
 
   a = numeric_column('x')
-  a._get_config() gives:
+  a.get_config() gives:
   {
       'key': 'price',
       'shape': (1,),
@@ -85,7 +85,7 @@ def serialize_feature_column(fc):
     return fc
   elif isinstance(fc, fc_lib.FeatureColumn):
     return generic_utils.serialize_keras_class_and_config(
-        fc.__class__.__name__, fc._get_config())  # pylint: disable=protected-access
+        fc.__class__.__name__, fc.get_config())  # pylint: disable=protected-access
   else:
     raise ValueError('Instance: {} is not a FeatureColumn'.format(fc))
 
@@ -96,7 +96,7 @@ def deserialize_feature_column(config,
   """"""Deserializes a `config` generated with `serialize_feature_column`.
 
   This method should only be used to deserialize parent FeatureColumns when
-  implementing FeatureColumn._from_config(), else deserialize_feature_columns()
+  implementing FeatureColumn.from_config(), else deserialize_feature_columns()
   is preferable. Returns a FeatureColumn for this config.
   TODO(b/118939620): Simplify code if Keras utils support object deduping.
 
@@ -136,7 +136,7 @@ def deserialize_feature_column(config,
         'Expected FeatureColumn class, instead found: {}'.format(cls))
 
   # Always deserialize the FeatureColumn, in order to get the name.
-  new_instance = cls._from_config(  # pylint: disable=protected-access
+  new_instance = cls.from_config(  # pylint: disable=protected-access
       cls_config,
       custom_objects=custom_objects,
       columns_by_name=columns_by_name)
",0,train
d9ae69f04ba944384d117662b888c43cb7e0bf72,tensorflow/tensorflow,Only trigger reduction columns indexing for columns reductions.,ir_emitter_unnested.cc,"@@ -3207,7 +3207,8 @@ ReductionCodegenInfo IrEmitterUnnested::ComputeReductionCodegenInfo(
         // dtypes.
         ((cc_major == 6 && smallest_input_dtype_bits <= 16) || cc_major >= 7)) {
       return kLinearStridedIndexingX;
-    } else if (IsUnrollingColumnReductionBeneficial(
+    } else if (!reduction_dimensions.is_row_reduction &&
+	       IsUnrollingColumnReductionBeneficial(
                    unnested_hlo, input_shape,
                    reduction_dimensions.dimensions[2])) {
       return kLinearIndexingX;
",0,test
13038b78f24d50a14f132806816cae99f630d78c,tensorflow/tensorflow,"Speed up transitive reduction in dependency optimizer by sorting inputs and breaking out of the inner loop when topo_order(input) < topo_order(source).
This gives a ~7% speedup on the transformer graph (166ms -> 155ms).

PiperOrigin-RevId: 277762493
Change-Id: I9c80cb68f6695000b9511d6651302772b308a25d",dependency_optimizer.cc,"@@ -500,6 +500,8 @@ Status DependencyOptimizer::TransitiveReduction() {
         control_outputs[input_node_idx].emplace_back(node_idx, input_slot);
       }
     }
+    std::sort(inputs[node_idx].begin(), inputs[node_idx].end(),
+              std::greater<int>());
   }
 
   // Run the longest path in DAG algorithm for each source node that has control
@@ -528,13 +530,15 @@ Status DependencyOptimizer::TransitiveReduction() {
     std::fill(longest_distance.begin() + source,
               longest_distance.begin() + highest_control_target + 1, 0);
     for (int target = source + 1; target <= highest_control_target; ++target) {
-      for (int input : inputs[target]) {
+      const auto& target_inputs = inputs[target];
+      for (int input_idx = 0; input_idx < target_inputs.size(); ++input_idx) {
+        const int input = target_inputs[input_idx];
+        if (input < source) break;
         // If the input node is before source in the topo order, no path
         // source -> input -> target can exits and we can skip it.
         // Also only extend a path from the source itself or from nodes that
         // have a path from source, indicated by longest_distance[input] > 0.
-        if (input == source ||
-            (input > source && longest_distance[input] > 0)) {
+        if (input == source || longest_distance[input] > 0) {
           // If source -> input -> target is longer than the longest
           // path so far from source -> target, update the longest_distance.
           int candidate_longest_distance = longest_distance[input] + 1;
",0,test
ab5ba2aa0c3817f472a8336bba4cbb18fdeda258,tensorflow/tensorflow,"Allow empty GCS tokens to be cached.

PiperOrigin-RevId: 217159671",google_auth_provider.cc,"@@ -135,8 +135,7 @@ Status GoogleAuthProvider::GetToken(string* t) {
   mutex_lock lock(mu_);
   const uint64 now_sec = env_->NowSeconds();
 
-  if (!current_token_.empty() &&
-      now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
+  if (now_sec + kExpirationTimeMarginSec < expiration_timestamp_sec_) {
     *t = current_token_;
     return Status::OK();
   }
",0,train
8db2e909e59c11b302715a9aec215cfc349892f7,tensorflow/tensorflow,update version information file. Also upadate tensorrt bazel configuration file,convert_nodes_test.cc,"@@ -280,6 +280,14 @@ class FakeITensor : public nvinfer1::ITensor {
   float getDynamicRangeMax() const override { return 0.f; }
 #endif
 
+#if IS_TRT_VERSION_GE(6, 0, 0, 0)
+  void setAllowedFormats(nvinfer1::TensorFormats formats) override {}
+
+  nvinfer1::TensorFormats getAllowedFormats() const override { return 1; }
+
+  bool isShape() const override { return false; }
+#endif
+
  private:
   string name_;
   nvinfer1::Dims dims_;
",0,train
8db2e909e59c11b302715a9aec215cfc349892f7,tensorflow/tensorflow,update version information file. Also upadate tensorrt bazel configuration file,find_cuda_config.py,"@@ -390,7 +390,8 @@ def _find_tensorrt_config(base_paths, required_version):
                                              get_header_version)
 
   if "".."" in header_version:
-    header_path, header_version = _find_header(base_paths, ""NvInferRTSafe.h"",
+    # From TRT 6.0 onwards, version information has been moved to NvInferVersion.h.
+    header_path, header_version = _find_header(base_paths, ""NvInferVersion.h"",
                                                required_version,
                                                get_header_version)
 
",0,train
c041b5de75463f76bf8d9461e0f79ea9ecec498f,tensorflow/tensorflow,"Replace ARCH_K8 with __x86_64__.

PiperOrigin-RevId: 317689006
Change-Id: I7e47b17ef53b3cc223b64ff179fcdc3777c61eb7",manual_constructor_test.cc,"@@ -92,7 +92,7 @@ TEST(ManualConstructorTest, Alignment) {
 
   EXPECT_EQ(reinterpret_cast<char*>(test2.b.get()) - &test2.a,
             reinterpret_cast<char*>(&control2.b) - &control2.a);
-#ifdef ARCH_K8
+#ifdef __x86_64__
   EXPECT_EQ(reinterpret_cast<intptr_t>(test2.b.get()) % 16, 0);
 #endif
 }
",0,train
a4e401da71458d253b05e41f28637b65baf64be4,tensorflow/tensorflow,"Prevent segfault in `embedding_lookup_sparse.cc`

Previous fixes missed one additional case.

PiperOrigin-RevId: 417676944
Change-Id: I8ab412155cf9b1e897448a6611d209eaa7ca9e66",embedding_lookup_sparse.cc,"@@ -159,6 +159,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 3, &weights));
   const TfLiteTensor* value;
   TF_LITE_ENSURE_OK(context, GetInputSafe(context, node, 4, &value));
+  const size_t values_size = NumElements(value);
 
   const int lookup_rank = SizeOfDimension(indices, 1);
   const int embedding_rank = NumDimensions(value);
@@ -253,6 +254,11 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     current_squares_weight += w * w;
     current_total_weight += w;
     for (int k = 0; k < embedding_size; k++) {
+      // only index if indices are valid
+      if (current_output_offset + k < 0) continue;
+      if (current_output_offset + k >= output_size) continue;
+      if (example_embedding_offset + k < 0) continue;
+      if (example_embedding_offset + k >= values_size) continue;
       output_ptr[current_output_offset + k] +=
           value_ptr[example_embedding_offset + k] * w;
     }
",0,train
5d2a37a1ca528d454fc33400cad1d3163f1672b2,tensorflow/tensorflow,"[tf:tfrt] Check the returned memref alignment

Failures in downstream kernels are hard to debug and impossible to find the original source of misaligned tensor.

PiperOrigin-RevId: 420368719
Change-Id: I4b6f73e26ffbb37e49dafe77f1b798487311f744",tf_cpurt.h,"@@ -143,9 +143,9 @@ struct ConvertTensor {
     // Incorrect alignment will lead to a segfault in the downstream Tensorflow
     // kernels, check it before returning to the runtime.
     if (internal::IsStaticStorageDuration(memref)) {
-      DCHECK(tensor.IsAligned()) << ""global memref is not aligned"";
+      CHECK(tensor.IsAligned()) << ""global memref is not aligned"";
     } else {
-      DCHECK(tensor.IsAligned()) << ""allocated memref is not aligned"";
+      CHECK(tensor.IsAligned()) << ""allocated memref is not aligned"";
     }
 
     return tensor;
",0,train
351fd5e844343348bd6ba1535c908fe0ef0b196b,tensorflow/tensorflow,"Emit an error if there is an uncompilable op in tpu cluster and soft_device_placement option is not true.

This emits an error early rather than a potentially misleading error later in compilation.

PiperOrigin-RevId: 358291279
Change-Id: I227b8303a6b6245c49243e37b0ee9e2e68c20e35",mark_ops_for_outside_compilation.cc,"@@ -299,6 +299,30 @@ LogicalResult MarkUncompilableOps(
   return success();
 }
 
+// Check for uncompilable ops that are in `tf_dialect` and are not already
+// marked for outside compilation.
+bool ContainsUncompilableOps(const Dialect* tf_dialect, Block* block,
+                             llvm::DenseSet<OperationName>& supported_ops) {
+  int uncompilable_op_count = 0;
+  // Check if op or any parent is already marked for outside compilation.
+  block->walk([&](Operation* op) {
+    Operation* iter_op = op;
+    while (iter_op && !llvm::isa<tf_device::ClusterOp>(iter_op)) {
+      if (iter_op->hasAttrOfType<StringAttr>(kXlaOutsideCompilationAttr)) {
+        return;
+      }
+      iter_op = iter_op->getParentOp();
+    }
+
+    if (!IsSupportedOp(*op, supported_ops, tf_dialect)) {
+      op->emitOpError() << ""isn't compilable for TPU device. enable ""
+                           ""soft_device_placement option to run on CPU"";
+      ++uncompilable_op_count;
+    }
+  });
+  return uncompilable_op_count > 0;
+}
+
 // Unmarks outside compilation for any op that has parents already
 // marked for outside compilation since the child will be extracted
 // anyways.
@@ -354,6 +378,10 @@ void MarkOpsForOutsideCompilation::runOnOperation() {
       if (failed(MarkUncompilableOps(tf_dialect, &cluster.GetBody(),
                                      supported_ops)))
         return WalkResult::interrupt();
+    } else {
+      if (ContainsUncompilableOps(tf_dialect, &cluster.GetBody(),
+                                  supported_ops))
+        return WalkResult::interrupt();
     }
     MarkVariantInputsOutputs(cluster);
 
",0,train
081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused.
Change: 145561773",buffer_assignment.cc,"@@ -170,20 +170,26 @@ BufferAssignment::GetUniqueTopLevelOutputAllocation() const {
 
 BufferAllocation* BufferAssignment::NewAllocation(const LogicalBuffer& buffer,
                                                   int64 size,
-                                                  bool is_thread_local) {
+                                                  bool is_thread_local,
+                                                  bool is_reusable) {
   BufferAllocation::Index index = allocations_.size();
-  allocations_.emplace_back(index, size, is_thread_local);
+  allocations_.emplace_back(index, size, is_thread_local, is_reusable);
   BufferAllocation* allocation = &allocations_.back();
-  AddAssignment(buffer, allocation);
+  AddAssignment(buffer, allocation, /*colocated_buffer=*/false);
   allocation_index_for_buffer_[&buffer] = index;
   return allocation;
 }
 
 // Adds an instruction to the set assigned to the given buffer.
 void BufferAssignment::AddAssignment(const LogicalBuffer& buffer,
-                                     BufferAllocation* allocation) {
+                                     BufferAllocation* allocation,
+                                     bool colocated_buffer) {
   CHECK_EQ(0, allocation_index_for_buffer_.count(&buffer))
       << ""LogicalBuffer "" << buffer << "" already has an allocation."";
+  CHECK(allocation->is_reusable() || allocation->assigned_buffers().empty() ||
+        colocated_buffer)
+      << ""Non-reusable allocation already assigned a buffer"";
+
   TF_CHECK_OK(points_to_analysis().VerifyBuffer(buffer));
 
   allocation->AddAssignment(buffer);
@@ -351,6 +357,11 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     return false;
   }
 
+  if (!allocation->is_reusable()) {
+    VLOG(4) << ""Can't assign: allocation is not reusable"";
+    return false;
+  }
+
   for (const LogicalBuffer* assigned_buffer : allocation->assigned_buffers()) {
     if (assignment->liveness().MayInterfere(*assigned_buffer, buffer)) {
       VLOG(4) << ""Can't assign: assignee "" << assigned_buffer->ToString()
@@ -369,7 +380,7 @@ bool BufferAssigner::MaybeAssignBuffer(BufferAllocation* allocation,
     return false;
   }
 
-  assignment->AddAssignment(buffer, allocation);
+  assignment->AddAssignment(buffer, allocation, /*colocated_buffer=*/false);
   return true;
 }
 
@@ -455,7 +466,8 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation(
       // callers.
       BufferAllocation* allocation =
           assignment->NewAllocation(*buffer, buffer_size_(*buffer),
-                                    /*is_thread_local=*/false);
+                                    /*is_thread_local=*/false,
+                                    /*is_reusable=*/false);
       allocation->set_entry_computation_parameter(
           buffer->instruction()->parameter_number());
       VLOG(3) << ""New allocation for entry computation parameter: ""
@@ -470,8 +482,8 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation(
       // Custom call operations never have reusable buffers. Also we do not
       // reuse thread-local buffers for now, because they are dynamically
       // allocated and their lifetimes are hard to compute.
-      assignment->NewAllocation(*buffer, buffer_size_(*buffer),
-                                is_thread_local);
+      assignment->NewAllocation(*buffer, buffer_size_(*buffer), is_thread_local,
+                                /*is_reusable=*/false);
       continue;
     }
 
@@ -503,7 +515,16 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation(
       // Can't use MaybeAssignBuffer here because buffer liveness conservatively
       // assumes buffers in different computations always interfere.
       CHECK_GE(root_allocation->size(), buffer_size_(*buffer));
-      assignment->AddAssignment(*buffer, root_allocation);
+      assignment->AddAssignment(*buffer, root_allocation,
+                                /*colocated_buffer=*/true);
+      continue;
+    }
+
+    if (ShapeUtil::IsTuple(buffer->shape())) {
+      // TODO(b/34669761): Don't reuse tuple buffers because the GPU backend
+      // assumes longer buffer liveness than indicated by the analysis.
+      assignment->NewAllocation(*buffer, buffer_size_(*buffer), is_thread_local,
+                                /*is_reusable=*/false);
       continue;
     }
 
@@ -567,8 +588,9 @@ tensorflow::Status BufferAssigner::AssignBuffersForComputation(
       }
     }
     if (!assignment->HasAllocation(*buffer)) {
-      auto* allocation = assignment->NewAllocation(
-          *buffer, buffer_size_(*buffer), is_thread_local);
+      auto* allocation =
+          assignment->NewAllocation(*buffer, buffer_size_(*buffer),
+                                    is_thread_local, /*is_reusable=*/true);
       VLOG(3) << ""New allocation for: "" << buffer->ToString();
       allocation_indices.push_back(allocation->index());
     }
@@ -651,10 +673,12 @@ void BufferAssigner::AssignColocatedBufferSets(
         // module-level scope, we can allow buffers to be shared across
         // computations (in some cases).
         allocation = assignment->NewAllocation(*buffer, buffer_size_(*buffer),
-                                               /*is_thread_local=*/false);
+                                               /*is_thread_local=*/false,
+                                               /*is_reusable=*/true);
         colocated_buffer_allocations_.insert(allocation->index());
       } else {
-        assignment->AddAssignment(*buffer, allocation);
+        assignment->AddAssignment(*buffer, allocation,
+                                  /*colocated_buffer=*/true);
       }
       colocated_buffers_.insert(buffer);
     }
",0,test
081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused.
Change: 145561773",buffer_assignment.h,"@@ -52,8 +52,12 @@ class BufferAllocation {
   // contiguously and can be used as array indexes.
   using Index = int64;
 
-  BufferAllocation(Index index, int64 size, bool is_thread_local)
-      : index_(index), size_(size), is_thread_local_(is_thread_local) {}
+  BufferAllocation(Index index, int64 size, bool is_thread_local,
+                   bool is_reusable)
+      : index_(index),
+        size_(size),
+        is_thread_local_(is_thread_local),
+        is_reusable_(is_reusable) {}
   ~BufferAllocation() {}
 
   // Adds a LogicalBuffer to the set assigned to this buffer.
@@ -64,6 +68,9 @@ class BufferAllocation {
   // local.
   bool is_thread_local() const { return is_thread_local_; }
 
+  // Whether this allocation can be used by more than one logical buffer.
+  bool is_reusable() const { return is_reusable_; }
+
   // Whether this allocation holds a LogicalBuffer from a parameter of the entry
   // computation. These buffers have lifetimes which may be longer than the
   // XLA computation.
@@ -138,6 +145,9 @@ class BufferAllocation {
   // Whether this buffer needs to be thread-local.
   bool is_thread_local_;
 
+  // Whether this buffer is usable by more than one logical buffer.
+  bool is_reusable_;
+
   // Whether this allocation holds an entry computation parameter. Entry
   // computation parameters are special be cause they have lifetimes which may
   // outlast the computation.
@@ -232,10 +242,13 @@ class BufferAssignment {
   // assigned to it. `is_thread_local` indicates whether this buffer needs to be
   // thread-local.
   BufferAllocation* NewAllocation(const LogicalBuffer& buffer, int64 size,
-                                  bool is_thread_local);
+                                  bool is_thread_local, bool is_reusable);
 
-  // Adds a LogicalBuffer to the set assigned to the given allocation.
-  void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation);
+  // Adds a LogicalBuffer to the set assigned to the given allocation. If
+  // colocated_buffer is true, then the logical buffer is an alias of another
+  // buffer assigned to this allocation.
+  void AddAssignment(const LogicalBuffer& buffer, BufferAllocation* allocation,
+                     bool colocated_buffer);
 
   // Returns the BufferLiveness object used to construct this assignment.
   const BufferLiveness& liveness() { return *liveness_; }
@@ -314,6 +327,10 @@ class BufferAssigner {
                          const LogicalBuffer& buffer,
                          BufferAssignment* assignment);
 
+  // Colocated buffers are logical buffers from different computations which
+  // alias. Explicitly handling these colocated buffers is necessary because
+  // points-to analysis is computation level scope and does not recognize
+  // aliasing across computations (b/32491382).
   using ColocatedBufferSet = std::vector<const LogicalBuffer*>;
 
   // Returns a vector of ColocatedBufferSet objects, where each
",0,test
081758b0e5efc1a1591cda068a4866099bf8a3c5,tensorflow/tensorflow,"Do not reuse allocations which hold tuple logical buffers. This works around a GPU codegen issue which extended the live range of tuple buffers. This also addresses a potential latent bug where thread-local or custom call buffer might have been reused.
Change: 145561773",buffer_assignment_test.cc,"@@ -1046,6 +1046,31 @@ TEST_F(BufferAssignmentTest, AmbiguousBufferAsOutput) {
                         .ConsumeValueOrDie()));
 }
 
+// TODO(b/34669761): Remove this test when buffers are allowed to share
+// allocations.
+TEST_F(BufferAssignmentTest, TupleBufferNotReused) {
+  // Test a computation that returns a tuple parameter.
+  auto builder = HloComputation::Builder(TestName());
+  auto scalar_shape = ShapeUtil::MakeShape(F32, {});
+  auto param = builder.AddInstruction(
+      HloInstruction::CreateParameter(0, scalar_shape, ""param0""));
+  auto tuple = builder.AddInstruction(HloInstruction::CreateTuple({param}));
+  auto tuple_element = builder.AddInstruction(
+      HloInstruction::CreateGetTupleElement(scalar_shape, tuple, 0));
+  auto copy = builder.AddInstruction(HloInstruction::CreateUnary(
+      scalar_shape, HloOpcode::kCopy, tuple_element));
+
+  auto module = MakeUnique<HloModule>(TestName());
+  module->AddEntryComputation(builder.Build());
+  auto assignment = RunBufferAssignment(module.get());
+
+  // There should be no buffer reuse. The copy should not reuse the tuple
+  // buffer.
+  EXPECT_EQ(3, assignment->Allocations().size());
+  EXPECT_NE(GetTopLevelAllocation(*assignment, tuple),
+            GetTopLevelAllocation(*assignment, copy));
+}
+
 }  // namespace
 
 }  // namespace xla
",0,test
6dd43ec8cb299459b835e50faa4f3ffad044098c,tensorflow/tensorflow,PiperOrigin-RevId: 170347520,broadcast_simple_test.cc,"@@ -96,7 +96,7 @@ class BroadcastSimpleTest : public ClientLibraryTestBase {
       }
       default: {
         // Default to Add
-        CHECK(false);
+        LOG(FATAL);
       }
     }
   }
",0,train
e69f08a6e5af596c0e0613980a958d587f440db0,tensorflow/tensorflow,"Fix and de-flake estimators_test.
Change: 133218112",estimators_test.py,"@@ -68,7 +68,11 @@ class FeatureEngineeringFunctionTest(tf.test.TestCase):
 
     def feature_engineering_fn(features, targets):
       _, _ = features, targets
-      return {""x"": tf.constant([9.])}, {""y"": tf.constant([99.])}
+      return {
+          ""transformed_x"": tf.constant([9.])
+      }, {
+          ""transformed_y"": tf.constant([99.])
+      }
 
     def model_fn(features, targets):
       # dummy variable:
@@ -83,8 +87,8 @@ class FeatureEngineeringFunctionTest(tf.test.TestCase):
         feature_engineering_fn=feature_engineering_fn)
     estimator.fit(input_fn=input_fn, steps=1)
     prediction = next(estimator.predict(input_fn=input_fn, as_iterable=True))
-    # predictions = transformed_x (99)
-    self.assertEqual(99., prediction)
+    # predictions = transformed_x (9)
+    self.assertEqual(9., prediction)
 
   def testNoneFeatureEngineeringFn(self):
 
",0,train
1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997
Change: 128393251",tensor_shape.cc,"@@ -33,14 +33,13 @@ static void AppendTo(const TensorShape& s, gtl::InlinedVector<int64, 8>* vals) {
 }
 
 void TensorShape::CheckDimsEqual(int NDIMS) const {
-  CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS << ""dimensions""
-                          << "" from a tensor of "" << dims() << "" dimensions"";
+  CHECK_EQ(NDIMS, dims()) << ""Asking for tensor of "" << NDIMS
+                          << "" for a tensor of "" << dims() << "" dimensions"";
 }
 
 void TensorShape::CheckDimsAtLeast(int NDIMS) const {
   CHECK_GE(NDIMS, dims()) << ""Asking for tensor of at least "" << NDIMS
-                          << "" dimensions from a tensor of "" << dims()
-                          << "" dimensions"";
+                          << "" for a tensor of "" << dims() << "" dimensions"";
 }
 
 bool TensorShape::IsValid(const TensorShapeProto& proto) {
",0,train
1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997
Change: 128393251",array_ops_test.py,"@@ -273,10 +273,12 @@ class StridedSliceChecker(object):
     self.x_np = np.array(x)
 
   def __getitem__(self, spec):
-    op = self.x.__getitem__(spec)
+    # TODO(aselle): When NewSliceHelper is installed, we can switch this back
+    # op = self.x[spec]
+    op = array_ops._NewSliceHelper(self.x, spec)
 
     tensor = op.eval()
-    self.test.assertAllEqual(self.x_np.__getitem__(spec), tensor)
+    self.test.assertAllEqual(self.x_np[spec], tensor)
     self.test.assertAllEqual(tensor.shape, op.get_shape())
     return tensor
 
@@ -393,7 +395,9 @@ class StridedSliceShapeChecker(object):
     self.x = x
 
   def __getitem__(self, spec):
-    op = self.x.__getitem__(spec)
+    # TODO(aselle): When NewSliceHelper is installed, we can switch this back
+    # op = self.x[spec]
+    op = array_ops._NewSliceHelper(self.x, spec)
     return op.get_shape()
 
 
@@ -447,28 +451,22 @@ class GradSliceChecker(object):
     self.varnp = varnp
 
   def __getitem__(self, spec):
-    slice_var = self.var[spec]
-    slice_val = self.val[spec]
-
-    # compute analytic 2nd derivative
-    analytic_grad2 = 2 * slice_val
-
-    dy = tf.Variable(tf.ones(shape=slice_var.get_shape(), dtype=tf.int32))
-    assign = dy.assign(slice_var)
-    slice_val_grad, = tf.gradients(slice_val, self.var, grad_ys=dy)
-    slice_val_grad2, = tf.gradients(slice_val_grad, dy, grad_ys=self.var)
-    self.sess.run(assign)
-    slice_val_grad_evaled, slice_val_grad2_evaled = (
-        self.sess.run([slice_val_grad, slice_val_grad2]))
-    analytic_grad2_evaled = analytic_grad2.eval()
-    self.test.assertAllEqual(slice_val_grad2_evaled, analytic_grad2_evaled)
-
-    # compute analytic gradient for slice
-    np_val_grad = (2 * self.varnp * self.varnp)
+    val_grad_op = tf.gradients(self.val, self.var)
+    sliceval_grad_op = tf.gradients(
+        array_ops._NewSliceHelper(self.val, spec), self.var)
+    slice1_op = array_ops._NewSliceHelper(val_grad_op, spec)
+    slice2_op = array_ops._NewSliceHelper(sliceval_grad_op, spec)
+    val_grad, sliceval_grad, slice1, slice2 = self.sess.run(
+        [val_grad_op, sliceval_grad_op, slice1_op, slice2_op])
+    np_val_grad = (2 * self.varnp)
     np_sliceval_grad = np.zeros(self.var.get_shape())
-    np_sliceval_grad[spec] = np_val_grad[spec]
-    # verify gradient
-    self.test.assertAllEqual(slice_val_grad_evaled, np_sliceval_grad)
+    np_sliceval_grad[spec] = np.array(val_grad[0])[spec]
+    # make sure np val grad is correct
+    self.test.assertAllEqual(np_val_grad, val_grad[0])
+    # make sure slice gradient is correct
+    self.test.assertAllEqual(np_sliceval_grad, sliceval_grad[0])
+    # make sure val grad and sliceval grad are the same in sliced area
+    self.test.assertAllEqual(slice1, slice2)
 
 
 class StridedSliceGradTest(test_util.TensorFlowTestCase):
@@ -495,7 +493,7 @@ class BenchmarkSlice(object):
     self.tensor = tensor
 
   def __getitem__(self, x):
-    return self.tensor[x]
+    return array_ops._NewSliceHelper(self.tensor, x)
 
 
 class StridedSliceBenchmark(tf.test.Benchmark):
",0,train
1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997
Change: 128393251",array_grad.py,"@@ -151,7 +151,7 @@ def _SliceGrad(op, grad):
 
 @ops.RegisterGradient(""StridedSlice"")
 def _StridedSliceGrad(op, grad):
-  """"""Gradient for StridedSlice op.""""""
+  """"""Gradient for unpack op.""""""
   x = array_ops.shape(op.inputs[0])
   begin = op.inputs[1]
   end = op.inputs[2]
@@ -170,25 +170,6 @@ def _StridedSliceGrad(op, grad):
       shrink_axis_mask=op.get_attr(""shrink_axis_mask"")), None, None, None
 
 
-@ops.RegisterGradient(""StridedSliceGrad"")
-def _StridedSliceGradGrad(op, grad):
-  """"""Gradient for StridedSliceGrad op.""""""
-  begin = op.inputs[1]
-  end = op.inputs[2]
-  strides = op.inputs[3]
-
-  return None, None, None, None, array_ops.strided_slice(
-      grad,
-      begin,
-      end,
-      strides,
-      begin_mask=op.get_attr(""begin_mask""),
-      end_mask=op.get_attr(""end_mask""),
-      ellipsis_mask=op.get_attr(""ellipsis_mask""),
-      new_axis_mask=op.get_attr(""new_axis_mask""),
-      shrink_axis_mask=op.get_attr(""shrink_axis_mask""))
-
-
 @ops.RegisterGradient(""Split"")
 def _SplitGrad(op, *grads):
   return None, array_ops.concat(op.inputs[0], list(grads))
",0,train
1bd1c27e0db046ef97a1e39817956c05bc3f44d3,tensorflow/tensorflow,"Automated rollback of change 128378997
Change: 128393251",array_ops.py,"@@ -196,7 +196,7 @@ def zeros_initializer(shape, dtype=dtypes.float32):
   return zeros(shape, dtype)
 
 
-def _SliceHelper(tensor, slice_spec):
+def _NewSliceHelper(tensor, slice_spec):
   """"""Overload for Tensor.__getitem__.
 
   This operation extracts the specified region from the tensor.
@@ -275,6 +275,73 @@ def _SliceHelper(tensor, slice_spec):
 
 
 # pylint: disable=undefined-variable,protected-access
+def _SliceHelper(tensor, slice_spec):
+  """"""Overload for Tensor.__getitem__.
+
+  Currently the size of the slice must be statically known in each dimension,
+  i.e. the ""stop"" of the slice must not be omitted.
+
+  TODO(mrry): Support slices where the sizes are not specified.
+  TODO(mrry): Support negative indices in slices with numpy/Python semantics.
+
+  Args:
+    tensor: An ops.Tensor object.
+    slice_spec: The arguments to Tensor.__getitem__.
+
+  Returns:
+    The appropriate slice of ""tensor"", based on ""slice_spec"".
+
+  Raises:
+    ValueError: If a slice range is negative size.
+    TypeError: If the slice indices aren't int, slice, or Ellipsis.
+  """"""
+  if not isinstance(slice_spec, (list, tuple)):
+    slice_spec = [slice_spec]
+  indices = []
+  sizes = []
+  squeeze_dims = []
+  for dim, s in enumerate(slice_spec):
+    if isinstance(s, _baseslice):
+      if s.step not in (None, 1):
+        raise NotImplementedError(
+            ""Steps other than 1 are not currently supported"")
+      start = s.start if s.start is not None else 0
+      if start < 0:
+        raise NotImplementedError(
+            ""Negative start indices are not currently supported"")
+      indices.append(start)
+      if s.stop is not None and s.stop < 0:
+        raise NotImplementedError(
+            ""Negative stop indices are not currently supported"")
+      # NOTE(mrry): If the stop is not specified, Python substitutes
+      #   sys.maxsize, which is typically (2 ** 63) - 1. Since Slice currently
+      #   supports signed DT_INT32 arguments, we use -1 to specify that all
+      #   elements should be captured.
+      if s.stop is None or s.stop == sys.maxsize:
+        sizes.append(-1)
+      else:
+        if start > s.stop:
+          raise ValueError(""Stop must be at least start"")
+        sizes.append(s.stop - start)
+    elif s is Ellipsis:
+      raise NotImplementedError(""Ellipsis is not currently supported"")
+    else:
+      try:
+        s = int(s)
+      except TypeError:
+        raise TypeError(""Bad slice index %s of type %s"" % (s, type(s)))
+      if s < 0:
+        raise NotImplementedError(""Negative indices are currently unsupported"")
+      indices.append(s)
+      sizes.append(1)
+      squeeze_dims.append(dim)
+  sliced = slice(tensor, indices, sizes)
+  if squeeze_dims:
+    return squeeze(sliced, squeeze_dims=squeeze_dims)
+  else:
+    return sliced
+
+
 def slice(input_, begin, size, name=None):
   """"""Extracts a slice from a tensor.
 
@@ -423,6 +490,8 @@ def strided_slice(input_,
                                      new_axis_mask=new_axis_mask,
                                      shrink_axis_mask=shrink_axis_mask)
 
+# TODO(aselle): When gradient is added and performance verified switch
+# ops.Tensor._override_operator(""__getitem__"", _NewSliceHelper)
 ops.Tensor._override_operator(""__getitem__"", _SliceHelper)
 
 
@@ -1526,9 +1595,8 @@ def _StridedSliceShape(op):
 
   sparse_dims = begin_shape.merge_with(end_shape).merge_with(strides_shape)[
       0].value
-  if (sparse_dims is None or begin_value is None or end_value is None or
-      strides_value is None):
-    return [tensor_shape.unknown_shape()]
+  if sparse_dims is None:
+    return [input_shape.unknown_shape()]
 
   begin_mask = op.get_attr(""begin_mask"")
   end_mask = op.get_attr(""end_mask"")
",0,train
69f52845ed618350aafd6fa8c2d369a636d205b5,tensorflow/tensorflow,"Update __init__.py

Add python formatting to correct website formatting away newlines",__init__.py,"@@ -18,17 +18,14 @@ For TensorFlow 1.0, we have reorganized the TensorFlow summary ops into a
 submodule, and made some semantic tweaks. The first thing to note is that we
 moved the APIs around as follows:
 
+```python
 tf.scalar_summary -> tf.summary.scalar
-
 tf.histogram_summary -> tf.summary.histogram
-
 tf.audio_summary -> tf.summary.audio
-
 tf.image_summary -> tf.summary.image
-
 tf.merge_summary -> tf.summary.merge
-
 tf.merge_all_summaries -> tf.summary.merge_all
+```
 
 We think this API is cleaner and will improve long-term discoverability and
 clarity of the TensorFlow API. We however, also took the opportunity to make an
@@ -46,7 +43,7 @@ collision.
 
 The new summary APIs under tf.summary throw away the ""tag"" as an independent
 concept; instead, the first argument is the node name. So summary tags now 
-automatically inherit the surrounding TF name scope, and automatically
+automatically inherit the surrounding TF namescope, and automatically
 are deduplicated if there is a conflict. Now however, the only allowed
 characters are alphanumerics, underscores, and forward slashes. To make
 migration easier, the new APIs automatically convert illegal characters to
@@ -67,7 +64,7 @@ def add_activation_summaries(v):
 ```
 
 Now, so long as the add_activation_summaries function is called from within the
-right name scope, the behavior is the same.
+right namescope, the behavior is the same.
 
 Because this change does modify the behavior and could break tests, we can't
 automatically migrate usage to the new APIs. That is why we are making the old
@@ -82,9 +79,9 @@ to the new summary ops:
   tf.summary.scalar requires a single scalar name and scalar value. In most
   cases, you can create tf.summary.scalars in a loop to get the same behavior
 
-As before, TensorBoard groups charts by the top-level name scope. This may
+As before, TensorBoard groups charts by the top-level namescope. This may
 be inconvenient, since in the new summary ops the summary will inherit that
-name scope without user control. We plan to add more grouping mechanisms to
+namescope without user control. We plan to add more grouping mechanisms to
 TensorBoard, so it will be possible to specify the TensorBoard group for
 each summary via the summary API.
 
",0,test
ac18e7069cf865783e9ed75a1a036d69084d9a7f,tensorflow/tensorflow,"Version of convolution that uses weights broadcasting at simd level supports apis without pointers support in kernel languages.

PiperOrigin-RevId: 419768517
Change-Id: I7824e4d6c618a30ed8b6636e0e9dbad1d627b7be",conv_powervr.cc,"@@ -940,16 +940,24 @@ std::string ConvPowerVR::GenerateConv(const GpuInfo& gpu_info,
   } else if (use_simd_broadcast) {
     int parts = local_mem_size / simd_size;
     int reminder = local_mem_size % simd_size;
+    const std::string read_start = gpu_info.SupportsPointersInKernels()
+                                       ? ""filters_loc[""
+                                       : ""args.weights.Read(filters_offset + "";
+    const std::string read_end =
+        gpu_info.SupportsPointersInKernels() ? ""]"" : "")"";
     for (int i = 0; i < parts; ++i) {
-      c += ""    FLT4 simd_w"" + std::to_string(i) + "" = filters_loc[simd_id + "" +
-           std::to_string(i * simd_size) + ""];\n"";
+      const std::string weights_index =
+          ""simd_id + "" + std::to_string(i * simd_size);
+      c += ""    FLT4 simd_w"" + std::to_string(i) + "" = "" + read_start +
+           weights_index + read_end + "";\n"";
     }
     if (reminder) {
+      const std::string weights_index =
+          ""simd_id + "" + std::to_string(parts * simd_size);
       c += ""    FLT4 simd_w"" + std::to_string(parts) + "";\n"";
       c += ""    if (simd_id < "" + std::to_string(reminder) + "") {\n"";
-      c += ""      simd_w"" + std::to_string(parts) +
-           "" = filters_loc[simd_id + "" + std::to_string(parts * simd_size) +
-           ""];\n"";
+      c += ""      simd_w"" + std::to_string(parts) + "" = "" + read_start +
+           weights_index + read_end + "";\n"";
       c += ""    }\n"";
     }
   } else if (conv_params.AreWeightsBuffer()) {  // GLOBAL_MEM/CONSTANT_MEM
",0,train
0cc35b3d409ddf9d3baeb54edd88b1addd186b17,tensorflow/tensorflow,"Fix behavior of `stack()` when passed a single ragged tensor, to be consistent with handling of multiple tensors.

PiperOrigin-RevId: 226355632",ragged_array_ops.py,"@@ -655,7 +655,7 @@ def _ragged_stack_concat_helper(rt_inputs, axis, stack_values):
   # Special case: if there's only one input, then return it as-is.
   if len(rt_inputs) == 1:
     if stack_values:
-      return expand_dims(rt_inputs[0], axis=0)
+      return expand_dims(rt_inputs[0], axis=axis)
     else:
       return rt_inputs[0]
 
",0,train
0cc35b3d409ddf9d3baeb54edd88b1addd186b17,tensorflow/tensorflow,"Fix behavior of `stack()` when passed a single ragged tensor, to be consistent with handling of multiple tensors.

PiperOrigin-RevId: 226355632",ragged_stack_op_test.py,"@@ -33,6 +33,52 @@ class RaggedStackOpTest(ragged_test_util.RaggedTensorTestCase,
                         parameterized.TestCase):
 
   @parameterized.parameters(
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=0',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21']],),   # shape=(3, None)
+          axis=0,
+          expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]),
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=1',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21', 'a22']],),   # shape=(3, None)
+          axis=1,
+          expected=[
+              [[b'a00', b'a01']],
+              [[]],
+              [[b'a20', b'a21', b'a22']]]),
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=2',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21', 'a22']],),   # shape=(3, None)
+          axis=2,
+          expected=[
+              [[b'a00'], [b'a01']], [],
+              [[b'a20'], [b'a21'], [b'a22']]]),
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=-3',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21']],),   # shape=(3, None)
+          axis=-3,
+          expected=[[[b'a00', b'a01'], [], [b'a20', b'a21']]]),
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=-2',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21', 'a22']],),   # shape=(3, None)
+          axis=-2,
+          expected=[
+              [[b'a00', b'a01']],
+              [[]],
+              [[b'a20', b'a21', b'a22']]]),
+      dict(
+          descr='One rank-2 input (ragged_rank=1), axis=-1',
+          rt_inputs=(
+              [['a00', 'a01'], [], ['a20', 'a21', 'a22']],),  # shape=(3, None)
+          axis=-1,
+          expected=[
+              [[b'a00'], [b'a01']], [],
+              [[b'a20'], [b'a21'], [b'a22']]]),
       dict(
           descr='Two rank-2 inputs (ragged_rank=1), axis=0',
           rt_inputs=(
",0,train
59b464fc9d2379c5da0bb6b3b4b5c6b0d36d65ce,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2021-07-27

PiperOrigin-RevId: 387057905
Change-Id: I1577296734d90794ccac1a409ad447dc302d4031",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 26)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2021, 7, 27)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train
f4850641530017a3b2b294974298ae13028b8583,tensorflow/tensorflow,CLN: code style,training_ops.cc,"@@ -342,7 +342,7 @@ struct ApplyAdaMaxNonCuda {
     m.device(d) += (grad - m) * (T(1) - beta1());
     // Here v is u in section 7.1
     v.device(d) = (beta2() * v).cwiseMax(grad.abs());
-    // var is θ  in section 7.1
+    // var is θ in section 7.1
     var.device(d) -= lr() / (T(1) - beta1_power()) * (m / (v + epsilon()));
   }
 };
",0,train
61fde9e1e3c5d995aa20f7bf2781ba60db5bf246,tensorflow/tensorflow,Single quote to backtick for consistency,mirrored_strategy.py,"@@ -49,7 +49,7 @@ class MirroredStrategy(distribute_lib.DistributionStrategy):
 
   There are several important concepts for distributed TensorFlow, e.g.
   `client`, `job`, `task`, `cluster`, `in-graph replication` and
-  'synchronous training' and they have already been defined in the
+  `synchronous training` and they have already been defined in the
   [TensorFlow's documentation](https://www.tensorflow.org/deploy/distributed).
   The distribution strategy inherits these concepts as well and in addition to
   that we also clarify several more concepts:
",0,test
e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist

PiperOrigin-RevId: 322821382
Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",gpu_conv_algorithm_picker.cc,"@@ -24,7 +24,7 @@ limitations under the License.
 #include ""tensorflow/compiler/xla/service/gpu/backend_configs.pb.h""
 #include ""tensorflow/compiler/xla/service/gpu/convolution_thunk.h""
 #include ""tensorflow/compiler/xla/service/gpu/gpu_autotuning.pb.h""
-#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h""
+#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h""
 #include ""tensorflow/compiler/xla/service/gpu/ir_emission_utils.h""
 #include ""tensorflow/compiler/xla/service/gpu/stream_executor_util.h""
 #include ""tensorflow/compiler/xla/service/hlo_casting_utils.h""
",0,train
e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist

PiperOrigin-RevId: 322821382
Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h""
+#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h""
 
 #include <string>
 
",0,train
e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist

PiperOrigin-RevId: 322821382
Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist.h,,0,train
e656213afff2e9491ae1c53e2e76a427a3be20c9,tensorflow/tensorflow,"Rename hlo_algorithm_blacklist to hlo_algorithm_denylist

PiperOrigin-RevId: 322821382
Change-Id: Iea0de84b3c82562b5649fc0a6092cd2fb473c83a",hlo_algorithm_denylist_test.cc,"@@ -13,7 +13,7 @@ See the License for the specific language governing permissions and
 limitations under the License.
 ==============================================================================*/
 
-#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_blacklist.h""
+#include ""tensorflow/compiler/xla/service/gpu/hlo_algorithm_denylist.h""
 
 #include ""tensorflow/core/lib/io/path.h""
 #include ""tensorflow/core/platform/env.h""
",0,train
7e346134472913c743047cd49e17649f34ec75ca,tensorflow/tensorflow,"Add TensorListStack op to the list of ops not to be marked for outside compilation.

PiperOrigin-RevId: 342746704
Change-Id: I343580637ace980fdbbab8801eedc65be94f4b77",mark_ops_for_outside_compilation.cc,"@@ -117,6 +117,7 @@ void AddRewrittenCompositeOps(MLIRContext* context,
       GET_OPERATION_NAME(TF::TensorListElementShapeOp),
       GET_OPERATION_NAME(TF::TensorListGatherOp),
       GET_OPERATION_NAME(TF::TensorListScatterIntoExistingListOp),
+      GET_OPERATION_NAME(TF::TensorListStackOp),
   };
 #undef GET_OPERATION_NAME
 
",0,train
d84bfa45172da3af2b487593fb0cac1756f4fc0d,tensorflow/tensorflow,"Removed #includes of protobuf full headers when TENSORFLOW_LITE_PROTOS is defined.
This requires #ifdef'ing out the code to use JSON parsing/serialization.

PiperOrigin-RevId: 217003132",human_readable_json.cc,"@@ -22,6 +22,10 @@ namespace tensorflow {
 
 Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
                                 string* result) {
+#ifdef TENSORFLOW_LITE_PROTOS
+  *result = ""[human readable output not available on Android]"";
+  return Status::OK();
+#else
   result->clear();
 
   auto status = google::protobuf::util::MessageToJsonString(proto, result);
@@ -34,10 +38,14 @@ Status ProtoToHumanReadableJson(const ::google::protobuf::Message& proto,
                         StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
+#endif
 }
 
 Status HumanReadableJsonToProto(const string& str,
                                 ::google::protobuf::Message* proto) {
+#ifdef TENSORFLOW_LITE_PROTOS
+  return errors::Internal(""Cannot parse JSON protos on Android"");
+#else
   proto->Clear();
   auto status = google::protobuf::util::JsonStringToMessage(str, proto);
   if (!status.ok()) {
@@ -49,6 +57,7 @@ Status HumanReadableJsonToProto(const string& str,
                         StringPiece(error_msg.data(), error_msg.length())));
   }
   return Status::OK();
+#endif
 }
 
 }  // namespace tensorflow
",0,train
d84bfa45172da3af2b487593fb0cac1756f4fc0d,tensorflow/tensorflow,"Removed #includes of protobuf full headers when TENSORFLOW_LITE_PROTOS is defined.
This requires #ifdef'ing out the code to use JSON parsing/serialization.

PiperOrigin-RevId: 217003132",protobuf.h,"@@ -19,18 +19,21 @@ limitations under the License.
 // IWYU pragma: private, include ""third_party/tensorflow/core/platform/protobuf.h""
 // IWYU pragma: friend third_party/tensorflow/core/platform/protobuf.h
 
-#include ""google/protobuf/arena.h""
+#ifndef TENSORFLOW_LITE_PROTOS
 #include ""google/protobuf/descriptor.h""
 #include ""google/protobuf/descriptor.pb.h""
 #include ""google/protobuf/dynamic_message.h""
+#include ""google/protobuf/text_format.h""
+#include ""google/protobuf/util/json_util.h""
+#include ""google/protobuf/util/type_resolver_util.h""
+#endif
+
+#include ""google/protobuf/arena.h""
 #include ""google/protobuf/io/coded_stream.h""
 #include ""google/protobuf/io/zero_copy_stream.h""
 #include ""google/protobuf/io/zero_copy_stream_impl_lite.h""
 #include ""google/protobuf/map.h""
 #include ""google/protobuf/repeated_field.h""
-#include ""google/protobuf/text_format.h""
-#include ""google/protobuf/util/json_util.h""
-#include ""google/protobuf/util/type_resolver_util.h""
 
 namespace tensorflow {
 namespace protobuf = ::google::protobuf;
",0,train
d600c3c4fa9c8ea9581ea9ff52a30b87655ebb71,tensorflow/tensorflow,"Convert XEventMetadata XStats to trace viewer events

PiperOrigin-RevId: 407940486
Change-Id: I2a4d378a7fa1013e75bf069184082715951a450f",xplane_to_trace_events.cc,"@@ -85,14 +85,17 @@ void ConvertXPlaneToTraceEvents(uint32 device_id, const XPlaneVisitor& xplane,
           event->set_timestamp_ps(xevent.TimestampPs());
           event->set_duration_ps(xevent.DurationPs());
 
-          xevent.ForEachStat([&](const XStatVisitor& stat) {
+          auto for_each_stat = [&](const XStatVisitor& stat) {
             if (stat.ValueCase() == XStat::VALUE_NOT_SET) return;
             if (IsInternalStat(stat.Type())) return;
             if (stat.Type() == StatType::kStepName) {
               event->set_name(stat.ToString());
             }
             args[std::string(stat.Name())] = stat.ToString();
-          });
+          };
+          // The metadata stats should appear before the per-occurrence stats.
+          xevent.Metadata().ForEachStat(for_each_stat);
+          xevent.ForEachStat(for_each_stat);
         });
   });
 }
",0,train
8624a703ebd914e9d91bb7992570b52946fad970,tensorflow/tensorflow,"Try to make resize bilinear test more deterministic.

PiperOrigin-RevId: 228301953",resize_bilinear_test.cc,"@@ -76,6 +76,7 @@ void TestOneResizeBilinear(int batch, int depth, int input_width,
 }
 
 TEST(ResizeBilinear, TestResizeBilinear8Bit) {
+  RandomEngine().seed(38291);
   const int kTestsToRun = 100 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
     const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
@@ -91,6 +92,7 @@ TEST(ResizeBilinear, TestResizeBilinear8Bit) {
 }
 
 TEST(ResizeBilinear2x2, TestResizeBilinear8Bit) {
+  RandomEngine().seed(38291);
   const int kTestsToRun = 100 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
     const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
@@ -106,6 +108,7 @@ TEST(ResizeBilinear2x2, TestResizeBilinear8Bit) {
 }
 
 TEST(ResizeBilinear, TestResizeBilinear) {
+  RandomEngine().seed(38291);
   const int kTestsToRun = 100 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
     const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
@@ -121,6 +124,7 @@ TEST(ResizeBilinear, TestResizeBilinear) {
 }
 
 TEST(ResizeBilinear2x2, TestResizeBilinear) {
+  RandomEngine().seed(38291);
   const int kTestsToRun = 100 * 1000;
   for (int i = 0; i < kTestsToRun; i++) {
     const int batch = ExponentialRandomPositiveInt(0.9f, 3, 20);
",0,test
8fdf7b0e3b94154dd268acbc1db569c6c7ca3ce3,tensorflow/tensorflow,"Disable pinning functions to the CPU.

PiperOrigin-RevId: 231327139",execute.cc,"@@ -744,7 +744,7 @@ bool IsPinnableOp(const string& op_type) {
   static const gtl::FlatSet<string>* unpinnable_ops = new gtl::FlatSet<string>({
       ""RandomUniform"",
       ""RandomUniformInt"",
-      ""RandomNormal"",
+      ""RandomStandardNormal"",
       ""StatelessRandomUniform"",
       ""StatelessRandomUniformInt"",
       ""StatelessRandomNormal"",
@@ -764,7 +764,7 @@ bool IsPinnableOp(const string& op_type) {
 Status MaybeUpdateOpDevice(EagerOperation* op) {
   EagerContext* ctx = op->EagerContext();
   bool all_inputs_eligible_for_cpu_pinning =
-      ctx->PinSmallOpsToCPU() && IsPinnableOp(op->Name());
+      ctx->PinSmallOpsToCPU() && !op->is_function() && IsPinnableOp(op->Name());
   Device* op_device = op->Device() == nullptr ? ctx->HostCPU() : op->Device();
   for (int i = 0; i < op->Inputs().size(); ++i) {
     TensorHandle* tensor_handle = op->Inputs()[i];
",0,test
8fdf7b0e3b94154dd268acbc1db569c6c7ca3ce3,tensorflow/tensorflow,"Disable pinning functions to the CPU.

PiperOrigin-RevId: 231327139",function_test.py,"@@ -49,6 +49,7 @@ from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import clip_ops
 from tensorflow.python.ops import control_flow_ops
+from tensorflow.python.ops import gen_random_ops
 from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.ops import init_ops
 from tensorflow.python.ops import list_ops
@@ -2120,6 +2121,24 @@ class FunctionTest(test.TestCase, parameterized.TestCase):
       self.assertIn('node assert_equal/Assert/Assert (defined at', e.message)
       self.assertNotIn('fn3', e.message)
 
+  def testFunctionIsNotPinned(self):
+    """"""Tests that functions aren't pinned to the CPU by the eager runtime.""""""
+    if not context.context().num_gpus():
+      self.skipTest('No GPUs found.')
+    seed1, seed2 = 79, 25
+    shape = constant_op.constant([4, 7])
+    dtype = dtypes.float32
+
+    @def_function.function
+    def func():
+      with ops.device('GPU:0'):
+        return gen_random_ops.random_standard_normal(
+            shape, dtype=dtype, seed=seed1, seed2=seed2)
+
+    with ops.device('GPU:0'):
+      x = func()
+      self.assertRegexpMatches(x.device, 'GPU')
+
 
 class MultiDeviceTest(test.TestCase, parameterized.TestCase):
 
",0,test
dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB.
Change: 128485714",bounds_check.h,"@@ -42,7 +42,7 @@ namespace internal {
 // This function may only be used on primitive integral types (int32, int64,
 // etc).  It does not guarantee any atomicity or barriers.
 template <typename T>
-const T SubtleMustCopy(const T &x) {
+EIGEN_ALWAYS_INLINE EIGEN_DEVICE_FUNC const T SubtleMustCopy(const T &x) {
   static_assert(std::is_integral<T>::value,
                 ""SubtleMustCopy can only be used on integer types."");
   auto *to_x = reinterpret_cast<const volatile T *>(&x);
",0,train
dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB.
Change: 128485714",sparse_xent_op.h,"@@ -19,6 +19,8 @@ limitations under the License.
 
 #include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/tensor_types.h""
+#include ""tensorflow/core/kernels/bounds_check.h""
+#include ""tensorflow/core/platform/macros.h""
 #include ""tensorflow/core/platform/types.h""
 
 namespace tensorflow {
@@ -56,14 +58,22 @@ class SparseXentLossGenerator {
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SparseXentLossGenerator(
       typename TTypes<const T, 2>::Tensor32Bit logits,
       typename TTypes<const T, 1>::Tensor32Bit sum_exp_logits,
-      typename TTypes<const Index, 1>::Tensor32Bit labels)
-      : logits_(logits), sum_exp_logits_(sum_exp_logits), labels_(labels) {}
+      typename TTypes<const Index, 1>::Tensor32Bit labels,
+      const Index max_depth)
+      : logits_(logits),
+        sum_exp_logits_(sum_exp_logits),
+        labels_(labels),
+        max_depth_(max_depth) {}
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
   operator()(const Eigen::array<int, 2>& coords) const {
-    int batch = coords[0];
-    int depth = coords[1];
-    return (labels_(batch) == depth)
+    const int batch = coords[0];
+    const int depth = coords[1];
+    const Index label = tensorflow::internal::SubtleMustCopy(labels_(batch));
+    if (!FastBoundsCheck(label, max_depth_)) {
+      return Eigen::NumTraits<T>::quiet_NaN();
+    }
+    return TF_PREDICT_FALSE(label == depth)
                ? (Eigen::numext::log(sum_exp_logits_(batch)) - logits_(coords))
                : T(0.0);
   };
@@ -72,6 +82,7 @@ class SparseXentLossGenerator {
   typename TTypes<const T, 2>::Tensor32Bit logits_;
   typename TTypes<const T, 1>::Tensor32Bit sum_exp_logits_;
   typename TTypes<const Index, 1>::Tensor32Bit labels_;
+  const Index max_depth_;
 };
 
 // Generator for calculation of the sparse Xent gradient.
@@ -87,16 +98,22 @@ class SparseXentGradGenerator {
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE SparseXentGradGenerator(
       typename TTypes<const T, 2>::Tensor32Bit exp_logits,
       typename TTypes<const T, 1>::Tensor32Bit sum_exp_logits,
-      typename TTypes<const Index, 1>::Tensor32Bit labels)
+      typename TTypes<const Index, 1>::Tensor32Bit labels,
+      const Index max_depth)
       : exp_logits_(exp_logits),
         sum_exp_logits_(sum_exp_logits),
-        labels_(labels) {}
+        labels_(labels),
+        max_depth_(max_depth) {}
 
   EIGEN_DEVICE_FUNC EIGEN_ALWAYS_INLINE T
   operator()(const Eigen::array<int, 2>& coords) const {
-    int batch = coords[0];
-    int depth = coords[1];
-    T subtract = (depth == labels_(batch)) ? T(1.0) : T(0.0);
+    const int batch = coords[0];
+    const int depth = coords[1];
+    const Index label = tensorflow::internal::SubtleMustCopy(labels_(batch));
+    if (!FastBoundsCheck(label, max_depth_)) {
+      return Eigen::NumTraits<T>::quiet_NaN();
+    }
+    T subtract = TF_PREDICT_FALSE(depth == label) ? T(1.0) : T(0.0);
     return exp_logits_(coords) / sum_exp_logits_(batch) - subtract;
   };
 
@@ -104,6 +121,7 @@ class SparseXentGradGenerator {
   typename TTypes<const T, 2>::Tensor32Bit exp_logits_;
   typename TTypes<const T, 1>::Tensor32Bit sum_exp_logits_;
   typename TTypes<const Index, 1>::Tensor32Bit labels_;
+  const Index max_depth_;
 };
 
 }  // namespace generator
@@ -185,7 +203,8 @@ struct SparseXentEigenImpl {
     //  along classes
     generator::SparseXentLossGenerator<T, Index> sparse_xent_loss_gen(
         sparse_xent_helpers::To32BitConst<T>(backprop),
-        sparse_xent_helpers::To32BitConst<T>(scratch), To32Bit(labels));
+        sparse_xent_helpers::To32BitConst<T>(scratch), To32Bit(labels),
+        backprop.dimension(1) /* max_depth */);
     To32Bit(loss).device(d) =
         To32Bit(backprop).generate(sparse_xent_loss_gen).sum(along_class);
 
@@ -194,7 +213,8 @@ struct SparseXentEigenImpl {
     To32Bit(backprop).device(d) = To32Bit(backprop).exp();
     generator::SparseXentGradGenerator<T, Index> sparse_xent_grad_gen(
         sparse_xent_helpers::To32BitConst<T>(backprop),
-        sparse_xent_helpers::To32BitConst<T>(scratch), To32Bit(labels));
+        sparse_xent_helpers::To32BitConst<T>(scratch), To32Bit(labels),
+        backprop.dimension(1) /* max_depth */);
     To32Bit(backprop).device(d) =
         To32Bit(backprop).generate(sparse_xent_grad_gen);
   }
",0,train
dcb9053d23034e1d16b8787a0ab3239a10d74f4b,tensorflow/tensorflow,"SparseXentOp now returns NaNs for loss & grad rows where the label value is OOB.
Change: 128485714",sparse_xent_op_test.py,"@@ -73,6 +73,30 @@ class SparseXentTest(tf.test.TestCase):
     self._testSingleClass(use_gpu=True)
     self._testSingleClass(use_gpu=False)
 
+  def _testInvalidLabel(self, use_gpu):
+    features = [
+        [1., 1., 1., 1.],
+        [1., 1., 1., 1.],
+        [1., 2., 3., 4.],
+        [1., 2., 3., 4.]]
+    labels = [4, 3, 0, -1]
+    with self.test_session(use_gpu=use_gpu) as sess:
+      loss, backprop = gen_nn_ops._sparse_softmax_cross_entropy_with_logits(
+          features, labels)
+      tf_loss, tf_backprop = sess.run([loss, backprop])
+      self.assertAllClose(
+          [[np.nan] * 4,
+           [0.25, 0.25, 0.25, -0.75],
+           [-0.968, 0.087, 0.237, 0.6439],
+           [np.nan] * 4],
+          tf_backprop, rtol=1e-3, atol=1e-3)
+      self.assertAllClose(
+          [np.nan, 1.3862, 3.4420, np.nan], tf_loss, rtol=1e-3, atol=1e-3)
+
+  def testInvalidLabel(self):
+    self._testInvalidLabel(use_gpu=True)
+    self._testInvalidLabel(use_gpu=False)
+
   def testNpXent(self):
     # We create 2 batches of logits for testing.
     # batch 0 is the boring uniform distribution: 1, 1, 1, 1, with target 3.
",0,train
92415c09b8d00f200429e994b08e302f4ca85e67,tensorflow/tensorflow,"Update README.md for tf.contrib.kfac and add deprecation warning.

PiperOrigin-RevId: 199119904",optimizer.py,"@@ -18,6 +18,8 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import warnings
+
 # pylint disable=long-line
 from tensorflow.contrib.kfac.python.ops import curvature_matrix_vector_products as cmvp
 from tensorflow.contrib.kfac.python.ops import estimator as est
@@ -107,6 +109,10 @@ class KfacOptimizer(gradient_descent.GradientDescentOptimizer):
       ValueError: If momentum is non-zero and momentum_type is not 'regular'
           or 'adam'.
     """"""
+    warnings.warn(
+        ""third_party.tensorflow.contrib.kfac is deprecated.""
+        ""This will be removed on 15-07-2018. Check README for further details."",
+        DeprecationWarning)
     # Parameters to be passed to the Fisher estimator:
     self._variables = var_list or tf_variables.trainable_variables
     self._cov_ema_decay = cov_ema_decay
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",collective_param_resolver_distributed_test.cc,"@@ -51,7 +51,7 @@ class FakeWorker : public TestWorkerInterface {
       : name_(name), device_mgr_(dev_mgr), param_resolver_(cpres) {}
 
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override {
     std::vector<DeviceAttributes> dev_attr;
     device_mgr_->ListDeviceAttributes(&dev_attr);
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",collective_rma_distributed_test.cc,"@@ -74,7 +74,7 @@ class FakeWorker : public TestWorkerInterface {
   BufRendezvous* buf_rendezvous() { return &buf_rendezvous_; }
 
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override {
     std::vector<DeviceAttributes> dev_attr;
     device_mgr_->ListDeviceAttributes(&dev_attr);
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",device_resolver_distributed.cc,"@@ -98,7 +98,8 @@ void DeviceResolverDistributed::RefreshRemoteAttributes(
   WorkerInterface* worker = worker_cache_->GetOrCreateWorker(task);
   CHECK(worker) << ""Failed to get worker for "" << task;
   worker->GetStatusAsync(
-      req, resp, [this, device, task, req, resp, worker, done](Status s) {
+      req, resp, /*fail_fast=*/true,
+      [this, device, task, req, resp, worker, done](Status s) {
         if (s.ok()) {
           mutex_lock l(mu_);
           for (const DeviceAttributes& da : resp->device_attributes()) {
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",device_resolver_distributed_test.cc,"@@ -69,7 +69,7 @@ class FakeWorker : public TestWorkerInterface {
       : name_(name), device_mgr_(dev_mgr), device_resolver_(dres) {}
 
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override {
     std::vector<DeviceAttributes> dev_attr;
     device_mgr_->ListDeviceAttributes(&dev_attr);
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",remote_device.cc,"@@ -129,7 +129,7 @@ void NewRemoteDevices(Env* env, WorkerCacheInterface* worker_cache,
       }
     }
   };
-  wi->GetStatusAsync(&call->req, &call->resp, cb);
+  wi->GetStatusAsync(&call->req, &call->resp, /*fail_fast=*/false, cb);
 }
 
 }  // namespace tensorflow
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",grpc_eager_client.cc,"@@ -49,8 +49,7 @@ class GrpcEagerClient : public EagerClient {
       override {                                                          \
     new RPCState<protobuf::Message>(                                      \
         &stub_, cq_, ""/tensorflow.eager.EagerService/"" #method, *request, \
-        response, std::move(done), nullptr, nullptr, /*max_retries=*/10,  \
-        /*fail_fast=*/true);                                              \
+        response, std::move(done), nullptr, nullptr, /*max_retries=*/0);  \
   }
 
   CLIENT_METHOD(CreateContext);
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",grpc_remote_worker.cc,"@@ -72,9 +72,10 @@ class GrpcRemoteWorker : public WorkerInterface {
   ~GrpcRemoteWorker() override {}
 
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override {
-    IssueRequest(request, response, getstatus_, std::move(done));
+    IssueRequest(request, response, getstatus_, std::move(done), nullptr,
+                 fail_fast);
   }
 
   void CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request,
@@ -269,18 +270,18 @@ class GrpcRemoteWorker : public WorkerInterface {
   void IssueRequest(const protobuf::Message* request,
                     protobuf::Message* response, const ::grpc::string& method,
                     StatusCallback done, CallOptions* call_opts = nullptr,
-                    int max_retries = kMaxWorkerRpcRetries) {
-    new RPCState<protobuf::Message>(&stub_, cq_, method, *request, response,
-                                    std::move(done), call_opts,
-                                    callback_threadpool_, max_retries);
+                    bool fail_fast = true) {
+    new RPCState<protobuf::Message>(
+        &stub_, cq_, method, *request, response, std::move(done), call_opts,
+        callback_threadpool_, /*max_retries=*/0, fail_fast);
   }
+
   void IssueRequest(const protobuf::Message* request, TensorResponse* response,
                     const ::grpc::string& method, StatusCallback done,
-                    CallOptions* call_opts = nullptr,
-                    int max_retries = kMaxWorkerRpcRetries) {
+                    CallOptions* call_opts = nullptr) {
     new RPCState<TensorResponse>(&stub_, cq_, method, *request, response,
                                  std::move(done), call_opts,
-                                 callback_threadpool_, max_retries);
+                                 callback_threadpool_);
   }
 
   void IssueMarkRecvFinishedRequest(int64 request_id) {
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",grpc_state.h,"@@ -36,17 +36,15 @@ namespace tensorflow {
 
 // Object allocated per active RPC.
 // Manage the state of a single asynchronous RPC request.  If `max_retries`
-// is greater than 0, the request will be retried for any transient failures
-// as long as the overall deadline has not elapsed.
+// is greater than 0, the request will be retried for any transient failures.
 template <class Response>
 class RPCState : public GrpcClientCQTag {
  public:
-  // Default behavior is to set fail_fast = False and handle timeouts manually.
   RPCState(::grpc::GenericStub* stub, ::grpc::CompletionQueue* cq,
            const ::grpc::string& method, const protobuf::Message& request,
            Response* response, StatusCallback done, CallOptions* call_opts,
            thread::ThreadPool* threadpool, int32 max_retries = 0,
-           bool fail_fast = false)
+           bool fail_fast = true)
       : RPCState(stub, cq, method, request, response, std::move(done),
                  call_opts, threadpool, fail_fast,
                  /*timeout_in_ms=*/0, max_retries) {}
@@ -133,6 +131,7 @@ class RPCState : public GrpcClientCQTag {
       response_buf_.Clear();
       VLOG(1) << ""Retrying call for "" << method_ << ""Retry: "" << num_retries_
               << "" of "" << max_retries_;
+      // TODO(b/139945426) Allow user to configure the retry backoff time.
       StartCall();
     } else {
       // Attach additional GRPC error information if any to the final status
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",test_utils.h,"@@ -31,7 +31,7 @@ namespace tensorflow {
 class TestWorkerInterface : public WorkerInterface {
  public:
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override {
     done(errors::Unimplemented(""GetStatusAsync""));
   }
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",worker.cc,"@@ -36,7 +36,8 @@ Worker::Worker(WorkerEnv* env) : env_(env), recent_request_ids_(100000) {
 }
 
 void Worker::GetStatusAsync(const GetStatusRequest* request,
-                            GetStatusResponse* response, StatusCallback done) {
+                            GetStatusResponse* response, bool fail_fast,
+                            StatusCallback done) {
   DeviceMgr* dm = env_->device_mgr;
   std::vector<DeviceAttributes> devices;
   dm->ListDeviceAttributes(&devices);
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",worker.h,"@@ -46,7 +46,7 @@ class Worker : public WorkerInterface {
   virtual ~Worker() {}
 
   void GetStatusAsync(const GetStatusRequest* request,
-                      GetStatusResponse* response,
+                      GetStatusResponse* response, bool fail_fast,
                       StatusCallback done) override;
 
   void CreateWorkerSessionAsync(const CreateWorkerSessionRequest* request,
",0,train
69b3106b1811c77bf1eb6d60dd29eac8e6f76716,tensorflow/tensorflow,"Currently the GRPC context used by GrpcRemoteWorker is configured with fail_fast = false and timeout = 0. Thus if the worker process is restarted with a new port due to e.g. preemption in public cloud, the client process will hang forever.

This patch intends to increase the fault tolerance by allowing client process to fail fast so that user can have the chance to restart the client process from the checkpoint state. This is achieved by setting fail_fast = true.

PiperOrigin-RevId: 265705492",worker_interface.h,"@@ -37,7 +37,7 @@ class TensorResponse;
 class WorkerInterface {
  public:
   virtual void GetStatusAsync(const GetStatusRequest* request,
-                              GetStatusResponse* response,
+                              GetStatusResponse* response, bool fail_fast,
                               StatusCallback done) = 0;
 
   virtual void CreateWorkerSessionAsync(
@@ -131,7 +131,15 @@ class WorkerInterface {
 
   Status GetStatus(const GetStatusRequest* request,
                    GetStatusResponse* response) {
-    return CallAndWait(&ME::GetStatusAsync, request, response);
+    Status ret;
+    Notification n;
+    GetStatusAsync(request, response, /*fail_fast=*/true,
+                   [&ret, &n](const Status& s) {
+                     ret = s;
+                     n.Notify();
+                   });
+    n.WaitForNotification();
+    return ret;
   }
 
   Status CreateWorkerSession(const CreateWorkerSessionRequest* request,
",0,train
20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage

PiperOrigin-RevId: 181248577",graph_memory.cc,"@@ -16,6 +16,7 @@ limitations under the License.
 #include ""tensorflow/core/grappler/costs/graph_memory.h""
 #include <list>
 #include ""tensorflow/core/framework/allocation_description.pb.h""
+#include ""tensorflow/core/framework/attr_value.pb.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
 #include ""tensorflow/core/framework/step_stats.pb.h""
 #include ""tensorflow/core/framework/tensor_description.pb.h""
@@ -163,6 +164,8 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) {
 
   NodeMap node_map(&item_.graph);
   for (const auto& dev_stats : timeline.dev_stats()) {
+    const string& device_name = dev_stats.device();
+    const bool is_gpu = (device_name.find(""GPU:"") || device_name.find(""gpu:""));
     std::list<LiveTensor>& device_tensors =
         live_tensors_per_device[dev_stats.device()];
     for (const auto& node_stats : dev_stats.node_stats()) {
@@ -194,7 +197,24 @@ void GraphMemory::InferFromTrace(const StepStats& timeline) {
         // graph (e.g _Send/_Recv nodes).
         continue;
       }
-      for (const string& input : node->input()) {
+      std::unordered_set<int> swapped_inputs;
+      if (is_gpu) {
+        auto it = node->attr().find(""_swap_to_host"");
+        if (it != node->attr().end()) {
+          const AttrValue& val = it->second;
+          for (int port_id : val.list().i()) {
+            swapped_inputs.insert(port_id);
+          }
+        }
+      }
+      for (int i = 0; i < node->input_size(); ++i) {
+        if (swapped_inputs.find(i) != swapped_inputs.end()) {
+          // The memory of swapped inputs will be released as early as possible:
+          // therefore ignore this input when determining the deallocation time
+          // of the tensor.
+          continue;
+        }
+        const string& input = node->input(i);
         int position;
         string input_node = ParseNodeName(input, &position);
         if (position < 0) {
",0,test
20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage

PiperOrigin-RevId: 181248577",graph_memory_test.cc,"@@ -134,6 +134,62 @@ TEST_F(GraphMemoryTest, MultiDevice) {
   EXPECT_EQ(gpu_expected, gpu_tensors);
 }
 
+TEST_F(GraphMemoryTest, GpuSwapping) {
+  TrivialTestGraphInputYielder fake_input(4, 2, 1024 * 1024, false, {""/GPU:0""});
+  GrapplerItem item;
+  CHECK(fake_input.NextItem(&item));
+  item.feed.clear();
+
+  {
+    // Estimate the max memory usage for the graph.
+    GraphMemory memory(item);
+    Status s = memory.InferStatically(devices_);
+    TF_CHECK_OK(s);
+
+    const GraphMemory::MemoryUsage& gpu_mem =
+        memory.GetPeakMemoryUsage(""/GPU:0"");
+    EXPECT_EQ(20971520, gpu_mem.used_memory);
+    std::set<string> gpu_tensors;
+    for (const auto& t : gpu_mem.live_tensors) {
+      gpu_tensors.insert(strings::StrCat(t.node, "":"", t.output_id));
+    }
+    std::set<string> gpu_expected;
+    gpu_expected.insert(""Square:0"");
+    gpu_expected.insert(""Square_1:0"");
+    gpu_expected.insert(""AddN:0"");
+    gpu_expected.insert(""AddN_1:0"");
+    gpu_expected.insert(""AddN_2:0"");
+    EXPECT_EQ(gpu_expected, gpu_tensors);
+  }
+
+  {
+    // Swap the first input to node AddN_1: its fanin (the square nodes) should
+    // not appear in the max cut anymore.
+    for (auto& node : *item.graph.mutable_node()) {
+      if (node.name() == ""AddN_1"") {
+        (*node.mutable_attr())[""_swap_to_host""].mutable_list()->add_i(0);
+      }
+    }
+    GraphMemory memory(item);
+    Status s = memory.InferStatically(devices_);
+    TF_CHECK_OK(s);
+    const GraphMemory::MemoryUsage& new_gpu_mem =
+        memory.GetPeakMemoryUsage(""/GPU:0"");
+    EXPECT_EQ(20971520, new_gpu_mem.used_memory);
+    std::set<string> new_gpu_tensors;
+    for (const auto& t : new_gpu_mem.live_tensors) {
+      new_gpu_tensors.insert(strings::StrCat(t.node, "":"", t.output_id));
+    }
+    std::set<string> new_gpu_expected;
+    new_gpu_expected.insert(""AddN:0"");
+    new_gpu_expected.insert(""AddN_1:0"");
+    new_gpu_expected.insert(""AddN_2:0"");
+    new_gpu_expected.insert(""AddN_3:0"");
+    new_gpu_expected.insert(""AddN_4:0"");
+    EXPECT_EQ(new_gpu_expected, new_gpu_tensors);
+  }
+}
+
 TEST_F(GraphMemoryTest, CtrlDependencies) {
   // Build a simple graph with a control dependency.
   Scope s = Scope::NewRootScope();
",0,test
20db88eec824259764b2eafba377f93ea11776b0,tensorflow/tensorflow,"Ignore nodes that are going to be swapped when computing max memory usage

PiperOrigin-RevId: 181248577",trivial_test_graph_input_yielder.cc,"@@ -31,8 +31,6 @@ namespace {
 GraphDef CreateGraphDef(int num_stages, int width, int tensor_size,
                         bool use_multiple_devices, bool insert_queue,
                         const std::vector<string>& device_names) {
-  CHECK_GE(device_names.size(), width);
-
   using namespace ::tensorflow::ops;  // NOLINT(build/namespaces)
 
   tensorflow::Scope s = tensorflow::Scope::NewRootScope();
@@ -49,13 +47,17 @@ GraphDef CreateGraphDef(int num_stages, int width, int tensor_size,
     std::vector<Output> this_stage;
     for (int j = 0; j < width; j++) {
       if (last_stage.size() == 1) {
-        Output unary_op =
-            Square(s.WithDevice(device_names[use_multiple_devices ? j : 0]),
-                   last_stage[0]);
+        Output unary_op = Square(
+            s.WithDevice(
+                device_names[use_multiple_devices ? j % device_names.size()
+                                                  : 0]),
+            last_stage[0]);
         this_stage.push_back(unary_op);
       } else {
         Output combine =
-            AddN(s.WithDevice(device_names[use_multiple_devices ? j : 0]),
+            AddN(s.WithDevice(
+                     device_names[use_multiple_devices ? j % device_names.size()
+                                                       : 0]),
                  last_stage);
         this_stage.push_back(combine);
       }
",0,test
49b666dbbd58958a7499fa3961c1c8c75757ad7c,tensorflow/tensorflow,"Bring in `isbuiltin`.

PiperOrigin-RevId: 187049824",tf_inspect.py,"@@ -149,6 +149,11 @@ def getsource(object):  # pylint: disable=redefined-builtin
   return _inspect.getsource(tf_decorator.unwrap(object)[1])
 
 
+def isbuiltin(object):  # pylint: disable=redefined-builtin
+  """"""TFDecorator-aware replacement for inspect.isbuiltin.""""""
+  return _inspect.isbuiltin(tf_decorator.unwrap(object)[1])
+
+
 def isclass(object):  # pylint: disable=redefined-builtin
   """"""TFDecorator-aware replacement for inspect.isclass.""""""
   return _inspect.isclass(tf_decorator.unwrap(object)[1])
",0,train
49b666dbbd58958a7499fa3961c1c8c75757ad7c,tensorflow/tensorflow,"Bring in `isbuiltin`.

PiperOrigin-RevId: 187049824",tf_inspect_test.py,"@@ -144,6 +144,19 @@ def test_decorated_function_with_defaults(a, b=2, c='Hello'):
     self.assertEqual(
         expected, tf_inspect.getsource(test_decorated_function_with_defaults))
 
+  def testIsBuiltin(self):
+    self.assertEqual(
+        tf_inspect.isbuiltin(TestDecoratedClass),
+        inspect.isbuiltin(TestDecoratedClass))
+    self.assertEqual(
+        tf_inspect.isbuiltin(test_decorated_function),
+        inspect.isbuiltin(test_decorated_function))
+    self.assertEqual(
+        tf_inspect.isbuiltin(test_undecorated_function),
+        inspect.isbuiltin(test_undecorated_function))
+    self.assertEqual(tf_inspect.isbuiltin(range), inspect.isbuiltin(range))
+    self.assertEqual(tf_inspect.isbuiltin(max), inspect.isbuiltin(max))
+
   def testIsClass(self):
     self.assertTrue(tf_inspect.isclass(TestDecoratedClass))
     self.assertFalse(tf_inspect.isclass(test_decorated_function))
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",c_api_internal.c,"@@ -172,6 +172,8 @@ const char* TfLiteTypeGetName(TfLiteType type) {
       return ""COMPLEX64"";
     case kTfLiteString:
       return ""STRING"";
+    case kTfLiteFloat16:
+      return ""FLOAT16"";
   }
   return ""Unknown type"";
 }
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",c_api_internal.h,"@@ -195,6 +195,11 @@ typedef struct {
   float re, im;  // real and imaginary parts, respectively.
 } TfLiteComplex64;
 
+// Half precision data type compatible with the C99 definition.
+typedef struct {
+  uint16_t data;
+} TfLiteFloat16;
+
 // Types supported by tensor
 typedef enum {
   kTfLiteNoType = 0,
@@ -207,6 +212,7 @@ typedef enum {
   kTfLiteInt16 = 7,
   kTfLiteComplex64 = 8,
   kTfLiteInt8 = 9,
+  kTfLiteFloat16 = 10,
 } TfLiteType;
 
 // Return the name of a given type, for error reporting purposes.
@@ -259,6 +265,8 @@ typedef union {
   int32_t* i32;
   int64_t* i64;
   float* f;
+  // Placeholder for 16b float type. Use uint16* in the pointer union for now.
+  TfLiteFloat16* f16;
   char* raw;
   const char* raw_const;
   uint8_t* uint8;
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",c_api_internal_test.cc,"@@ -78,6 +78,7 @@ TEST(Types, TestTypeNames) {
   };
   EXPECT_EQ(type_name(kTfLiteNoType), ""NOTYPE"");
   EXPECT_EQ(type_name(kTfLiteFloat32), ""FLOAT32"");
+  EXPECT_EQ(type_name(kTfLiteFloat16), ""FLOAT16"");
   EXPECT_EQ(type_name(kTfLiteInt16), ""INT16"");
   EXPECT_EQ(type_name(kTfLiteInt32), ""INT32"");
   EXPECT_EQ(type_name(kTfLiteUInt8), ""UINT8"");
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",flatbuffer_conversions.cc,"@@ -61,9 +61,8 @@ TfLiteStatus ConvertTensorType(TensorType tensor_type, TfLiteType* type,
       *type = kTfLiteFloat32;
       break;
     case TensorType_FLOAT16:
-      error_reporter->Report(""Unimplemented data type float16 in tensor\n"",
-                             tensor_type);
-      return kTfLiteError;
+      *type = kTfLiteFloat16;
+      break;
     case TensorType_INT16:
       *type = kTfLiteInt16;
       break;
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",flatbuffer_conversions_test.cc,"@@ -141,6 +141,13 @@ TEST_F(FlatbufferConversionsTest, TestConvertTensorType) {
   EXPECT_EQ(kTfLiteFloat32, type);
 }
 
+TEST_F(FlatbufferConversionsTest, TestConvertTensorTypeFloat16) {
+  TfLiteType type;
+  EXPECT_EQ(kTfLiteOk,
+            ConvertTensorType(TensorType_FLOAT16, &type, &mock_reporter_));
+  EXPECT_EQ(kTfLiteFloat16, type);
+}
+
 }  // namespace tflite
 
 int main(int argc, char** argv) {
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",subgraph.cc,"@@ -469,6 +469,9 @@ TfLiteStatus Subgraph::BytesRequired(TfLiteType type, const int* dims,
     case kTfLiteInt8:
       *bytes = sizeof(int8_t) * count;
       break;
+    case kTfLiteFloat16:
+      *bytes = sizeof(TfLiteFloat16) * count;
+      break;
     default:
       ReportError(
           ""Only float32, int8, int16, int32, int64, uint8, bool, complex64 ""
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",util.cc,"@@ -60,6 +60,8 @@ TF_DataType GetTensorFlowDataType(TfLiteType type) {
       return TF_FLOAT;
     case kTfLiteFloat32:
       return TF_FLOAT;
+    case kTfLiteFloat16:
+      return TF_HALF;
     case kTfLiteInt16:
       return TF_INT16;
     case kTfLiteInt32:
@@ -83,6 +85,8 @@ TfLiteType GetTensorFlowLiteType(TF_DataType type) {
   switch (type) {
     case TF_FLOAT:
       return kTfLiteFloat32;
+    case TF_HALF:
+      return kTfLiteFloat16;
     case TF_INT16:
       return kTfLiteInt16;
     case TF_INT32:
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",util_test.cc,"@@ -101,9 +101,9 @@ TEST(UtilTest, CopyShapeAndType) {
 
   EXPECT_EQ(
       CopyShapeAndType(&context, Tensor(tensorflow::DT_HALF, {1, 2}), &dst),
-      kTfLiteError);
-  EXPECT_EQ(context.error,
-            ""TF Lite does not support TensorFlow data type: half"");
+      kTfLiteOk);
+  EXPECT_THAT(context.new_size, ElementsAre(1, 2));
+  EXPECT_EQ(dst.type, kTfLiteFloat16);
 }
 
 TEST(UtilTest, TypeConversionsFromTFLite) {
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",TFLTensor.h,"@@ -29,6 +29,9 @@ typedef NS_ENUM(NSUInteger, TFLTensorDataType) {
   /** 32-bit single precision floating point. */
   TFLTensorDataTypeFloat32,
 
+  /** 16-bit half precision floating point. */
+  TFLTensorDataTypeFloat16,
+
   /** 32-bit signed integer. */
   TFLTensorDataTypeInt32,
 
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",enum_mapping.h,"@@ -62,6 +62,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) {
       return TensorType_FLOAT32;  // TODO(aselle): Consider an error.
     case kTfLiteFloat32:
       return TensorType_FLOAT32;
+    case kTfLiteFloat16:
+      return TensorType_FLOAT16;
     case kTfLiteInt32:
       return TensorType_INT32;
     case kTfLiteUInt8:
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",interpreter.cc,"@@ -30,6 +30,11 @@ limitations under the License.
 #include ""tensorflow/lite/schema/schema_generated.h""
 #include ""tensorflow/lite/util.h""
 
+// TODO(b/132087118): move static_assert to c_api_internal when compiled with
+// C++.
+static_assert(sizeof(TfLiteFloat16) == sizeof(uint16_t),
+              ""Float 16 type must be 16 bits."");
+
 namespace tflite {
 
 namespace {
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",interpreter.h,"@@ -74,6 +74,10 @@ constexpr TfLiteType typeToTfLiteType<string>() {
   return kTfLiteString;
 }
 
+template <>
+constexpr TfLiteType typeToTfLiteType<TfLiteFloat16>() {
+  return kTfLiteFloat16;
+}
 // An interpreter for a graph of nodes that input and output from tensors.
 // Each node of the graph processes a set of input tensors and produces a
 // set of output Tensors. All inputs/output tensors are referenced by index.
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",interpreter_test.cc,"@@ -17,6 +17,7 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
+#include ""third_party/eigen3/Eigen/Core""
 #include ""tensorflow/lite/core/api/error_reporter.h""
 #include ""tensorflow/lite/kernels/internal/compatibility.h""
 #include ""tensorflow/lite/kernels/kernel_util.h""
@@ -165,7 +166,7 @@ TEST(BasicInterpreter, CheckAllocate) {
   } cases[] = {
       {kTfLiteFloat32, sizeof(float)}, {kTfLiteInt32, sizeof(int32_t)},
       {kTfLiteUInt8, sizeof(uint8_t)}, {kTfLiteInt64, sizeof(int64_t)},
-      {kTfLiteInt16, sizeof(int16_t)},
+      {kTfLiteInt16, sizeof(int16_t)}, {kTfLiteFloat16, sizeof(TfLiteFloat16)},
   };
 
   for (auto test : cases) {
@@ -238,6 +239,8 @@ TEST(BasicInterpreter, CheckResize) {
   const uint8_t uint8s[] = {3, 4};
   const int64_t int64s[] = {6, -7};
   const int16_t int16s[] = {8, -9};
+  const Eigen::half float16s[] = {Eigen::half_impl::float_to_half_rtne(-3.f),
+                                  Eigen::half_impl::float_to_half_rtne(-4.f)};
 
   struct {
     TfLiteType type;
@@ -249,6 +252,8 @@ TEST(BasicInterpreter, CheckResize) {
       {kTfLiteUInt8, sizeof(uint8_t), reinterpret_cast<const char*>(uint8s)},
       {kTfLiteInt64, sizeof(int64_t), reinterpret_cast<const char*>(int64s)},
       {kTfLiteInt16, sizeof(int16_t), reinterpret_cast<const char*>(int16s)},
+      {kTfLiteFloat16, sizeof(TfLiteFloat16),
+       reinterpret_cast<const char*>(float16s)},
   };
 
   for (auto test : cases) {
@@ -283,10 +288,8 @@ TEST(BasicInterpreter, CheckResize) {
 TEST(BasicInterpreter, CheckAlignment) {
   struct {
     TfLiteType type;
-  } cases[] = {
-      {kTfLiteFloat32}, {kTfLiteInt32}, {kTfLiteUInt8},
-      {kTfLiteInt64},   {kTfLiteInt16},
-  };
+  } cases[] = {{kTfLiteFloat32}, {kTfLiteInt32}, {kTfLiteUInt8},
+               {kTfLiteInt64},   {kTfLiteInt16}, {kTfLiteFloat16}};
 
   for (auto test : cases) {
     Interpreter interpreter;
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",tensor_ctypes.h,"@@ -66,6 +66,11 @@ inline const float* GetTensorData(const TfLiteTensor* tensor) {
   return tensor != nullptr ? tensor->data.f : nullptr;
 }
 
+template <>
+inline const TfLiteFloat16* GetTensorData(const TfLiteTensor* tensor) {
+  return tensor != nullptr ? tensor->data.f16 : nullptr;
+}
+
 template <>
 inline const uint8_t* GetTensorData(const TfLiteTensor* tensor) {
   return tensor != nullptr ? tensor->data.uint8 : nullptr;
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",test_util.h,"@@ -20,7 +20,6 @@ limitations under the License.
 
 #include <gmock/gmock.h>
 #include <gtest/gtest.h>
-
 #include ""tensorflow/core/platform/logging.h""
 #include ""tensorflow/lite/interpreter.h""
 #include ""tensorflow/lite/kernels/internal/tensor_utils.h""
@@ -568,6 +567,7 @@ class SingleOpTest : public ::testing::TestWithParam<string> {
 template <typename T>
 TensorType GetTensorType() {
   if (std::is_same<T, float>::value) return TensorType_FLOAT32;
+  if (std::is_same<T, TfLiteFloat16>::value) return TensorType_FLOAT16;
   if (std::is_same<T, int32_t>::value) return TensorType_INT32;
   if (std::is_same<T, uint8_t>::value) return TensorType_UINT8;
   if (std::is_same<T, string>::value) return TensorType_STRING;
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",optional_debug_tools.cc,"@@ -56,6 +56,8 @@ const char* TensorTypeName(TfLiteType type) {
       return ""kTfLiteInt16"";
     case kTfLiteComplex64:
       return ""kTfLiteComplex64"";
+    case kTfLiteFloat16:
+      return ""kTfLiteFloat16"";
   }
   return ""(invalid)"";
 }
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",python_utils.cc,"@@ -32,6 +32,8 @@ int TfLiteTypeToPyArrayType(TfLiteType tf_lite_type) {
   switch (tf_lite_type) {
     case kTfLiteFloat32:
       return NPY_FLOAT32;
+    case kTfLiteFloat16:
+      return NPY_FLOAT16;
     case kTfLiteInt32:
       return NPY_INT32;
     case kTfLiteInt16:
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",calibration_wrapper.cc,"@@ -61,6 +61,8 @@ inline TensorType TfLiteTypeToSchemaType(TfLiteType type) {
       return TensorType_FLOAT32;  // TODO(b/129336260): No schema type for none.
     case kTfLiteFloat32:
       return TensorType_FLOAT32;
+    case kTfLiteFloat16:
+      return TensorType_FLOAT16;
     case kTfLiteInt32:
       return TensorType_INT32;
     case kTfLiteUInt8:
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",util.py,"@@ -31,6 +31,7 @@ from tensorflow.python.training.saver import export_meta_graph as _export_meta_g
 # Map of tf.dtypes to TFLite types_flag_pb2.
 _MAP_TF_TO_TFLITE_TYPES = {
     dtypes.float32: _types_pb2.FLOAT,
+    dtypes.float16: _types_pb2.FLOAT16,
     dtypes.int32: _types_pb2.INT32,
     dtypes.int64: _types_pb2.INT64,
     dtypes.string: _types_pb2.STRING,
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",util_test.py,"@@ -50,6 +50,8 @@ class UtilTest(test_util.TensorFlowTestCase):
     self.assertEqual(
         util.convert_dtype_to_tflite_type(dtypes.complex64),
         _types_pb2.COMPLEX64)
+    self.assertEqual(
+        util.convert_dtype_to_tflite_type(dtypes.half), _types_pb2.FLOAT16)
     with self.assertRaises(ValueError):
       util.convert_dtype_to_tflite_type(dtypes.bool)
 
",0,train
c58ddf2520bc0f80834756f4876fc10418e380ee,tensorflow/tensorflow,"Propagate half precision float through tflite

PiperOrigin-RevId: 247082214",model.h,"@@ -223,6 +223,7 @@ enum class ArrayDataType : uint8 {
   kUint64,  // 10
   kString,
   kComplex64,
+  kFloat16,
 };
 
 // Compile-time logic to map ArrayDataType to the corresponding C++ scalar type
",0,train
e74a115bcf5cd27f476b46161a639e9ec599491d,tensorflow/tensorflow,"[TF-numpy] Adds __rmatmul__ method to ndarray.

PiperOrigin-RevId: 317771125
Change-Id: I719c46d97ae1c68ac59dcd1cf8f65d067ddc7658",np_math_ops.py,"@@ -950,11 +950,12 @@ setattr(np_arrays.ndarray, '__sub__', _wrap(subtract))
 setattr(np_arrays.ndarray, '__rsub__', _wrap(subtract, True))
 setattr(np_arrays.ndarray, '__mul__', _wrap(multiply))
 setattr(np_arrays.ndarray, '__rmul__', _wrap(multiply, True))
+setattr(np_arrays.ndarray, '__matmul__', _wrap(matmul))
+setattr(np_arrays.ndarray, '__rmatmul__', _wrap(matmul, True))
 setattr(np_arrays.ndarray, '__pow__', _wrap(power))
 setattr(np_arrays.ndarray, '__rpow__', _wrap(power, True))
 setattr(np_arrays.ndarray, '__truediv__', _wrap(true_divide))
 setattr(np_arrays.ndarray, '__rtruediv__', _wrap(true_divide, True))
-setattr(np_arrays.ndarray, '__matmul__', _wrap(matmul))
 
 
 def _comparison(tf_fun, x1, x2, cast_bool_to_int=False):
",0,train
9fda694598631b6207f64fa5c39e6f76dca313a9,tensorflow/tensorflow,"Release the GIL when waiting for pending async ops

Prevents deadlocks with PyFunc ops

PiperOrigin-RevId: 361689648
Change-Id: I2e20ddae99e8ca5dad324ac9a13941cf93cf4c6f",core_test.py,"@@ -598,6 +598,12 @@ class TFETest(test_util.TensorFlowTestCase):
       self.assertAllEqual(test_fn(test_var), 3.0)
     async_executor.wait()
 
+    with context.executor_scope(async_executor):
+      test_var = variables.Variable(2.)
+      result = test_fn(test_var)
+      context.async_wait()
+      self.assertAllEqual(result, 3.0)
+
   @test_util.run_gpu_only
   def testNumpyForceCPU(self):
     cpu = constant_op.constant([[1., 2.], [3., 4.]])
",0,train
9fda694598631b6207f64fa5c39e6f76dca313a9,tensorflow/tensorflow,"Release the GIL when waiting for pending async ops

Prevents deadlocks with PyFunc ops

PiperOrigin-RevId: 361689648
Change-Id: I2e20ddae99e8ca5dad324ac9a13941cf93cf4c6f",tfe_wrapper.cc,"@@ -753,13 +753,19 @@ PYBIND11_MODULE(_pywrap_tfe, m) {
   m.def(""TFE_ContextSyncExecutors"", [](py::handle& ctx) {
     tensorflow::Safe_TF_StatusPtr status =
         tensorflow::make_safe(TF_NewStatus());
+    // NOTE: release Python GIL for pending PyFunc ops to be executed properly.
+    Py_BEGIN_ALLOW_THREADS;
     TFE_ContextAsyncWait(tensorflow::InputTFE_Context(ctx), status.get());
+    Py_END_ALLOW_THREADS;
     tensorflow::MaybeRaiseRegisteredFromTFStatus(status.get());
   });
   m.def(""TFE_ContextClearExecutors"", [](py::handle& ctx) {
     tensorflow::Safe_TF_StatusPtr status =
         tensorflow::make_safe(TF_NewStatus());
+    // NOTE: release Python GIL for pending PyFunc ops to be executed properly.
+    Py_BEGIN_ALLOW_THREADS;
     TFE_ContextAsyncWait(tensorflow::InputTFE_Context(ctx), status.get());
+    Py_END_ALLOW_THREADS;
     // NOTE: different from TFE_ContextSyncExecutors that raises potential
     // errors, deliberately ignore executor statuses in cleanup.
   });
",0,train
bd464260b8eab307a1672fe16d5c26bc0d681ad5,tensorflow/tensorflow,Format comments,filesystem_interface.h,"@@ -836,27 +836,38 @@ typedef struct TF_FilesystemOps {
   /// `num_options`. Ownership of the array is transferred to caller and the
   /// caller is responsible of freeing the buffers using respective file systems
   /// allocation API.
+  /// Plugins:
+  ///   * Must set `status` to `TF_OK` if `options` and `num_options` set.
+  ///     If there is no configurable option, `num_options` should be 0.
+  ///   * Might use any other error value for `status` to signal other errors.
+  /// DEFAULT IMPLEMENTATION: return 0 options and `TF_OK`.
   void (*get_filesystem_configuration)(const TF_Filesystem* filesystem,
                                        TF_Filesystem_Option** options,
                                        int* num_options, TF_Status* status);
 
   /// Updates filesystem configuration with options passed in `options`. It can
   /// contain full set of options supported by the filesystem or just a subset
-  /// of them. Ownership of options and buffers therein belongs to the caller and
-  /// any buffers need to be allocated through filesystem allocation API. On
-  /// success should return TF_OK in `status`. On failure, it should return a relevant error
-  /// code. Filesystems may choose to ignore configuration errors but should at
-  /// least display a warning or error message to warn the users.
+  /// of them. Ownership of options and buffers therein belongs to the caller
+  /// and any buffers need to be allocated through filesystem allocation API.
+  /// Filesystems may choose to ignore configuration errors but should at least
+  /// display a warning or error message to warn the users. Plugins:
+  ///   * Must set `status` to `TF_OK` if options are updated.
+  ///   * Might use any other error value for `status` to signal other errors.
+  /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`.
   void (*set_filesystem_configuration)(const TF_Filesystem* filesystem,
                                        const TF_Filesystem_Option** options,
                                        int num_options, TF_Status* status);
 
   /// Returns the value of the filesystem option given in `key` in `option`.
   /// Valid values of the `key` are returned by
-  /// `get_file_system_configuration_keys` call. This method should return TF_OK
-  /// on success, TF_NOT_FOUND if the key does not exist. Ownership of the
+  /// `get_file_system_configuration_keys` call. Ownership of the
   /// `option` is transferred to caller. Buffers therein should be allocated and
   /// freed by the relevant filesystems allocation API.
+  /// Plugins:
+  ///   * Must set `status` to `TF_OK` if `option` is set
+  ///   * Must set `status` to `TF_NOT_FOUND` if the key is invalid
+  ///   * Might use any other error value for `status` to signal other errors.
+  /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`.
   void (*get_filesystem_configuration_option)(const TF_Filesystem* filesystem,
                                               const char* key,
                                               TF_Filesystem_Option** option,
@@ -864,10 +875,13 @@ typedef struct TF_FilesystemOps {
 
   /// Sets the value of the filesystem option given in `key` to value in
   /// `option`. Valid values of the `key` are returned by
-  /// `get_file_system_configuration_keys` call. This method should return TF_OK
-  /// on success, TF_NOT_FOUND if the key does not exist or other relevant error
-  /// codes. `option` and the `key` are owned by the caller. Buffers therein
-  /// should be allocated and freed by the filesystems allocation API.
+  /// `get_file_system_configuration_keys` call. Ownership of the `option` and
+  /// the `key` belongs to the caller. Buffers therein should be allocated and
+  /// freed by the filesystems allocation API. Plugins:
+  ///   * Must set `status` to `TF_OK` if `option` is set/updated
+  ///   * Must set `status` to `TF_NOT_FOUND` if the key is invalid
+  ///   * Might use any other error value for `status` to signal other errors.
+  /// DEFAULT IMPLEMENTATION: return `TF_NOT_FOUND`.
   void (*set_filesystem_configuration_option)(
       const TF_Filesystem* filesystem, const TF_Filesystem_Option* option,
       TF_Status* status);
@@ -875,6 +889,11 @@ typedef struct TF_FilesystemOps {
   /// Returns a list of valid configuration keys in `keys` array and number of
   /// keys in `num_keys`. Ownership of the buffers in `keys` are transferred to
   /// caller and needs to be freed using relevant filesystem allocation API.
+  /// Plugins:
+  ///   * Must set `status` to `TF_OK` on success. If there are no configurable
+  ///   keys, `num_keys` should be set to 0
+  ///   * Might use any other error value for `status` to signal other errors.
+  /// DEFAULT IMPLEMENTATION: return `TF_OK` and `num_keys`=0.
   void (*get_filesystem_configuration_keys)(const TF_Filesystem* filesystem,
                                             char** keys, int* num_keys,
                                             TF_Status* status);
",0,train
2773c6370d9afb529f4aba4fe852a6ad38823da4,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 252093638",subgraph.cc,"@@ -288,11 +288,6 @@ TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
     return kTfLiteOk;
   }
 
-  TFLITE_LOG(tflite::TFLITE_LOG_INFO,
-             ""Replacing %d node(s) with delegate (%s) node."",
-             nodes_to_replace->size,
-             registration.custom_name ? registration.custom_name : ""unknown"");
-
   // Annotate the registration as DELEGATE op.
   registration.builtin_code = BuiltinOperator_DELEGATE;
 
@@ -303,6 +298,13 @@ TfLiteStatus Subgraph::ReplaceNodeSubsetsWithDelegateKernels(
   PartitionGraphIntoIndependentNodeSubsets(&info, nodes_to_replace,
                                            &node_subsets);
 
+  TFLITE_LOG(
+      tflite::TFLITE_LOG_INFO,
+      ""Replacing %d node(s) with delegate (%s) node, yielding %zu partitions."",
+      nodes_to_replace->size,
+      registration.custom_name ? registration.custom_name : ""unknown"",
+      node_subsets.size());
+
   execution_plan_.clear();
 
   for (auto& node_subset : node_subsets) {
",0,train
ebd59a6298faea4a590e4eafedd06c91fbe3995e,tensorflow/tensorflow,add & for runner,dataset_test_base.cc,"@@ -333,7 +333,7 @@ Status DatasetOpsTestBase::InitFunctionLibraryRuntime(
       nullptr /* cluster_flr */);
   flr_ = pflr_->GetFLR(""/job:localhost/replica:0/task:0/cpu:0"");
   if (thread_pool_ == nullptr) {
-    runner_ = [](const std::function<void()> fn) { fn(); };
+    runner_ = [](const std::function<void()>& fn) { fn(); };
   } else {
     runner_ = [this](std::function<void()> fn) {
       thread_pool_->Schedule(std::move(fn));
",0,test
ebd59a6298faea4a590e4eafedd06c91fbe3995e,tensorflow/tensorflow,add & for runner,single_threaded_executor_test.cc,"@@ -68,7 +68,7 @@ class ExecutorTest : public ::testing::Test {
     };
     delete exec_;
     TF_CHECK_OK(NewSingleThreadedExecutor(params, *graph, &exec_));
-    runner_ = [](const std::function<void()> fn) { fn(); };
+    runner_ = [](const std::function<void()>& fn) { fn(); };
     rendez_ = NewLocalRendezvous();
   }
 
",0,test
37eed82574c622bf91d72000b7b6ebdc92c9317c,tensorflow/tensorflow,"Removing unused options from batch_ops.batch_function().

PiperOrigin-RevId: 203489357",batch_ops.py,"@@ -58,8 +58,6 @@ def batch_function(num_batch_threads,
                    max_batch_size,
                    batch_timeout_micros,
                    allowed_batch_sizes=None,
-                   grad_timeout_micros=60 * 1000 * 1000,
-                   unbatch_timeout_micros=60 * 1000 * 1000,
                    max_enqueued_batches=10):
   """"""Batches the computation done by the decorated function.
 
@@ -94,10 +92,6 @@ def batch_function(num_batch_threads,
      does nothing. Otherwise, supplies a list of batch sizes, causing the op
      to pad batches up to one of those sizes. The entries must increase
      monotonically, and the final entry must equal max_batch_size.
-    grad_timeout_micros: The timeout to use for the gradient. See the
-     documentation of the unbatch op for more details. Defaults to 60s.
-    unbatch_timeout_micros: The timeout to use for unbatching. See the
-     documentation of the unbatch op for more details. Defaults to 60s.
     max_enqueued_batches: The maximum depth of the batch queue. Defaults to 10.
 
   Returns:
",0,train
9d703eecbfca400f2a1d4786050e171a94696117,tensorflow/tensorflow,"Add `DistributionStrategy.experimental_run_v2`.

PiperOrigin-RevId: 237071002",distribute_lib.py,"@@ -437,7 +437,7 @@ class DistributionStrategy(object):
     """"""Runs ops in `fn` on each replica, with inputs from `input_iterator`.
 
     When eager execution is enabled, executes ops specified by `fn` on each
-    replica.  Otherwise, builds a graph to execute the ops on each replica.
+    replica. Otherwise, builds a graph to execute the ops on each replica.
 
     Each replica will take a single, different input from the inputs provided by
     one `get_next` call on the input iterator.
@@ -445,13 +445,13 @@ class DistributionStrategy(object):
     `fn` may call `tf.distribute.get_replica_context()` to access members such
     as `replica_id_in_sync_group`.
 
-    IMPORTANT: Depending on the `DistributionStrategy` being used, and whether
-    eager execution is enabled, `fn` may be called one or more times (once for
-    each replica).
+    IMPORTANT: Depending on the `tf.distribute.Strategy` implementation being
+    used, and whether eager execution is enabled, `fn` may be called one or more
+    times (once for each replica).
 
     Args:
-      fn: function to run. The inputs to the function must match the outputs of
-        `input_iterator.get_next()`. The output must be a `tf.nest` of
+      fn: The function to run. The inputs to the function must match the outputs
+        of `input_iterator.get_next()`. The output must be a `tf.nest` of
         `Tensor`s.
       input_iterator: (Optional) input iterator from which the inputs are taken.
 
@@ -463,11 +463,36 @@ class DistributionStrategy(object):
       single replica).
     """"""
     with self.scope():
-      if input_iterator is None:
-        return self._extended.call_for_each_replica(fn)
-      else:
-        inputs = input_iterator.get_next()
-        return self._extended.call_for_each_replica(fn, args=(inputs,))
+      args = (input_iterator.get_next(),) if input_iterator is not None else ()
+    return self.experimental_run_v2(fn, args=args)
+
+  def experimental_run_v2(self, fn, args=(), kwargs=None):
+    """"""Runs ops in `fn` on each replica, with the given arguments.
+
+    When eager execution is enabled, executes ops specified by `fn` on each
+    replica. Otherwise, builds a graph to execute the ops on each replica.
+
+    `fn` may call `tf.distribute.get_replica_context()` to access members such
+    as `replica_id_in_sync_group`.
+
+    IMPORTANT: Depending on the `tf.distribute.Strategy` implementation being
+    used, and whether eager execution is enabled, `fn` may be called one or more
+    times (once for each replica).
+
+    Args:
+      fn: The function to run. The output must be a `tf.nest` of `Tensor`s.
+      args: (Optional) Positional arguments to `fn`.
+      kwargs: (Optional) Keyword arguments to `fn`.
+
+    Returns:
+      Merged return value of `fn` across replicas. The structure of the return
+      value is the same as the return value from `fn`. Each element in the
+      structure can either be `PerReplica` (if the values are unsynchronized),
+      `Mirrored` (if the values are kept in sync), or `Tensor` (if running on a
+      single replica).
+    """"""
+    with self.scope():
+      return self._extended.call_for_each_replica(fn, args=args, kwargs=kwargs)
 
   def reduce(self, reduce_op, value):
     """"""Reduce `value` across replicas.
",0,test
9d703eecbfca400f2a1d4786050e171a94696117,tensorflow/tensorflow,"Add `DistributionStrategy.experimental_run_v2`.

PiperOrigin-RevId: 237071002",tpu_strategy.py,"@@ -154,32 +154,28 @@ class TPUStrategy(distribute_lib.DistributionStrategy):
   # TODO(cjfj): Modify `_call_for_each_replica` in `TPUExtended` such that this
   # can use the default implementation.
   # This implementation runs a single step. It does not use infeed or outfeed.
-  def experimental_run(self, fn, input_iterator=None):
+  def experimental_run_v2(self, fn, args=(), kwargs=None):
     """"""See base class.""""""
     if context.executing_eagerly() and not ops.inside_function():
       raise NotImplementedError(
           ""Eager mode not supported in TPUStrategy outside TF functions."")
 
-    if input_iterator is None:
-      inputs = []
-    else:
-      inputs = input_iterator.get_next()
+    if kwargs is None:
+      kwargs = {}
 
     result = [None]
-    def replicated_fn(replica_id, replica_input):
+    def replicated_fn(replica_id, replica_args, replica_kwargs):
       """"""Wraps user function to provide replica ID and `Tensor` inputs.""""""
       with _TPUReplicaContext(self, replica_id_in_sync_group=replica_id):
-        if input_iterator is None:
-          result[0] = fn()
-        else:
-          result[0] = fn(replica_input)
+        result[0] = fn(*replica_args, **replica_kwargs)
       return result[0]
 
     replicate_inputs = []  # By replica.
     for i in range(self.num_replicas_in_sync):
       replicate_inputs.append(
           [constant_op.constant(i, dtype=dtypes.int32),
-           values.select_replica(i, inputs)])
+           values.select_replica(i, args),
+           values.select_replica(i, kwargs)])
 
     with self.scope():
       replicate_outputs = tpu.replicate(replicated_fn, replicate_inputs)
",0,test
95fae75e4d15c59a43d8eaf150b8c32c7c6d1495,tensorflow/tensorflow,Fixing MklCPUAllocator error introduced by commit #1baac7862739525351d25202800dc04e8ec3868b,mkl_cpu_allocator.h,"@@ -25,7 +25,7 @@ limitations under the License.
 #include <cstdlib>
 #include <string>
 #include ""tensorflow/core/common_runtime/bfc_allocator.h""
-#include ""tensorflow/core/framework/allocator.h""
+#include ""tensorflow/core/common_runtime/visitable_allocator.h""
 #include ""tensorflow/core/lib/strings/numbers.h""
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/platform/mem.h""
@@ -161,7 +161,7 @@ class MklCPUAllocator : public VisitableAllocator {
   /// The alignment that we need for the allocations
   static const size_t kAlignment = 64;
 
-  Allocator* allocator_;  // owned by this class
+  VisitableAllocator* allocator_;  // owned by this class
 };
 
 }  // namespace tensorflow
",0,train
7047ceec37a3f004386621e8e56b825ab0d648a3,tensorflow/tensorflow,"Update sparse input documentation.

PiperOrigin-RevId: 312789707
Change-Id: I09410e9adc25cfe6099cf1fd1a77edc3680a3a59",input_layer.py,"@@ -218,7 +218,9 @@ def Input(  # pylint: disable=invalid-name
       dtype: The data type expected by the input, as a string
           (`float32`, `float64`, `int32`...)
       sparse: A boolean specifying whether the placeholder to be created is
-          sparse. Only one of 'ragged' and 'sparse' can be True.
+          sparse. Only one of 'ragged' and 'sparse' can be True. Note that,
+          if `sparse` is False, sparse tensors can still be passed into the
+          input - they will be densified with a default value of 0.
       tensor: Optional existing tensor to wrap into the `Input` layer.
           If set, the layer will not create a placeholder tensor.
       ragged: A boolean specifying whether the placeholder to be created is
",0,test
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",bidirectional_sequence_lstm.cc,"@@ -135,7 +135,7 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE(context, params->cell_clip >= 0);
   TF_LITE_ENSURE(context, params->proj_clip >= 0);
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, input_to_input_weights_tensor);
   if (input_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
@@ -155,7 +155,7 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input);
 
-  TfLiteTensor* recurrent_to_input_weights =
+  const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, recurrent_to_input_weights_tensor);
   if (recurrent_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
@@ -189,21 +189,21 @@ TfLiteStatus CheckLstmTensorDimensions(
        (recurrent_to_input_weights == nullptr));
   TF_LITE_ENSURE(context, cifg_weights_all_or_none == true);
 
-  TfLiteTensor* cell_to_input_weights =
+  const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, cell_to_input_weights_tensor);
   if (cell_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_forget_weights =
+  const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, cell_to_forget_weights_tensor);
   if (cell_to_forget_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_output_weights =
+  const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, cell_to_output_weights_tensor);
   if (cell_to_output_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
@@ -222,7 +222,7 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE(context, peephole_weights_all_or_none == true);
 
   // Make sure the input gate bias is present only when not a CIFG-LSTM.
-  TfLiteTensor* input_gate_bias =
+  const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, input_gate_bias_tensor);
   if (use_cifg) {
     TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr);
@@ -246,7 +246,7 @@ TfLiteStatus CheckLstmTensorDimensions(
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell);
 
-  TfLiteTensor* projection_weights =
+  const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, projection_weights_tensor);
   if (projection_weights) {
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
@@ -254,7 +254,7 @@ TfLiteStatus CheckLstmTensorDimensions(
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
   }
 
-  TfLiteTensor* projection_bias =
+  const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, projection_bias_tensor);
   if (projection_bias) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
@@ -374,7 +374,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   fw_output_state->allocation_type = kTfLiteArenaRwPersistent;
   fw_cell_state->allocation_type = kTfLiteArenaRwPersistent;
 
-  TfLiteTensor* fw_input_to_input_weights =
+  const TfLiteTensor* fw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor);
   const bool fw_use_cifg = (fw_input_to_input_weights == nullptr);
   TfLiteIntArray* fw_scratch_buffer_size = TfLiteIntArrayCreate(2);
@@ -442,7 +442,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   bw_output_state->allocation_type = kTfLiteArenaRwPersistent;
   bw_cell_state->allocation_type = kTfLiteArenaRwPersistent;
 
-  TfLiteTensor* bw_input_to_input_weights =
+  const TfLiteTensor* bw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor);
   const bool bw_use_cifg = (bw_input_to_input_weights == nullptr);
   TfLiteIntArray* bw_scratch_buffer_size = TfLiteIntArrayCreate(2);
@@ -470,7 +470,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const int n_input = input->dims->data[2];
 
   // Tensors for the forward cell.
-  TfLiteTensor* fw_input_to_input_weights =
+  const TfLiteTensor* fw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kFwInputToInputWeightsTensor);
   const TfLiteTensor* fw_input_to_forget_weights =
       GetInput(context, node, kFwInputToForgetWeightsTensor);
@@ -479,7 +479,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* fw_input_to_output_weights =
       GetInput(context, node, kFwInputToOutputWeightsTensor);
 
-  TfLiteTensor* fw_recurrent_to_input_weights =
+  const TfLiteTensor* fw_recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kFwRecurrentToInputWeightsTensor);
   const TfLiteTensor* fw_recurrent_to_forget_weights =
       GetInput(context, node, kFwRecurrentToForgetWeightsTensor);
@@ -488,14 +488,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* fw_recurrent_to_output_weights =
       GetInput(context, node, kFwRecurrentToOutputWeightsTensor);
 
-  TfLiteTensor* fw_cell_to_input_weights =
+  const TfLiteTensor* fw_cell_to_input_weights =
       GetOptionalInputTensor(context, node, kFwCellToInputWeightsTensor);
-  TfLiteTensor* fw_cell_to_forget_weights =
+  const TfLiteTensor* fw_cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kFwCellToForgetWeightsTensor);
-  TfLiteTensor* fw_cell_to_output_weights =
+  const TfLiteTensor* fw_cell_to_output_weights =
       GetOptionalInputTensor(context, node, kFwCellToOutputWeightsTensor);
 
-  TfLiteTensor* fw_input_gate_bias =
+  const TfLiteTensor* fw_input_gate_bias =
       GetOptionalInputTensor(context, node, kFwInputGateBiasTensor);
   const TfLiteTensor* fw_forget_gate_bias =
       GetInput(context, node, kFwForgetGateBiasTensor);
@@ -504,9 +504,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* fw_output_gate_bias =
       GetInput(context, node, kFwOutputGateBiasTensor);
 
-  TfLiteTensor* fw_projection_weights =
+  const TfLiteTensor* fw_projection_weights =
       GetOptionalInputTensor(context, node, kFwProjectionWeightsTensor);
-  TfLiteTensor* fw_projection_bias =
+  const TfLiteTensor* fw_projection_bias =
       GetOptionalInputTensor(context, node, kFwProjectionBiasTensor);
 
   TfLiteTensor* fw_output_state =
@@ -515,7 +515,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* fw_output = GetOutput(context, node, kFwOutputTensor);
 
   // Tensors for the backward cell.
-  TfLiteTensor* bw_input_to_input_weights =
+  const TfLiteTensor* bw_input_to_input_weights =
       GetOptionalInputTensor(context, node, kBwInputToInputWeightsTensor);
   const TfLiteTensor* bw_input_to_forget_weights =
       GetInput(context, node, kBwInputToForgetWeightsTensor);
@@ -524,7 +524,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bw_input_to_output_weights =
       GetInput(context, node, kBwInputToOutputWeightsTensor);
 
-  TfLiteTensor* bw_recurrent_to_input_weights =
+  const TfLiteTensor* bw_recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kBwRecurrentToInputWeightsTensor);
   const TfLiteTensor* bw_recurrent_to_forget_weights =
       GetInput(context, node, kBwRecurrentToForgetWeightsTensor);
@@ -533,14 +533,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bw_recurrent_to_output_weights =
       GetInput(context, node, kBwRecurrentToOutputWeightsTensor);
 
-  TfLiteTensor* bw_cell_to_input_weights =
+  const TfLiteTensor* bw_cell_to_input_weights =
       GetOptionalInputTensor(context, node, kBwCellToInputWeightsTensor);
-  TfLiteTensor* bw_cell_to_forget_weights =
+  const TfLiteTensor* bw_cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kBwCellToForgetWeightsTensor);
-  TfLiteTensor* bw_cell_to_output_weights =
+  const TfLiteTensor* bw_cell_to_output_weights =
       GetOptionalInputTensor(context, node, kBwCellToOutputWeightsTensor);
 
-  TfLiteTensor* bw_input_gate_bias =
+  const TfLiteTensor* bw_input_gate_bias =
       GetOptionalInputTensor(context, node, kBwInputGateBiasTensor);
   const TfLiteTensor* bw_forget_gate_bias =
       GetInput(context, node, kBwForgetGateBiasTensor);
@@ -549,9 +549,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* bw_output_gate_bias =
       GetInput(context, node, kBwOutputGateBiasTensor);
 
-  TfLiteTensor* bw_projection_weights =
+  const TfLiteTensor* bw_projection_weights =
       GetOptionalInputTensor(context, node, kBwProjectionWeightsTensor);
-  TfLiteTensor* bw_projection_bias =
+  const TfLiteTensor* bw_projection_bias =
       GetOptionalInputTensor(context, node, kBwProjectionBiasTensor);
 
   TfLiteTensor* bw_output_state =
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",fully_connected.cc,"@@ -91,7 +91,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
-  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
   // Check all the parameters of tensor match within themselves and match the
@@ -347,7 +347,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
   const TfLiteTensor* filter = GetInput(context, node, kWeightsTensor);
-  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
 
   switch (filter->type) {  // Already know in/out types are same.
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",kernel_util.h,"@@ -47,8 +47,9 @@ inline int64_t NumElements(const TfLiteTensor* t) {
   return count;
 }
 
-inline TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
-                                            const TfLiteNode* node, int index) {
+inline const TfLiteTensor* GetOptionalInputTensor(TfLiteContext* context,
+                                                  const TfLiteNode* node,
+                                                  int index) {
   const bool use_tensor = node->inputs->data[index] != kOptionalTensor;
   if (use_tensor) {
     return &context->tensors[node->inputs->data[index]];
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",lstm.cc,"@@ -92,7 +92,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE(context, params->cell_clip >= 0);
   TF_LITE_ENSURE(context, params->proj_clip >= 0);
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   if (input_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
@@ -112,7 +112,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input);
 
-  TfLiteTensor* recurrent_to_input_weights =
+  const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
   if (recurrent_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
@@ -146,21 +146,21 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
        (recurrent_to_input_weights == nullptr));
   TF_LITE_ENSURE(context, cifg_weights_all_or_none == true);
 
-  TfLiteTensor* cell_to_input_weights =
+  const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
   if (cell_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_forget_weights =
+  const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor);
   if (cell_to_forget_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_output_weights =
+  const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
   if (cell_to_output_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
@@ -179,7 +179,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE(context, peephole_weights_all_or_none == true);
 
   // Make sure the input gate bias is present only when not a CIFG-LSTM.
-  TfLiteTensor* input_gate_bias =
+  const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, kInputGateBiasTensor);
   if (use_cifg) {
     TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr);
@@ -202,7 +202,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell);
 
-  TfLiteTensor* projection_weights =
+  const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, kProjectionWeightsTensor);
   if (projection_weights) {
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
@@ -210,7 +210,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
   }
 
-  TfLiteTensor* projection_bias =
+  const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, kProjectionBiasTensor);
   if (projection_bias) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
@@ -298,7 +298,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   output_state->allocation_type = kTfLiteArenaRwPersistent;
   cell_state->allocation_type = kTfLiteArenaRwPersistent;
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   const bool use_cifg = (input_to_input_weights == nullptr);
   if (use_cifg) {
@@ -324,7 +324,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   const TfLiteTensor* input_to_forget_weights =
       GetInput(context, node, kInputToForgetWeightsTensor);
@@ -333,7 +333,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input_to_output_weights =
       GetInput(context, node, kInputToOutputWeightsTensor);
 
-  TfLiteTensor* recurrent_to_input_weights =
+  const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
   const TfLiteTensor* recurrent_to_forget_weights =
       GetInput(context, node, kRecurrentToForgetWeightsTensor);
@@ -342,14 +342,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* recurrent_to_output_weights =
       GetInput(context, node, kRecurrentToOutputWeightsTensor);
 
-  TfLiteTensor* cell_to_input_weights =
+  const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
-  TfLiteTensor* cell_to_forget_weights =
+  const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor);
-  TfLiteTensor* cell_to_output_weights =
+  const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
 
-  TfLiteTensor* input_gate_bias =
+  const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, kInputGateBiasTensor);
   const TfLiteTensor* forget_gate_bias =
       GetInput(context, node, kForgetGateBiasTensor);
@@ -357,9 +357,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* output_gate_bias =
       GetInput(context, node, kOutputGateBiasTensor);
 
-  TfLiteTensor* projection_weights =
+  const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, kProjectionWeightsTensor);
-  TfLiteTensor* projection_bias =
+  const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, kProjectionBiasTensor);
 
   TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor);
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",pad.cc,"@@ -45,7 +45,7 @@ struct PadContext {
     output = GetOutput(context, node, 0);
     dims = NumDimensions(input);
   }
-  TfLiteTensor* constant_values;
+  const TfLiteTensor* constant_values;
   const TfLiteTensor* input;
   const TfLiteTensor* paddings;
   TfLiteTensor* output;
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",svdf.cc,"@@ -74,7 +74,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   TF_LITE_ASSERT_EQ(input->dims->data[1], weights_feature->dims->data[1]);
   TF_LITE_ASSERT_EQ(weights_time->dims->data[0], num_filters);
 
-  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
   if (bias) {
     TF_LITE_ASSERT_EQ(bias->dims->data[0], num_units);
   }
@@ -134,7 +134,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   TfLiteTensor* output = GetOutput(context, node, kOutputTensor);
   TfLiteTensor* scratch = GetTemporary(context, node, /*index=*/0);
 
-  TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
+  const TfLiteTensor* bias = GetOptionalInputTensor(context, node, kBiasTensor);
 
   const int rank = params->rank;
   const int batch_size = input->dims->data[0];
",0,train
b2e53b91019f9ab00fe133fe10b2d29bc7e5886c,tensorflow/tensorflow,"Making GetOptionalInput from kernel_util.h return a pointer to const data.

PiperOrigin-RevId: 196932028",unidirectional_sequence_lstm.cc,"@@ -92,7 +92,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE(context, params->cell_clip >= 0);
   TF_LITE_ENSURE(context, params->proj_clip >= 0);
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   if (input_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, input_to_input_weights->dims->size, 2);
@@ -112,7 +112,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[0], n_cell);
   TF_LITE_ENSURE_EQ(context, input_to_cell_weights->dims->data[1], n_input);
 
-  TfLiteTensor* recurrent_to_input_weights =
+  const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
   if (recurrent_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, recurrent_to_input_weights->dims->size, 2);
@@ -146,21 +146,21 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
        (recurrent_to_input_weights == nullptr));
   TF_LITE_ENSURE(context, cifg_weights_all_or_none == true);
 
-  TfLiteTensor* cell_to_input_weights =
+  const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
   if (cell_to_input_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_input_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_forget_weights =
+  const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor);
   if (cell_to_forget_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->size, 1);
     TF_LITE_ENSURE_EQ(context, cell_to_forget_weights->dims->data[0], n_cell);
   }
 
-  TfLiteTensor* cell_to_output_weights =
+  const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
   if (cell_to_output_weights) {
     TF_LITE_ENSURE_EQ(context, cell_to_output_weights->dims->size, 1);
@@ -179,7 +179,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE(context, peephole_weights_all_or_none == true);
 
   // Make sure the input gate bias is present only when not a CIFG-LSTM.
-  TfLiteTensor* input_gate_bias =
+  const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, kInputGateBiasTensor);
   if (use_cifg) {
     TF_LITE_ENSURE_EQ(context, input_gate_bias, nullptr);
@@ -202,7 +202,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->size, 1);
   TF_LITE_ENSURE_EQ(context, output_gate_bias->dims->data[0], n_cell);
 
-  TfLiteTensor* projection_weights =
+  const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, kProjectionWeightsTensor);
   if (projection_weights) {
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->size, 2);
@@ -210,7 +210,7 @@ TfLiteStatus CheckInputTensorDimensions(TfLiteContext* context,
     TF_LITE_ENSURE_EQ(context, projection_weights->dims->data[1], n_cell);
   }
 
-  TfLiteTensor* projection_bias =
+  const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, kProjectionBiasTensor);
   if (projection_bias) {
     TF_LITE_ENSURE_EQ(context, projection_bias->dims->size, 1);
@@ -300,7 +300,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   output_state->allocation_type = kTfLiteArenaRwPersistent;
   cell_state->allocation_type = kTfLiteArenaRwPersistent;
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   const bool use_cifg = (input_to_input_weights == nullptr);
   if (use_cifg) {
@@ -326,7 +326,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   auto* params = reinterpret_cast<TfLiteLSTMParams*>(node->builtin_data);
   const TfLiteTensor* input = GetInput(context, node, kInputTensor);
 
-  TfLiteTensor* input_to_input_weights =
+  const TfLiteTensor* input_to_input_weights =
       GetOptionalInputTensor(context, node, kInputToInputWeightsTensor);
   const TfLiteTensor* input_to_forget_weights =
       GetInput(context, node, kInputToForgetWeightsTensor);
@@ -335,7 +335,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* input_to_output_weights =
       GetInput(context, node, kInputToOutputWeightsTensor);
 
-  TfLiteTensor* recurrent_to_input_weights =
+  const TfLiteTensor* recurrent_to_input_weights =
       GetOptionalInputTensor(context, node, kRecurrentToInputWeightsTensor);
   const TfLiteTensor* recurrent_to_forget_weights =
       GetInput(context, node, kRecurrentToForgetWeightsTensor);
@@ -344,14 +344,14 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* recurrent_to_output_weights =
       GetInput(context, node, kRecurrentToOutputWeightsTensor);
 
-  TfLiteTensor* cell_to_input_weights =
+  const TfLiteTensor* cell_to_input_weights =
       GetOptionalInputTensor(context, node, kCellToInputWeightsTensor);
-  TfLiteTensor* cell_to_forget_weights =
+  const TfLiteTensor* cell_to_forget_weights =
       GetOptionalInputTensor(context, node, kCellToForgetWeightsTensor);
-  TfLiteTensor* cell_to_output_weights =
+  const TfLiteTensor* cell_to_output_weights =
       GetOptionalInputTensor(context, node, kCellToOutputWeightsTensor);
 
-  TfLiteTensor* input_gate_bias =
+  const TfLiteTensor* input_gate_bias =
       GetOptionalInputTensor(context, node, kInputGateBiasTensor);
   const TfLiteTensor* forget_gate_bias =
       GetInput(context, node, kForgetGateBiasTensor);
@@ -359,9 +359,9 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   const TfLiteTensor* output_gate_bias =
       GetInput(context, node, kOutputGateBiasTensor);
 
-  TfLiteTensor* projection_weights =
+  const TfLiteTensor* projection_weights =
       GetOptionalInputTensor(context, node, kProjectionWeightsTensor);
-  TfLiteTensor* projection_bias =
+  const TfLiteTensor* projection_bias =
       GetOptionalInputTensor(context, node, kProjectionBiasTensor);
 
   TfLiteTensor* output_state = GetOutput(context, node, kOutputStateTensor);
",0,train
42b0014216ab04f704967b722f7062df8a4180e1,tensorflow/tensorflow,"Add test cases of uint16, uint32, uint64 support for tf.math.[equal|not_equal]

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",cwise_ops_binary_test.py,"@@ -948,6 +948,31 @@ class ComparisonOpTest(test.TestCase):
             ""Incompatible shapes|Dimensions must be equal""):
           f(x.astype(t), y.astype(t))
 
+  def testEqualDType(self):
+    dtypes = [np.float16, np.float32, np.float64, np.int8, np.int16, np.int32, np.int64, np.uint8, np.uint16, np.uint32, np.uint64]
+    x = np.asarray([0, 1, 2, 3, 4])
+    y = np.asarray([0, 1, 2, 3, 4])
+    for dtype in dtypes:
+      xt = x.astype(dtype)
+      yt = y.astype(dtype)
+      cmp_eq = math_ops.equal(xt, yt)
+      cmp_ne = math_ops.not_equal(xt, yt)
+      values = self.evaluate([cmp_eq, cmp_ne])
+      self.assertAllEqual(
+          [[True, True, True, True, True],
+           [False, False, False, False, False]], values)
+    for dtype in [np.complex64, np.complex128]:
+      xt = x.astype(dtype)
+      xt -= 1j * xt
+      yt = y.astype(dtype)
+      yt -= 1j * yt
+      cmp_eq = math_ops.equal(xt, yt)
+      cmp_ne = math_ops.not_equal(xt, yt)
+      values = self.evaluate([cmp_eq, cmp_ne])
+      self.assertAllEqual(
+          [[True, True, True, True, True],
+           [False, False, False, False, False]], values)
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
d08667249fd064ddf41777eea6debf4474e6622a,tensorflow/tensorflow,"Internal change

PiperOrigin-RevId: 251371868",strip_unused_lib.py,"@@ -75,6 +75,8 @@ def strip_unused(input_graph_def, input_node_names, output_node_names,
       if ""_output_shapes"" in node.attr:
         placeholder_node.attr[""_output_shapes""].CopyFrom(node.attr[
             ""_output_shapes""])
+      if ""shape"" in node.attr:
+        placeholder_node.attr[""shape""].CopyFrom(node.attr[""shape""])
       inputs_replaced_graph_def.node.extend([placeholder_node])
     else:
       inputs_replaced_graph_def.node.extend([copy.deepcopy(node)])
",0,train
da9706d518adf45b5d2dff480d80e78be12575ca,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-08-25

PiperOrigin-RevId: 328291621
Change-Id: If04388ad4e881890383fa7e83b49c272ff216949",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 24)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 8, 25)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,test
6850dafeeaaa48efa748134688844bd079ef3949,tensorflow/tensorflow,"collective_param_resolver_local.cc: delete DCHECK(!ir->out_mu.try_lock()); in a lambda

UNLOCK_FUNCTION(ir->out_mu) annotates that the lock is held on entry.
try_lock() should not be called.

PiperOrigin-RevId: 215769341",collective_param_resolver_local.cc,"@@ -522,7 +522,6 @@ void CollectiveParamResolverLocal::CallInitInstanceSharedParams(
   InitInstanceSharedParams(
       gr, cp, ir,
       [this, ir, done](const Status& s) UNLOCK_FUNCTION(ir->out_mu) {
-        DCHECK(!ir->out_mu.try_lock());
         DCHECK(ir->out_mu_available);
         ir->status.Update(s);
         ir->out_mu.unlock();
",0,train
2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,single_machine.cc,"@@ -41,7 +41,7 @@ SingleMachine::SingleMachine(int timeout_s, int num_cpu_cores, int num_gpus)
 }
 
 SingleMachine::~SingleMachine() {
-  CloseSession(false /*use_timeout*/);
+  CloseSession(false /*use_timeout*/).IgnoreError();
 
   // Prevent the destructor from deleting mu_ until CloseSession() is done.
   mutex_lock l(mu_);
@@ -164,18 +164,18 @@ Status SingleMachine::CloseSession(bool use_timeout) {
 
     thread_pool_->Schedule([this] {
       if (this->coordinator_) {
-        this->coordinator_->RequestStop();
+        this->coordinator_->RequestStop().IgnoreError();
         // Wait for all the runners to have closed their queues.
         while (!this->coordinator_->AllRunnersStopped()) {
           sleep(1);
         }
         // Now we can close the session. This should cancel any pending I/O
         // operation.
-        this->session_->Close();
+        this->session_->Close().IgnoreError();
         // Last but not least, we can delete the coordinator.
         this->coordinator_.reset();
       } else {
-        this->session_->Close();
+        this->session_->Close().IgnoreError();
       }
 
       // Wait for any previous run to finish.
",0,train
2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,fused_batch_norm_op.cc,"@@ -300,8 +300,9 @@ struct FusedBatchNorm<GPUDevice, T> {
     GPUDevice d = context->eigen_device<GPUDevice>();
     using perftools::gputools::DeviceMemory;
     Tensor inv_var;
-    context->allocate_temp(DataTypeToEnum<T>::value, estimated_variance.shape(),
-                           &inv_var);
+    OP_REQUIRES_OK(
+        context, context->allocate_temp(DataTypeToEnum<T>::value,
+                                        estimated_variance.shape(), &inv_var));
     auto inv_var_ptr = StreamExecutorUtil::AsDeviceMemory<T>(inv_var);
     std::function<const DeviceMemory<T>&()> var_to_inv_var =
         [d, epsilon, estimated_variance,
",0,train
2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,graph_transferer.cc,"@@ -96,7 +96,8 @@ Status GraphTransferer::LoadGraphFromProto(
           shape_inference::ShapeHandle handle;
           status = context->MakeShapeFromTensorShape(
               input_node_info.second.shape(), &handle);
-          shape_refiner.SetShape(node, 0, handle);
+          // TODO(b/32704451): Don't just ignore this status!
+          shape_refiner.SetShape(node, 0, handle).IgnoreError();
           is_input_node = true;
         }
         if (!status.ok()) {
@@ -395,9 +396,11 @@ void GraphTransferer::RegisterConstantNode(
   const_node_info.add_shape(shape[2]);
   const_node_info.add_shape(shape[3]);
   const TensorProto* proto = nullptr;
-  GetNodeAttr(node.def(), ""value"", &proto);
+  // TODO(b/32704451): Don't just ignore this status!
+  GetNodeAttr(node.def(), ""value"", &proto).IgnoreError();
   Tensor const_tensor;
-  MakeTensorFromProto(*proto, &const_tensor);
+  // TODO(b/32704451): Don't just ignore this status!
+  MakeTensorFromProto(*proto, &const_tensor).IgnoreError();
 
   const_node_info.set_dtype(const_tensor.dtype());
   // TODO(satok): Remove. Determine constant value without dryrun
",0,train
2e7cc48e5cce0ff5429b2d9d0ac313ce70035605,tensorflow/tensorflow,Change for internal compatibility.,sparsify_gather.cc,"@@ -146,7 +146,7 @@ Status SparsifyGather(const GraphDef& input_graph_def,
           const NodeDef& const_node = match.inputs[0].inputs[0].node;
 
           DataType data_type;
-          GetNodeAttr(const_node, ""dtype"", &data_type);
+          TF_RETURN_IF_ERROR(GetNodeAttr(const_node, ""dtype"", &data_type));
           if (data_type != DT_FLOAT) {
             return tensorflow::errors::FailedPrecondition(
                 ""Transform only applicable to subgraph with 'Const' of dtype ""
",0,train
1cd086bc77c2f58d9ce519250cdefc355ab3aac4,tensorflow/tensorflow,[tensorflow/compiler/xla/service/tuple_points_to_analysis_test.cc] Add calls to `reserve()` before populating vectors,tuple_points_to_analysis_test.cc,"@@ -111,6 +111,7 @@ class TuplePointsToAnalysisTest : public HloTestBase {
         points_to_analysis_->GetBufferDefinedAt(instruction, index)
             .ValueOrDie();
     std::vector<BufferAlias> expected_aliases;
+    expected_aliases.reserve(expected.size());
     for (auto& pair : expected) {
       expected_aliases.push_back(BufferAlias(pair.first, pair.second));
     }
",0,train
78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels.

PiperOrigin-RevId: 322830325
Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",conv.cc,"@@ -329,10 +329,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const int num_channels = filter->dims->data[kConvQuantizedDimension];
   // Dynamically allocate per-channel quantization parameters.
   op_data->per_channel_output_multiplier =
-      reinterpret_cast<int32_t>(context->AllocatePersistentBuffer(
+      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
           context, num_channels * sizeof(int32_t)));
   op_data->per_channel_output_shift =
-      reinterpret_cast<int32_t>(context->AllocatePersistentBuffer(
+      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
           context, num_channels * sizeof(int32_t)));
 
   // All per-channel quantized tensors need valid zero point and scale arrays.
",0,train
78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels.

PiperOrigin-RevId: 322830325
Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",depthwise_conv.cc,"@@ -377,10 +377,10 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
   const int num_channels = filter->dims->data[kDepthwiseConvQuantizedDimension];
   // Dynamically allocate per-channel quantization parameters.
   op_data->per_channel_output_multiplier =
-      reinterpret_cast<int32_t>(context->AllocatePersistentBuffer(
+      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
           context, num_channels * sizeof(int32_t)));
   op_data->per_channel_output_shift =
-      reinterpret_cast<int32_t>(context->AllocatePersistentBuffer(
+      reinterpret_cast<int32_t*>(context->AllocatePersistentBuffer(
           context, num_channels * sizeof(int32_t)));
 
   // All per-channel quantized tensors need valid zero point and scale arrays.
",0,train
78688104bc118097a7968c864197a3c328f1c00b,tensorflow/tensorflow,"Fix allocator build errors in xtensa softmax, conv + depthwise conv kernels.

PiperOrigin-RevId: 322830325
Change-Id: I22eb3d1259db1390e6ad2c3caa588279b50fd674",softmax.cc,"@@ -167,10 +167,9 @@ TfLiteStatus SoftmaxPrepare(TfLiteContext* context, TfLiteNode* node) {
   // the scale and beta before calculating exp. It is mandatory to apply beta
   // and scale here, since each softmax op may have different beta and scale
   // values. Beta and scale will remain constant for a given softmax op.
-  void* allocated_ptr;
-  TF_LITE_ENSURE_STATUS(context->AllocatePersistentBuffer(
-      context, kInt8Range * sizeof(int16_t), &allocated_ptr));
-  op_data->exp_lut = static_cast<uint16_t*>(allocated_ptr);
+  op_data->exp_lut = static_cast<uint16_t*>(context->AllocatePersistentBuffer(
+      context, kInt8Range * sizeof(uint16_t)));
+  TF_LITE_ENSURE(context, op_data->exp_lut != nullptr);
 
   TF_LITE_ENSURE_STATUS(
       CalculateSoftmaxOpData(context, input, output, params, op_data));
",0,train
e5132a1a1c2a47b2496189dd0e0880d53816dea3,tensorflow/tensorflow,"[XLA:GPU] Fix a memory corruption in HLO snapshotting.

PiperOrigin-RevId: 396849022
Change-Id: Id451a827a81007692fe91c153ba115f083e9abc0",gpu_compiler.cc,"@@ -1162,13 +1162,10 @@ StatusOr<std::unique_ptr<Executable>> GpuCompiler::RunBackend(
   // Dump computation proto state and buffer assignment for debug and test, if
   // dump is enabled.
   if (DumpingEnabledForHloModule(gpu_executable->module())) {
-    if (!hlo_proto_) {
-      hlo_proto_ = absl::make_unique<HloProto>();
-      *hlo_proto_->mutable_hlo_module() = gpu_executable->module().ToProto();
-    }
-    *hlo_proto_->mutable_buffer_assignment() =
+    auto hlo_proto = absl::make_unique<HloProto>(*hlo_proto_);
+    *hlo_proto->mutable_buffer_assignment() =
         compile_module_results.buffer_assignment->ToProto();
-    gpu_executable->set_hlo_proto(std::move(hlo_proto_));
+    gpu_executable->set_hlo_proto(std::move(hlo_proto));
   }
   gpu_executable->set_debug_info(
       compile_module_results.buffer_assignment->GetStats().ToString());
",0,train
e5132a1a1c2a47b2496189dd0e0880d53816dea3,tensorflow/tensorflow,"[XLA:GPU] Fix a memory corruption in HLO snapshotting.

PiperOrigin-RevId: 396849022
Change-Id: Id451a827a81007692fe91c153ba115f083e9abc0",gpu_compiler.h,"@@ -112,7 +112,7 @@ class GpuCompiler : public LLVMCompiler {
   }
 
   // Optional HloProto, stashed for dumping snapshots.
-  mutable std::unique_ptr<HloProto> hlo_proto_;
+  std::unique_ptr<HloProto> hlo_proto_;
 
   se::Platform::Id platform_id_;
 
",0,train
44fb8a750e563392e4aa4b7c6de5d7f56d1c65a8,tensorflow/tensorflow,"Tweak comment on XlaClusterInfo's default constructor.

PiperOrigin-RevId: 220499695",encapsulate_util.h,"@@ -117,11 +117,14 @@ Status PreprocessForEncapsulation(Graph* g,
 
 // Information for XLA computation.
 struct XlaClusterInfo {
-  // The implicit default constructor is deleted because host_compute_core is a
-  // const member whose type (std::map) doesn't necessarily have a user provided
-  // constructor - while libc++ and libstdc++ 4.8 provide a user defined
-  // default constructor, libstdc++ at least >= 7.3 does not.
-  // See also c++11 [class.ctor] p5.
+  // Add an explicitly-defined default constructor for this class.
+  //
+  // The compiler may delete the default constructor here because
+  // host_compute_core is a const member whose type (std::map) doesn't
+  // necessarily have a user provided constructor -- while libc++ and
+  // libstdc++ 4.8 provide a user defined default constructor, libstdc++ at
+  // least >= 7.3 does not. See also c++11 [class.ctor] p5.
+  //
   // TODO(klimek): In c++17 we'll be able to initialize host_compute_core
   // without losing aggregate initialization, which allows us to get rid of
   // the constructor definitions again.
",0,train
1188b9e764fc76f0dfa9c87a4575e8fac706a3ec,tensorflow/tensorflow,"Shortcut cross_device_ops reduce and batch_reduce method if there is only one input in PerReplica object.

PiperOrigin-RevId: 249860947",cross_device_ops.py,"@@ -232,6 +232,11 @@ class CrossDeviceOps(object):
   def __init__(self):
     pass
 
+  @property
+  def _num_between_graph_workers(self):
+    # Returns 1 by default, the value may be overridden by sub classes.
+    return 1
+
   def reduce(self, reduce_op, per_replica_value, destinations):
     """"""Reduce `per_replica_value` to `destinations`.
 
@@ -255,6 +260,14 @@ class CrossDeviceOps(object):
       per_replica_value = _make_tensor_into_per_replica(per_replica_value)
 
     validate_destinations(destinations)
+
+    # Shortcut if `per_replica_value` only contains one value.
+    if self._num_between_graph_workers == 1 and len(
+        per_replica_value.values) == 1 and _devices_match(
+            per_replica_value, destinations):
+      return value_lib.Mirrored(per_replica_value.device_map,
+                                per_replica_value.values)
+
     return self.reduce_implementation(reduce_op, per_replica_value,
                                       destinations)
 
@@ -288,6 +301,15 @@ class CrossDeviceOps(object):
     for _, d in value_destination_pairs:
       validate_destinations(d)
 
+    # Shortcut if all PerReplica objects only contain one value.
+    if self._num_between_graph_workers == 1 and _all_devices_match(
+        value_destination_pairs) and len(
+            value_destination_pairs[0][0].values) == 1:
+      return [
+          value_lib.Mirrored(v.device_map, v.values)
+          for v, _ in value_destination_pairs
+      ]
+
     return self.batch_reduce_implementation(reduce_op, value_destination_pairs)
 
   def broadcast(self, tensor, destinations):
@@ -974,6 +996,10 @@ class CollectiveAllReduce(CrossDeviceOps):
                              cross_device_utils.CollectiveKeys())
     super(CollectiveAllReduce, self).__init__()
 
+  @property
+  def _num_between_graph_workers(self):
+    return self._num_workers
+
   def reduce_implementation(self, reduce_op, per_replica_value, destinations):
     all_reduced = self._batch_all_reduce(reduce_op, [per_replica_value])[0]
     device_map, logical_device = get_device_map_from(destinations)
",0,train
8c31d812ad5904dcb2d5e8d221837be5bbb6e725,tensorflow/tensorflow,"Turning off the parallelized IsDirectory() call on iOS platform, due to problems with more than a few threads.
Change: 137727687",file_system.cc,"@@ -29,6 +29,10 @@ limitations under the License.
 #include ""tensorflow/core/platform/platform.h""
 #include ""tensorflow/core/platform/protobuf.h""
 
+#if defined(__APPLE__)
+#include <TargetConditionals.h>
+#endif
+
 namespace tensorflow {
 
 namespace {
",0,train
2fb51e98b98f0cba4af1131203a588a43cadbf8a,tensorflow/tensorflow,"Have ValueUseIterator template use OperandType instead of IROperand.

This was causing some issues using helper methods like llvm::make_early_inc_range on Value::getUses(), resulting in IROperand instead of OpOperand.

PiperOrigin-RevId: 262056425",UseDefLists.h,"@@ -197,7 +197,7 @@ public:
 /// An iterator over all uses of a ValueBase.
 template <typename OperandType>
 class ValueUseIterator
-    : public std::iterator<std::forward_iterator_tag, IROperand> {
+    : public std::iterator<std::forward_iterator_tag, OperandType> {
 public:
   ValueUseIterator() = default;
   explicit ValueUseIterator(OperandType *current) : current(current) {}
",0,train
1abfa5aa09174be04fd18f946eefd6368ae3cead,tensorflow/tensorflow,Markdown links using `[This link](http://example.net/)` syntax (#2273),nn_ops.py,"@@ -68,15 +68,15 @@ def atrous_conv2d(value, filters, rate, padding, name=None):
   the amount of computation.
 
   For a description of atrous convolution and how it can be used for dense
-  feature extraction, please see: (Semantic Image Segmentation with Deep
-  Convolutional Nets and Fully Connected CRFs)[http://arxiv.org/abs/1412.7062].
-  The same operation is investigated further in (Multi-Scale Context Aggregation
-  by Dilated Convolutions)[http://arxiv.org/abs/1511.07122]. Previous works
+  feature extraction, please see: [Semantic Image Segmentation with Deep
+  Convolutional Nets and Fully Connected CRFs](http://arxiv.org/abs/1412.7062).
+  The same operation is investigated further in [Multi-Scale Context Aggregation
+  by Dilated Convolutions](http://arxiv.org/abs/1511.07122). Previous works
   that effectively use atrous convolution in different ways are, among others,
-  (OverFeat: Integrated Recognition, Localization and Detection using
-  Convolutional Networks) [http://arxiv.org/abs/1312.6229] and (Fast Image
-  Scanning with Deep Max-Pooling Convolutional Neural Networks)
-  [http://arxiv.org/abs/1302.1700]. Atrous convolution is also closely related
+  [OverFeat: Integrated Recognition, Localization and Detection using
+  Convolutional Networks](http://arxiv.org/abs/1312.6229) and [Fast Image
+  Scanning with Deep Max-Pooling Convolutional Neural Networks]
+  (http://arxiv.org/abs/1302.1700). Atrous convolution is also closely related
   to the so-called noble identities in multi-rate signal processing.
 
   There are many different ways to implement atrous convolution (see the refs
@@ -227,8 +227,8 @@ def conv2d_transpose(value,
                      name=None):
   """"""The transpose of `conv2d`.
 
-  This operation is sometimes called ""deconvolution"" after (Deconvolutional
-  Networks)[http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf], but is
+  This operation is sometimes called ""deconvolution"" after [Deconvolutional
+  Networks](http://www.matthewzeiler.com/pubs/cvpr2010/cvpr2010.pdf), but is
   actually the transpose (gradient) of `conv2d` rather than an actual
   deconvolution.
 
",0,train
583da17bd6e0972b2c01305547ca04008b2c22a8,tensorflow/tensorflow,"Update GraphDef version to 1015.

PiperOrigin-RevId: 422490804
Change-Id: If7881dcb54d717061bc3c62b5a5efb0cb9641f8f",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 1014  // Updated: 2022/1/17
+#define TF_GRAPH_DEF_VERSION 1015  // Updated: 2022/1/18
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,train
f3117e8ec1952311d869c7524fba8cdfc2975979,tensorflow/tensorflow,"[RunHandler] Fix wait-for-handler code when timeout is not set.

Previously we were setting a (very short) deadline when `call_timeout == 0`, whereas this should be treated as an indefinite deadline.

PiperOrigin-RevId: 292241523
Change-Id: I659886f0f1642b6683c4c2ff44d74ae7bec29620",run_handler.cc,"@@ -36,7 +36,9 @@ limitations under the License.
 
 namespace tensorflow {
 namespace {
+// LINT.IfChange
 static constexpr int32 kMaxConcurrentHandlers = 128;
+// LINT.ThenChange(//tensorflow/core/framework/run_handler_test.cc)
 
 // TODO(azaks): Refactor with thread:ThreadPool
 class RunHandlerEnvironment {
@@ -948,16 +950,18 @@ class RunHandlerPool::Impl {
     RunHandler::Impl* handler_impl;
     {
       mutex_lock l(mu_);
-      if (free_handlers_.empty()) {
+      if (!has_free_handler()) {
         profiler::TraceMe activity(
             [&] {
               return strings::StrCat(""WaitingForHandler#step_id="", step_id,
                                      ""#"");
             },
             profiler::TraceMeLevel::kInfo);
-        if (!mu_.AwaitWithDeadline(
-                Condition(this, &Impl::has_free_handler),
-                EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) {
+        if (timeout_in_ms == 0) {
+          mu_.Await(Condition(this, &Impl::has_free_handler));
+        } else if (!mu_.AwaitWithDeadline(
+                       Condition(this, &Impl::has_free_handler),
+                       EnvTime::NowNanos() + timeout_in_ms * 1000 * 1000)) {
           return nullptr;
         }
       }
",0,train
f3117e8ec1952311d869c7524fba8cdfc2975979,tensorflow/tensorflow,"[RunHandler] Fix wait-for-handler code when timeout is not set.

Previously we were setting a (very short) deadline when `call_timeout == 0`, whereas this should be treated as an indefinite deadline.

PiperOrigin-RevId: 292241523
Change-Id: I659886f0f1642b6683c4c2ff44d74ae7bec29620",run_handler_test.cc,"@@ -205,5 +205,37 @@ TEST_F(RunHandlerTest, TestConcurrencyUseRunHandlerPool) {
   delete tp;
 }
 
+TEST_F(RunHandlerTest, TestWaitTimeout) {
+  std::unique_ptr<RunHandlerPool> pool(new RunHandlerPool(1, 1));
+
+  // Get the single handler in the pool.
+  std::vector<std::unique_ptr<RunHandler>> blocking_handles;
+  const int32 kMaxConcurrentHandlers = 128;  // Copied from run_handler.cc.
+  blocking_handles.reserve(kMaxConcurrentHandlers);
+  for (int i = 0; i < kMaxConcurrentHandlers; ++i) {
+    blocking_handles.push_back(pool->Get(i));
+  }
+
+  // A subsequent request with a non-zero timeout will fail by returning
+  // nullptr.
+  auto null_handle = pool->Get(128, 1);
+  EXPECT_EQ(null_handle.get(), nullptr);
+
+  // A subsequent request with no timeout will succeed once the blocking handle
+  // is returned.
+  auto tp = std::make_unique<thread::ThreadPool>(Env::Default(), ""test"", 4);
+  std::atomic<int64> release_time;
+
+  tp->Schedule([&blocking_handles, &release_time]() {
+    Env::Default()->SleepForMicroseconds(5000);
+    release_time = EnvTime::NowNanos();
+    blocking_handles[0].reset();
+  });
+
+  auto next_handle = pool->Get(129, 0);
+  EXPECT_GT(EnvTime::NowNanos(), release_time);
+  EXPECT_NE(next_handle.get(), nullptr);
+}
+
 }  // namespace
 }  // namespace tensorflow
",0,train
1e5a49750b70bda55a5d6cd3618ae2bdc0cf4f80,tensorflow/tensorflow,"[buildcop] Disable testAllToAllV3 that fails various memory tests, see the bug for the detail.

PiperOrigin-RevId: 398518426
Change-Id: Ic51f33eaf6ef0285c8d14a0bc945aa0b81563120",collective_ops_test.py,"@@ -1245,6 +1245,7 @@ class CollectiveOpsV3Test(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(device_combination)
   def testAllToAllV3(self, device, communication):
+    self.skipTest('TODO(b/200953796)')
     group_size = 2
     group_key = 104
 
",0,train
8cb8c460a3a1998182f8d338f9d82de89b076d19,tensorflow/tensorflow,"Treat SparseApply* on empty sparse gradients as no-op

PiperOrigin-RevId: 353040731
Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4",training_ops_gpu.cu.cc,"@@ -512,6 +512,9 @@ struct SparseApplyAdagrad<GPUDevice, T, Tindex, has_epsilon> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyAdagradKernel<T, Tindex, has_epsilon>, config.block_count,
@@ -570,6 +573,9 @@ struct SparseApplyProximalAdagrad<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(SparseApplyProximalAdagradKernel<T, Tindex>,
                            config.block_count, config.thread_per_block, 0,
@@ -777,6 +783,9 @@ struct SparseApplyFtrl<GPUDevice, T, Tindex, has_l2_shrinkage> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
+    if (grad_size == 0) {
+      return Status::OK();
+    }
     GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
     return GpuLaunchKernel(
         SparseApplyFtrlKernel<T, Tindex, has_l2_shrinkage>, config.block_count,
@@ -846,12 +855,14 @@ struct SparseApplyKerasMomentum<GPUDevice, T, Tindex> {
     const Tindex first_dim_size = var.dimension(0);
     const Tindex grad_size = grad.size();
     const Tindex indices_size = indices.size();
-    GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
-    TF_CHECK_OK(GpuLaunchKernel(
-        SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
-        config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
-        lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
-        first_dim_size, grad_size, indices_size));
+    if (grad_size != 0) {
+      GpuLaunchConfig config = GetGpuLaunchConfig(grad_size, d);
+      TF_CHECK_OK(GpuLaunchKernel(
+          SparseApplyKerasMomentumKernel<T, Tindex>, config.block_count,
+          config.thread_per_block, 0, d.stream(), var.data(), accum.data(),
+          lr.data(), grad.data(), indices.data(), momentum.data(), use_nesterov,
+          first_dim_size, grad_size, indices_size));
+    }
     return static_cast<Tindex>(-1);
   }
 };
",0,train
8cb8c460a3a1998182f8d338f9d82de89b076d19,tensorflow/tensorflow,"Treat SparseApply* on empty sparse gradients as no-op

PiperOrigin-RevId: 353040731
Change-Id: Ibe213b002efc2622c86fb936c477c1e13820f3e4",training_ops_test.py,"@@ -223,9 +223,9 @@ class TrainingOpsTest(TensorFlowTestCase):
       self._testTypesForFtrlMultiplyLinearByLr(
           x, y, z, lr, grad, use_gpu=False, l1=l1, l2=l2)
 
-  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices):
+  def _testTypesForSparseAdagrad(self, x, y, lr, grad, indices, use_gpu):
     self.setUp()
-    with self.session(use_gpu=True):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       self.evaluate(variables.global_variables_initializer())
@@ -251,11 +251,12 @@ class TrainingOpsTest(TensorFlowTestCase):
                               lr,
                               grad,
                               indices,
+                              use_gpu,
                               l1=0.0,
                               l2=0.0,
                               lr_power=-0.5):
     self.setUp()
-    with self.session(use_gpu=False):
+    with self.session(use_gpu=use_gpu):
       var = variables.VariableV1(x)
       accum = variables.VariableV1(y)
       linear = variables.VariableV1(z)
@@ -327,8 +328,9 @@ class TrainingOpsTest(TensorFlowTestCase):
   @test_util.run_v1_only(""SparseApplyAdagrad op returns a ref, so it is not ""
                          ""supported in eager mode."")
   def testSparseApplyAdagrad(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [np.arange(10), np.arange(10, 20), np.arange(20, 30)]
       y_val = [np.arange(1, 11), np.arange(11, 21), np.arange(21, 31)]
       x = np.array(x_val).astype(dtype)
@@ -337,13 +339,19 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [np.arange(10), np.arange(10)]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 10], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseAdagrad(x, y, lr, empty_grad, empty_indices,
+                                      use_gpu)
 
   @test_util.run_v1_only(""SparseApplyAdagrad op returns a ref, so it is not ""
                          ""supported in eager mode."")
   def testSparseApplyAdagradDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
       x_val = [[1.0], [2.0], [3.0]]
       y_val = [[4.0], [5.0], [6.0]]
       x = np.array(x_val).astype(dtype)
@@ -352,13 +360,18 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseAdagrad(x, y, lr, grad, indices)
+      self._testTypesForSparseAdagrad(x, y, lr, grad, indices, use_gpu)
 
   @test_util.run_v1_only(""SparseApplyFtrl op returns a ref, so it is not ""
                          ""supported in eager mode."")
   def testSparseApplyFtrlDim1(self):
-    for (dtype, index_type) in itertools.product(
-        [np.float16, np.float32, np.float64], [np.int32, np.int64]):
+    for (dtype, index_type,
+         use_gpu) in itertools.product([np.float16, np.float32, np.float64],
+                                       [np.int32, np.int64], [False, True]):
+      # TODO(b/178042695): This configuration leads to a ""too many resources
+      # requested for launch"" error.
+      if (dtype, index_type, use_gpu) == (np.float64, np.int64, True):
+        continue
       x_val = [[0.0], [0.0], [0.0]]
       y_val = [[4.0], [5.0], [6.0]]
       z_val = [[0.0], [0.0], [0.0]]
@@ -369,7 +382,12 @@ class TrainingOpsTest(TensorFlowTestCase):
       grad_val = [[1.5], [2.5]]
       grad = np.array(grad_val).astype(dtype)
       indices = np.array([0, 2]).astype(index_type)
-      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices)
+      self._testTypesForSparseFtrl(x, y, z, lr, grad, indices, use_gpu)
+      # Empty sparse gradients.
+      empty_grad = np.zeros([0, 1], dtype=dtype)
+      empty_indices = np.zeros([0], dtype=index_type)
+      self._testTypesForSparseFtrl(x, y, z, lr, empty_grad, empty_indices,
+                                   use_gpu)
 
   @test_util.run_v1_only(""SparseApplyFtrlMultiplyLinearByLr op returns a ref, ""
                          ""so it is not supported in eager mode."")
",0,train
c3d50afa464d8130b5d258b5c0b69b3f4ee40501,tensorflow/tensorflow,"Pass profile_handle instead of session_id for XLA TPU compilation

To make XLA profile based compilation more deterministic, this change
passes an immutable profile_handle rather than an id of session,
which can update the containing session.

PiperOrigin-RevId: 438127623",compiler.h,"@@ -134,10 +134,12 @@ class AotCompilationOptions {
   se::StreamExecutor* executor() const { return executor_; }
   void set_executor(se::StreamExecutor* executor) { executor_ = executor; }
 
-  // Optional session_id and cache key may be used to trigger recompilation
+  // Optional profile_handle and cache key may be used to trigger recompilation
   // when a compilation cache is used.
-  uint64_t session_id() const { return session_id_; }
-  void set_session_id(uint64_t session_id) { session_id_ = session_id; }
+  uint64_t profile_handle() const { return profile_handle_; }
+  void set_profile_handle(uint64_t profile_handle) {
+    profile_handle_ = profile_handle;
+  }
 
   absl::string_view cache_key() const { return cache_key_; }
   void set_cache_key(absl::string_view cache_key) {
@@ -161,7 +163,7 @@ class AotCompilationOptions {
   FusionConfigCollection fusion_config_collection_ =
       FusionConfigCollection::kOff;
   se::StreamExecutor* executor_ = nullptr;
-  uint64_t session_id_ = 0;
+  uint64_t profile_handle_ = 0;
   std::string cache_key_;
   bool run_backend_only_ = false;
 };
",0,train
c3d50afa464d8130b5d258b5c0b69b3f4ee40501,tensorflow/tensorflow,"Pass profile_handle instead of session_id for XLA TPU compilation

To make XLA profile based compilation more deterministic, this change
passes an immutable profile_handle rather than an id of session,
which can update the containing session.

PiperOrigin-RevId: 438127623",hlo_module.h,"@@ -408,9 +408,11 @@ class HloModule {
     module->metadata_ = std::move(metadata_);
   }
 
-  uint64_t session_id() const { return session_id_; }
+  uint64_t profile_handle() const { return profile_handle_; }
 
-  void set_session_id(uint64_t session_id) { session_id_ = session_id; }
+  void set_profile_handle(uint64_t profile_handle) {
+    profile_handle_ = profile_handle;
+  }
 
   void add_profile_info(const HloModuleProto::ProfileInfo& profile_info) {
     profile_info_list_.push_back(profile_info);
@@ -494,8 +496,8 @@ class HloModule {
   // True if the module contains dynamic computation.
   bool is_dynamic_ = false;
 
-  // A compilation session id.
-  uint64_t session_id_ = 0;
+  // Optional compilation profile handle.
+  uint64_t profile_handle_ = 0;
 
   // An array of ProfileInfo specifying what optimization profiles this module
   // contains, along with the relative speedups.
",0,train
0b881b877b147e31ab86f4e5a4a215fb4764a782,tensorflow/tensorflow,"Fix util_tensor_slice_set_test to not be sensitive to the order of
the results returned by TensorSlice.QueryMeta(). Since TensorSlice
uses unordered_map<>, the order is not guaranteed.

PiperOrigin-RevId: 248363564",tensor_slice_set_test.cc,"@@ -218,10 +218,18 @@ TEST(TensorSliceSetTest, QueryMetaTwoD) {
     std::vector<std::pair<TensorSlice, string>> results;
     EXPECT_TRUE(tss.QueryMeta(s, &results));
     EXPECT_EQ(2, results.size());
-    EXPECT_EQ(""2,2:0,3"", results[0].first.DebugString());
-    EXPECT_EQ(""slice_2"", results[0].second);
-    EXPECT_EQ(""0,2:-"", results[1].first.DebugString());
-    EXPECT_EQ(""slice_1"", results[1].second);
+    // Allow results to be returned in either order
+    if (results[0].second == ""slice_2"") {
+      EXPECT_EQ(""2,2:0,3"", results[0].first.DebugString());
+      EXPECT_EQ(""slice_2"", results[0].second);
+      EXPECT_EQ(""0,2:-"", results[1].first.DebugString());
+      EXPECT_EQ(""slice_1"", results[1].second);
+    } else {
+      EXPECT_EQ(""0,2:-"", results[0].first.DebugString());
+      EXPECT_EQ(""slice_1"", results[0].second);
+      EXPECT_EQ(""2,2:0,3"", results[1].first.DebugString());
+      EXPECT_EQ(""slice_2"", results[1].second);
+    }
   }
 
   // Slice #4 includes the hole and so there is no match
",0,train
8d00ced88d6dfa369c2387d3745c1451f3f1ae64,tensorflow/tensorflow,Fix clang format issue,mkl_fused_batch_norm_op.cc,"@@ -14,13 +14,13 @@ limitations under the License.
 ==============================================================================*/
 #ifdef INTEL_MKL
 #include ""mkldnn.hpp""
-#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 #include ""tensorflow/core/framework/op_kernel.h""
 #include ""tensorflow/core/framework/register_types.h""
 #include ""tensorflow/core/framework/tensor.h""
 #include ""tensorflow/core/framework/tensor_types.h""
 #include ""tensorflow/core/util/mkl_util.h""
 #include ""tensorflow/core/util/tensor_format.h""
+#include ""third_party/eigen3/unsupported/Eigen/CXX11/Tensor""
 
 using mkldnn::batch_normalization_backward;
 using mkldnn::batch_normalization_forward;
@@ -711,9 +711,9 @@ class MklFusedBatchNormOp : public OpKernel {
         std::memcpy(batch_variance_data, variance_data, depth_ * sizeof(U));
       }
     } catch (mkldnn::error& e) {
-      string error_msg = ""Status: "" + std::to_string(e.status) +
-                         "", message: "" + string(e.message) + "", in file "" +
-                         string(__FILE__) + "":"" + std::to_string(__LINE__);
+      string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" +
+                         string(e.message) + "", in file "" + string(__FILE__) +
+                         "":"" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted(""Operation received an exception:"", error_msg));
@@ -1036,9 +1036,9 @@ class MklFusedBatchNormGradOp : public OpKernel {
                   reinterpret_cast<char*>(diff_weights_data + depth_),
                   depth_ * sizeof(U));
     } catch (mkldnn::error& e) {
-      string error_msg = ""Status: "" + std::to_string(e.status) +
-                         "", message: "" + string(e.message) + "", in file "" +
-                         string(__FILE__) + "":"" + std::to_string(__LINE__);
+      string error_msg = ""Status: "" + std::to_string(e.status) + "", message: "" +
+                         string(e.message) + "", in file "" + string(__FILE__) +
+                         "":"" + std::to_string(__LINE__);
       OP_REQUIRES_OK(
           context,
           errors::Aborted(""Operation received an exception:"", error_msg));
",0,test
0393436023d8fe7a2f98284420c58de6e461212a,tensorflow/tensorflow,"First step of migrating layers to new API.
Change: 121435753",layers_test.py,"@@ -22,6 +22,7 @@ import numpy as np
 import tensorflow as tf
 
 
+# TODO(b/28426988): Add separate tests for non-legacy versions.
 class FullyConnectedTest(tf.test.TestCase):
 
   def setUp(self):
@@ -41,8 +42,9 @@ class FullyConnectedTest(tf.test.TestCase):
     assert not tf.get_collection(tf.GraphKeys.SUMMARIES)
 
   def _fully_connected_basic_use(self, x, num_output_units, expected_shape):
-    output = tf.contrib.layers.fully_connected(x, num_output_units,
-                                               activation_fn=tf.nn.relu)
+    output = tf.contrib.layers.legacy_fully_connected(x,
+                                                      num_output_units,
+                                                      activation_fn=tf.nn.relu)
 
     with tf.Session() as sess:
       with self.assertRaises(tf.errors.FailedPreconditionError):
@@ -71,7 +73,7 @@ class FullyConnectedTest(tf.test.TestCase):
           self.input_3_dim, last_dim, [2, 4, last_dim])
 
   def test_relu_layer_basic_use(self):
-    output = tf.contrib.layers.relu(self.input, 8)
+    output = tf.contrib.layers.legacy_relu(self.input, 8)
 
     with tf.Session() as sess:
       with self.assertRaises(tf.errors.FailedPreconditionError):
@@ -90,7 +92,7 @@ class FullyConnectedTest(tf.test.TestCase):
                      len(tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)))
 
   def test_relu6_layer_basic_use(self):
-    output = tf.contrib.layers.relu6(self.input, 8)
+    output = tf.contrib.layers.legacy_relu6(self.input, 8)
 
     with tf.Session() as sess:
       with self.assertRaises(tf.errors.FailedPreconditionError):
@@ -112,11 +114,11 @@ class FullyConnectedTest(tf.test.TestCase):
 
   def test_variable_reuse_with_scope(self):
     with tf.variable_scope('test') as vs:
-      output1 = tf.contrib.layers.relu(self.input, 8)
-      output2 = tf.contrib.layers.relu(self.input, 8)
+      output1 = tf.contrib.layers.legacy_relu(self.input, 8)
+      output2 = tf.contrib.layers.legacy_relu(self.input, 8)
 
     with tf.variable_scope(vs, reuse=True):
-      output3 = tf.contrib.layers.relu(self.input, 8)
+      output3 = tf.contrib.layers.legacy_relu(self.input, 8)
 
     with tf.Session() as sess:
       tf.initialize_all_variables().run()
@@ -127,7 +129,7 @@ class FullyConnectedTest(tf.test.TestCase):
 
   def test_variable_reuse_with_template(self):
     tmpl1 = tf.make_template('test',
-                             tf.contrib.layers.fully_connected,
+                             tf.contrib.layers.legacy_fully_connected,
                              num_output_units=8)
     output1 = tmpl1(self.input)
     output2 = tmpl1(self.input)
@@ -138,9 +140,11 @@ class FullyConnectedTest(tf.test.TestCase):
     self.assertAllClose(out_value1, out_value2)
 
   def _custom_initializers(self, x, num_output_units, expected_outputs):
-    output = tf.contrib.layers.relu(x, num_output_units,
-                                    weight_init=tf.constant_initializer(2.0),
-                                    bias_init=tf.constant_initializer(1.0))
+    output = tf.contrib.layers.legacy_relu(
+        x,
+        num_output_units,
+        weight_init=tf.constant_initializer(2.0),
+        bias_init=tf.constant_initializer(1.0))
 
     with tf.Session() as sess:
       tf.initialize_all_variables().run()
@@ -165,10 +169,11 @@ class FullyConnectedTest(tf.test.TestCase):
                                 [49.6, 49.6]]])
 
   def test_custom_collections(self):
-    tf.contrib.layers.relu(self.input, 2,
-                           weight_collections=['unbiased'],
-                           bias_collections=['biased'],
-                           output_collections=['output'])
+    tf.contrib.layers.legacy_relu(self.input,
+                                  2,
+                                  weight_collections=['unbiased'],
+                                  bias_collections=['biased'],
+                                  output_collections=['output'])
 
     self.assertEquals(1, len(tf.get_collection('unbiased')))
     self.assertEquals(1, len(tf.get_collection('biased')))
@@ -176,9 +181,10 @@ class FullyConnectedTest(tf.test.TestCase):
     self.assertEquals(2, len(tf.get_collection(tf.GraphKeys.VARIABLES)))
 
   def test_all_custom_collections(self):
-    tf.contrib.layers.relu(self.input, 2,
-                           weight_collections=['unbiased', 'all'],
-                           bias_collections=['biased', 'all'])
+    tf.contrib.layers.legacy_relu(self.input,
+                                  2,
+                                  weight_collections=['unbiased', 'all'],
+                                  bias_collections=['biased', 'all'])
 
     self.assertEquals(1, len(tf.get_collection('unbiased')))
     self.assertEquals(1, len(tf.get_collection('biased')))
@@ -186,16 +192,16 @@ class FullyConnectedTest(tf.test.TestCase):
                       tf.get_collection('all'))
 
   def test_no_bias(self):
-    tf.contrib.layers.relu(self.input, 2, bias_init=None)
+    tf.contrib.layers.legacy_relu(self.input, 2, bias_init=None)
     self.assertEqual(1, len(tf.get_collection(tf.GraphKeys.VARIABLES)))
 
   def test_no_activation(self):
-    y = tf.contrib.layers.fully_connected(self.input, 2)
+    y = tf.contrib.layers.legacy_fully_connected(self.input, 2)
     self.assertEquals(2, len(tf.get_collection(tf.GraphKeys.VARIABLES)))
     self.assertEquals('BiasAdd', y.op.type)
 
   def test_no_activation_no_bias(self):
-    y = tf.contrib.layers.fully_connected(self.input, 2, bias_init=None)
+    y = tf.contrib.layers.legacy_fully_connected(self.input, 2, bias_init=None)
     self.assertEquals(1, len(tf.get_collection(tf.GraphKeys.VARIABLES)))
     self.assertEquals('MatMul', y.op.type)
 
@@ -206,7 +212,9 @@ class FullyConnectedTest(tf.test.TestCase):
       cnt[0] += 1
       return tensor
 
-    tf.contrib.layers.fully_connected(self.input, 2, weight_regularizer=test_fn)
+    tf.contrib.layers.legacy_fully_connected(self.input,
+                                             2,
+                                             weight_regularizer=test_fn)
 
     self.assertEqual([tensor],
                      tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
@@ -219,10 +227,12 @@ class FullyConnectedTest(tf.test.TestCase):
       cnt[0] += 1
       return tensor
 
-    tf.contrib.layers.fully_connected(self.input, 2,
-                                      weight_regularizer=test_fn)
-    tf.contrib.layers.fully_connected(self.input, 2,
-                                      weight_regularizer=test_fn)
+    tf.contrib.layers.legacy_fully_connected(self.input,
+                                             2,
+                                             weight_regularizer=test_fn)
+    tf.contrib.layers.legacy_fully_connected(self.input,
+                                             2,
+                                             weight_regularizer=test_fn)
 
     self.assertEqual([tensor, tensor],
                      tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
@@ -236,12 +246,14 @@ class FullyConnectedTest(tf.test.TestCase):
       return tensor
 
     with tf.variable_scope('test') as vs:
-      tf.contrib.layers.fully_connected(self.input, 2,
-                                        weight_regularizer=test_fn)
+      tf.contrib.layers.legacy_fully_connected(self.input,
+                                               2,
+                                               weight_regularizer=test_fn)
 
     with tf.variable_scope(vs, reuse=True):
-      tf.contrib.layers.fully_connected(self.input, 2,
-                                        weight_regularizer=test_fn)
+      tf.contrib.layers.legacy_fully_connected(self.input,
+                                               2,
+                                               weight_regularizer=test_fn)
 
     self.assertEqual([tensor],
                      tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
@@ -254,7 +266,9 @@ class FullyConnectedTest(tf.test.TestCase):
     with self.test_session():
       x = tf.constant([[]], shape=[0, 3])
       self.assertEqual(0, tf.size(x).eval())
-      y = tf.contrib.layers.fully_connected(x, 2, activation_fn=tf.nn.softmax)
+      y = tf.contrib.layers.legacy_fully_connected(x,
+                                                   2,
+                                                   activation_fn=tf.nn.softmax)
       tf.initialize_all_variables().run()
       expected_y = np.array([]).reshape(0, 2)
       np.testing.assert_array_equal(expected_y, y.eval())
@@ -262,7 +276,7 @@ class FullyConnectedTest(tf.test.TestCase):
   def test_shapes_variable_first_dim(self):
     # first dimension is not known statically.
     x = tf.placeholder(tf.float32, shape=[None, 4, 3])
-    y = tf.contrib.layers.fully_connected(x, 1)
+    y = tf.contrib.layers.legacy_fully_connected(x, 1)
     # in the output we still only know the 2nd and 3rd dimensions statically.
     self.assertEquals(y.get_shape().as_list(), [None, 4, 1])
     with self.test_session() as sess:
@@ -280,7 +294,7 @@ class FullyConnectedTest(tf.test.TestCase):
 
   def _unknown_dim_invalid_input(self, last_dim):
     x = tf.placeholder(tf.float32, shape=[3, last_dim])
-    tf.contrib.layers.fully_connected(x, 2, activation_fn=None)
+    tf.contrib.layers.legacy_fully_connected(x, 2, activation_fn=None)
 
   def test_known_dim_valid_input(self):
     self._unknown_dim_invalid_input(last_dim=3)
@@ -295,7 +309,9 @@ class FullyConnectedTest(tf.test.TestCase):
       with self.assertRaisesRegexp(ValueError,
                                    'rank of x must be at least 2 not: 1'):
         x = tf.constant([[]], shape=[0])
-        tf.contrib.layers.fully_connected(x, 2, activation_fn=tf.nn.softmax)
+        tf.contrib.layers.legacy_fully_connected(x,
+                                                 2,
+                                                 activation_fn=tf.nn.softmax)
 
 
 class Convolution2dTest(tf.test.TestCase):
@@ -308,8 +324,9 @@ class Convolution2dTest(tf.test.TestCase):
     assert not tf.get_collection(tf.GraphKeys.SUMMARIES)
 
   def test_basic_use(self):
-    output = tf.contrib.layers.convolution2d(self.input, 8, (3, 3),
-                                             activation_fn=tf.nn.relu)
+    output = tf.contrib.layers.legacy_convolution2d(self.input,
+                                                    8, (3, 3),
+                                                    activation_fn=tf.nn.relu)
 
     with tf.Session() as sess:
       with self.assertRaises(tf.errors.FailedPreconditionError):
@@ -328,17 +345,17 @@ class Convolution2dTest(tf.test.TestCase):
 
   def test_variable_reuse_with_scope(self):
     with tf.variable_scope('test') as vs:
-      output1 = tf.contrib.layers.convolution2d(self.input,
-                                                8, (3, 3),
-                                                activation_fn=tf.nn.relu)
-      output2 = tf.contrib.layers.convolution2d(self.input,
-                                                8, (3, 3),
-                                                activation_fn=tf.nn.relu)
+      output1 = tf.contrib.layers.legacy_convolution2d(self.input,
+                                                       8, (3, 3),
+                                                       activation_fn=tf.nn.relu)
+      output2 = tf.contrib.layers.legacy_convolution2d(self.input,
+                                                       8, (3, 3),
+                                                       activation_fn=tf.nn.relu)
 
     with tf.variable_scope(vs, reuse=True):
-      output3 = tf.contrib.layers.convolution2d(self.input,
-                                                8, (3, 3),
-                                                activation_fn=tf.nn.relu)
+      output3 = tf.contrib.layers.legacy_convolution2d(self.input,
+                                                       8, (3, 3),
+                                                       activation_fn=tf.nn.relu)
 
     with tf.Session() as sess:
       tf.initialize_all_variables().run()
@@ -349,7 +366,7 @@ class Convolution2dTest(tf.test.TestCase):
 
   def test_variable_reuse_with_template(self):
     tmpl1 = tf.make_template('test',
-                             tf.contrib.layers.convolution2d,
+                             tf.contrib.layers.legacy_convolution2d,
                              kernel_size=(3, 3),
                              num_output_channels=8)
     output1 = tmpl1(self.input)
@@ -361,10 +378,9 @@ class Convolution2dTest(tf.test.TestCase):
     self.assertAllClose(out_value1, out_value2)
 
   def test_custom_initializers(self):
-    output = tf.contrib.layers.convolution2d(
+    output = tf.contrib.layers.legacy_convolution2d(
         self.input,
-        2,
-        (3, 3),
+        2, (3, 3),
         activation_fn=tf.nn.relu,
         weight_init=tf.constant_initializer(2.0),
         bias_init=tf.constant_initializer(1.0),
@@ -378,21 +394,22 @@ class Convolution2dTest(tf.test.TestCase):
         np.array([[[[1261., 1261.]]], [[[3853., 3853.]]]]), out_value)
 
   def test_custom_collections(self):
-    tf.contrib.layers.convolution2d(self.input,
-                                    2, (3, 3),
-                                    activation_fn=tf.nn.relu,
-                                    weight_collections=['unbiased'],
-                                    bias_collections=['biased'])
+    tf.contrib.layers.legacy_convolution2d(self.input,
+                                           2, (3, 3),
+                                           activation_fn=tf.nn.relu,
+                                           weight_collections=['unbiased'],
+                                           bias_collections=['biased'])
 
     self.assertEquals(1, len(tf.get_collection('unbiased')))
     self.assertEquals(1, len(tf.get_collection('biased')))
 
   def test_all_custom_collections(self):
-    tf.contrib.layers.convolution2d(self.input,
-                                    2, (3, 3),
-                                    activation_fn=tf.nn.relu,
-                                    weight_collections=['unbiased', 'all'],
-                                    bias_collections=['biased', 'all'])
+    tf.contrib.layers.legacy_convolution2d(
+        self.input,
+        2, (3, 3),
+        activation_fn=tf.nn.relu,
+        weight_collections=['unbiased', 'all'],
+        bias_collections=['biased', 'all'])
 
     self.assertEquals(1, len(tf.get_collection('unbiased')))
     self.assertEquals(1, len(tf.get_collection('biased')))
@@ -407,15 +424,18 @@ class Convolution2dTest(tf.test.TestCase):
       cnt[0] += 1
       return tensor
 
-    tf.contrib.layers.convolution2d(self.input, 2, (3, 3),
-                                    weight_regularizer=test_fn)
+    tf.contrib.layers.legacy_convolution2d(self.input,
+                                           2, (3, 3),
+                                           weight_regularizer=test_fn)
 
     self.assertEqual([tensor],
                      tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
     self.assertEqual(1, cnt[0])
 
   def test_no_bias(self):
-    tf.contrib.layers.convolution2d(self.input, 2, (3, 3), bias_init=None)
+    tf.contrib.layers.legacy_convolution2d(self.input,
+                                           2, (3, 3),
+                                           bias_init=None)
     self.assertEqual(1,
                      len(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)))
 
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",interpreter_test.cc,"@@ -566,7 +566,7 @@ TEST(BasicInterpreter, ThreeStepAllocate) {
     DynamicBuffer buf;
     StringRef str_ref = GetString(input, 0);
     buf.AddString(str_ref);
-    buf.WriteToTensor(output);
+    buf.WriteToTensorAsVector(output);
     return kTfLiteOk;
   };
 
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",tensor_jni.cc,"@@ -278,7 +278,7 @@ void WriteMultiDimensionalStringArray(JNIEnv* env, jobject src,
   tflite::DynamicBuffer dst_buffer;
   PopulateStringDynamicBuffer(env, src, &dst_buffer, tensor->dims->size);
   if (!env->ExceptionCheck()) {
-    dst_buffer.WriteToTensor(tensor);
+    dst_buffer.WriteToTensor(tensor, /*new_shape=*/nullptr);
   }
 }
 
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",gather.cc,"@@ -118,7 +118,7 @@ TfLiteStatus GatherStrings(TfLiteContext* context, const TfLiteTensor* input,
     const auto string_ref = GetString(input, pos);
     buffer.AddString(string_ref.str, string_ref.len);
   }
-  buffer.WriteToTensor(output);
+  buffer.WriteToTensorAsVector(output);
   return kTfLiteOk;
 }
 
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",hashtable_lookup.cc,"@@ -137,7 +137,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     }
   }
   if (output->type == kTfLiteString) {
-    buf.WriteToTensor(output);
+    buf.WriteToTensorAsVector(output);
   }
 
   return kTfLiteOk;
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",skip_gram.cc,"@@ -107,7 +107,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
   // Generate n-grams recursively.
   tflite::DynamicBuffer buf;
   if (words.size() < params->ngram_size) {
-    buf.WriteToTensor(GetOutput(context, node, 0));
+    buf.WriteToTensorAsVector(GetOutput(context, node, 0));
     return kTfLiteOk;
   }
 
@@ -145,7 +145,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
     }
   }
 
-  buf.WriteToTensor(GetOutput(context, node, 0));
+  buf.WriteToTensorAsVector(GetOutput(context, node, 0));
   return kTfLiteOk;
 }
 }  // namespace
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",test_util.h,"@@ -199,7 +199,7 @@ class SingleOpModel {
     for (const string& s : content) {
       buf.AddString(s.data(), s.length());
     }
-    buf.WriteToTensor(tensor);
+    buf.WriteToTensor(tensor, /*new_shape=*/nullptr);
   }
 
   // Populate the tensor given its index.
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",normalize.cc,"@@ -92,7 +92,7 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
 
   tflite::DynamicBuffer buf;
   buf.AddString(result.data(), result.length());
-  buf.WriteToTensor(GetOutput(context, node, 0));
+  buf.WriteToTensorAsVector(GetOutput(context, node, 0));
   return kTfLiteOk;
 }
 
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",predictor.cc,"@@ -49,7 +49,7 @@ void ExecuteTfLite(const std::string& sentence,
     TfLiteTensor* input = interpreter->tensor(interpreter->inputs()[0]);
     tflite::DynamicBuffer buf;
     buf.AddString(sentence.data(), sentence.length());
-    buf.WriteToTensor(input);
+    buf.WriteToTensorAsVector(input);
     interpreter->AllocateTensors();
 
     interpreter->Invoke();
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",string_util.cc,"@@ -96,8 +96,7 @@ int DynamicBuffer::WriteToBuffer(char** buffer) {
   return bytes;
 }
 
-void DynamicBuffer::WriteToTensor(TfLiteTensor* tensor) {
-  // Set tensor content pointer to tensor_buffer, and release original data.
+void DynamicBuffer::WriteToTensorAsVector(TfLiteTensor* tensor) {
   auto dims = TfLiteIntArrayCreate(1);
   dims->data[0] = offset_.size() - 1;  // Store number of strings.
   WriteToTensor(tensor, dims);
@@ -108,6 +107,10 @@ void DynamicBuffer::WriteToTensor(TfLiteTensor* tensor,
   char* tensor_buffer;
   int bytes = WriteToBuffer(&tensor_buffer);
 
+  if (new_shape == nullptr) {
+    new_shape = TfLiteIntArrayCopy(tensor->dims);
+  }
+
   // Set tensor content pointer to tensor_buffer, and release original data.
   TfLiteTensorReset(tensor->type, tensor->name, new_shape, tensor->params,
                     tensor_buffer, bytes, kTfLiteDynamic, tensor->allocation,
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",string_util.h,"@@ -74,12 +74,18 @@ class DynamicBuffer {
   // The function allocates space for the buffer but does NOT take ownership.
   int WriteToBuffer(char** buffer);
 
-  // Fill content into a string tensor, with the given new_shape. The new
-  // shape must match the number of strings in this object.
+  // Fill content into a string tensor, with the given new_shape. The new shape
+  // must match the number of strings in this object. Caller relinquishes
+  // ownership of new_shape. If 'new_shape' is nullptr, keep the tensor's
+  // existing shape.
   void WriteToTensor(TfLiteTensor* tensor, TfLiteIntArray* new_shape);
 
   // Fill content into a string tensor. Set shape to {num_strings}.
-  void WriteToTensor(TfLiteTensor* tensor);
+  void WriteToTensorAsVector(TfLiteTensor* tensor);
+
+  // Deprecated. Use WriteToTensorAsVector() or pass in the new shape.
+  // TODO(b/120230709): remove when people migrate away.
+  void WriteToTensor(TfLiteTensor* tensor) { WriteToTensorAsVector(tensor); }
 
  private:
   // Data buffer to store contents of strings, not including headers.
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",string_util_test.cc,"@@ -55,7 +55,7 @@ TEST(StringUtil, TestStringUtil) {
   new_shape->data[0] = 2;
   new_shape->data[1] = 1;
   buf0.WriteToTensor(t0, new_shape);
-  buf1.WriteToTensor(t1);
+  buf1.WriteToTensorAsVector(t1);
 
   // Check tensor shapes.
   EXPECT_EQ(t0->dims->size, 2);
@@ -99,7 +99,7 @@ TEST(StringUtil, TestAddJoinedString) {
 
   DynamicBuffer buf;
   buf.AddJoinedString({{s0, 3}, {s1, 4}, {s2, 0}, {s3, 3}}, ' ');
-  buf.WriteToTensor(t0);
+  buf.WriteToTensorAsVector(t0);
 
   ASSERT_EQ(GetStringCount(t0), 1);
   StringRef str_ref;
@@ -115,7 +115,7 @@ TEST(StringUtil, TestEmptyList) {
   t0->type = kTfLiteString;
   t0->allocation_type = kTfLiteDynamic;
   DynamicBuffer buf;
-  buf.WriteToTensor(t0);
+  buf.WriteToTensorAsVector(t0);
 
   ASSERT_EQ(GetStringCount(t0), 0);
   ASSERT_EQ(t0->bytes, 8);
",0,train
3e66cee177a072657ad2aea628ad395446d1479e,tensorflow/tensorflow,"Rename WriteToTensor to make it explicit the fact that it set the shape to {size}

PiperOrigin-RevId: 223521732",benchmark_tflite_model.cc,"@@ -279,7 +279,7 @@ void BenchmarkTfLiteModel::PrepareInputsAndOutputs() {
       FillRandomString(&buffer, sizes, []() {
         return ""we're have some friends over saturday to hang out in the yard"";
       });
-      buffer.WriteToTensor(interpreter->tensor(i));
+      buffer.WriteToTensor(interpreter->tensor(i), /*new_shape=*/nullptr);
     } else {
       TFLITE_LOG(FATAL) << ""Don't know how to populate tensor "" << t->name
                         << "" of type "" << t->type;
",0,train
9a7e849472c954470de889cc8873223e4db1e4df,tensorflow/tensorflow,"* Passing `training_features` (without weight column) instead of `features` into GradientBoostedDecisionTreeModel.
* Export GTFlow model into generic format with features defined in proto.

PiperOrigin-RevId: 171766066",custom_export_strategy.py,"@@ -96,7 +96,8 @@ def make_custom_export_strategy(name,
 
 def convert_to_universal_format(dtec, sorted_feature_names,
                                 num_dense, num_sparse_float,
-                                num_sparse_int):
+                                num_sparse_int,
+                                feature_name_to_proto=None):
   """"""Convert GTFlow trees to universal format.""""""
   del num_sparse_int  # unused.
   model_and_features = generic_tree_model_pb2.ModelAndFeatures()
@@ -104,7 +105,11 @@ def convert_to_universal_format(dtec, sorted_feature_names,
   # feature is processed before it's fed to the model (e.g. bucketing
   # information). As of now, this serves as a list of features the model uses.
   for feature_name in sorted_feature_names:
-    model_and_features.features[feature_name].SetInParent()
+    if not feature_name_to_proto:
+      model_and_features.features[feature_name].SetInParent()
+    else:
+      model_and_features.features[feature_name].CopyFrom(
+          feature_name_to_proto[feature_name])
   model = model_and_features.model
   model.ensemble.summation_combination_technique.SetInParent()
   for tree_idx in range(len(dtec.trees)):
",0,train
9a7e849472c954470de889cc8873223e4db1e4df,tensorflow/tensorflow,"* Passing `training_features` (without weight column) instead of `features` into GradientBoostedDecisionTreeModel.
* Export GTFlow model into generic format with features defined in proto.

PiperOrigin-RevId: 171766066",model.py,"@@ -93,7 +93,7 @@ def model_builder(features, labels, mode, params, config):
       learner_config=learner_config,
       feature_columns=feature_columns,
       logits_dimension=head.logits_dimension,
-      features=features)
+      features=training_features)
   with ops.name_scope(""gbdt"", ""gbdt_optimizer""):
     predictions_dict = gbdt_model.predict(mode)
     logits = predictions_dict[""predictions""]
",0,train
fe15ce0d733794491fea0d51589dd7c7c779ff60,tensorflow/tensorflow,"Fix broken test after changing the constructor of Converter.

PiperOrigin-RevId: 272527750",convert_nodes_test.cc,"@@ -657,7 +657,7 @@ class ConverterTest : public ::testing::Test {
     builder_.reset(nvinfer1::createInferBuilder(logger_));
     network_.reset(builder_->createNetwork());
     converter_.reset(new Converter(network_.get(), TrtPrecisionMode::FP32,
-                                   /*use_calibration=*/false));
+                                   /*use_calibration=*/false, &logger_));
     weight_store_ = &converter_->weight_store_;
   }
 
@@ -995,8 +995,9 @@ TEST_F(ConverterTest, MaybeApplyQuantizationRanges) {
   // input -> infer1 -> infer2 -> infer3
   FakeITensor input, infer_1, infer_2, infer_3;
   FakeITensor not_infer;
+  Logger logger;
   Converter int8_converter(/*trt_network=*/nullptr, TrtPrecisionMode::INT8,
-                           /*use_calibration=*/true);
+                           /*use_calibration=*/true, &logger);
   int8_converter.ProvideQuantizationRange(&input, -5.0f, 5.0f);
   int8_converter.ProvideQuantizationRange(&not_infer, -100.0f, 100.0f);
   int8_converter.MarkQuantizationRangesAsInferrable(&input, &infer_1);
@@ -1257,7 +1258,7 @@ class OpConverterTest : public ::testing::Test {
 
     // Reset the converter.
     converter_.reset(new Converter(network_.get(), precision_mode_to_test_,
-                                   /*use_calibration=*/false));
+                                   /*use_calibration=*/false, &logger_));
 
     // Reset other related artifacts.
     scope_ = Scope::NewRootScope();
",0,train
075b37f91926e92aa3305ce12982f0128a59c0c6,tensorflow/tensorflow,"Include traceback for distributed variables.

Otherwise errors in variable creation fail to point to the correct line of code.

PiperOrigin-RevId: 256075666",values.py,"@@ -591,7 +591,7 @@ def _enter_or_assert_strategy(strategy):
 
 
 DistributedVarOp = collections.namedtuple(
-    ""DistributedVarOp"", [""name"", ""graph"", ""type""])
+    ""DistributedVarOp"", [""name"", ""graph"", ""traceback"", ""type""])
 
 
 class DistributedVariable(DistributedDelegate, variables_lib.AbstractVariable):
@@ -757,6 +757,7 @@ class DistributedVariable(DistributedDelegate, variables_lib.AbstractVariable):
     if distribution_strategy_context.in_cross_replica_context():
       return DistributedVarOp(self.primary.op.name,
                               self.primary.op.graph,
+                              self.primary.op.traceback,
                               self.primary.op.type)
     return self.get().op
 
@@ -885,7 +886,8 @@ class TPUVariableMixin(object):
   @property
   def op(self):
     return DistributedVarOp(
-        self.primary.op.name, self.primary.op.graph, self.primary.op.type)
+        self.primary.op.name, self.primary.op.graph, self.primary.op.traceback,
+        self.primary.op.type)
 
   def _dense_var_to_tensor(self, dtype=None, name=None, as_ref=False):
     """"""Converts a variable to a tensor.""""""
",0,train
075b37f91926e92aa3305ce12982f0128a59c0c6,tensorflow/tensorflow,"Include traceback for distributed variables.

Otherwise errors in variable creation fail to point to the correct line of code.

PiperOrigin-RevId: 256075666",values_test.py,"@@ -644,6 +644,24 @@ class MirroredVariableTest(test.TestCase, parameterized.TestCase):
     after_restore = self.evaluate(v)
     self.assertAllClose(before_save, after_restore)
 
+  @combinations.generate(
+      combinations.combine(
+          distribution=[
+              strategy_combinations.mirrored_strategy_with_one_cpu,
+              strategy_combinations.mirrored_strategy_with_gpu_and_cpu,
+              strategy_combinations.tpu_strategy,
+              strategy_combinations.central_storage_strategy_with_two_gpus,
+          ],
+          mode=[""graph""]))
+  def testTraceback(self, distribution):
+    with distribution.scope():
+      variable_scope.get_variable(
+          name=""testVar"", initializer=1., use_resource=True)
+      with self.assertRaisesRegex(
+          ValueError, ""Variable testVar already exists""):
+        variable_scope.get_variable(
+            name=""testVar"", initializer=1., use_resource=True)
+
 
 _TPU_STRATEGIES = (tpu_strategy.TPUStrategy, tpu_strategy.TPUStrategyV1)
 
",0,train
cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter.

PiperOrigin-RevId: 176149369",computation_builder.cc,"@@ -153,6 +153,7 @@ bool ComputationBuilder::MakeWindow(
     } else {
       dim->set_window_dilation(1);
     }
+    dim->set_window_reversal(false);
   }
   return true;
 }
",0,train
cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter.

PiperOrigin-RevId: 176149369",hlo_evaluator.cc,"@@ -814,7 +814,9 @@ class HloEvaluator::TypedVisitor : public DfsHloVisitorWithDefault {
             }
 
             rhs_index[dnums.kernel_spatial_dimensions(ki)] =
-                rhs_spatial_index[ki];
+                window_dim.window_reversal()
+                    ? ((window_dim.size() - 1) - rhs_spatial_index[ki])
+                    : rhs_spatial_index[ki];
           }
 
           result_val += lhs_literal.Get<ReturnT>(lhs_index) *
",0,train
cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter.

PiperOrigin-RevId: 176149369",hlo_evaluator_test.cc,"@@ -794,6 +794,83 @@ TEST_F(HloEvaluatorTest, Simple4x4Conv2DWith2x2Kernel) {
   LiteralTestUtil::ExpectEqual(*expected, *result);
 }
 
+TEST_F(HloEvaluatorTest, Conv2DGeneralDimensionsReversed) {
+  HloComputation::Builder b(TestName());
+
+  // clang-format off
+  // Input dimensions: [feature=2, height=3, batch=1, width=4]
+  Array4D<float> input({
+    {{{1, 2, 3, 4}},
+     {{5, 6, 7, 8}},
+     {{9, 10, 11, 12}}},
+    {{{13, 14, 15, 16}},
+     {{17, 18, 19, 20}},
+     {{21, 22, 23, 24}}}
+  });
+  // Weight dimensions:
+  // [kernel_output_feature=1, width=3, kernel_input_feature=2, height=3]
+  Array4D<float> weight({{
+    {{1, 7, 13},
+     {4, 10, 16}},
+    {{2, 8, 14},
+     {5, 11, 17}},
+    {{3, 9, 15},
+     {6, 12, 18}}
+  }});
+  // clang-format on
+
+  auto lhs_literal = Literal::CreateR4FromArray4D<float>(input);
+  HloInstruction* lhs_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(lhs_literal)));
+
+  auto rhs_literal = Literal::CreateR4FromArray4D<float>(weight);
+  HloInstruction* rhs_instruction =
+      b.AddInstruction(HloInstruction::CreateConstant(std::move(rhs_literal)));
+  rhs_instruction = b.AddInstruction(HloInstruction::CreateReverse(
+      rhs_instruction->shape(), rhs_instruction, {3, 1}));
+
+  Window window;
+  WindowDimension dim;
+  dim.set_size(3);
+  dim.set_stride(1);
+  dim.set_padding_low(0);
+  dim.set_padding_high(0);
+  dim.set_window_dilation(1);
+  dim.set_base_dilation(1);
+  dim.set_window_reversal(true);
+  *window.add_dimensions() = dim;
+  *window.add_dimensions() = dim;
+
+  ConvolutionDimensionNumbers dnums;
+  dnums.set_input_batch_dimension(2);
+  dnums.set_output_batch_dimension(2);
+  dnums.set_input_feature_dimension(0);
+  dnums.set_output_feature_dimension(0);
+  dnums.add_spatial_dimensions(1);
+  dnums.add_spatial_dimensions(3);
+
+  dnums.set_kernel_output_feature_dimension(0);
+  dnums.set_kernel_input_feature_dimension(2);
+  dnums.add_kernel_spatial_dimensions(3);
+  dnums.add_kernel_spatial_dimensions(1);
+
+  const Shape& shape = ShapeUtil::MakeShape(F32, {1, 1, 1, 2});
+  b.AddInstruction(HloInstruction::CreateConvolve(
+      shape, lhs_instruction, rhs_instruction, window, dnums));
+  auto computation = module().AddEntryComputation(b.Build());
+
+  std::unique_ptr<Literal> result =
+      evaluator_->Evaluate(*computation, {}).ConsumeValueOrDie();
+
+  // clang-format off
+  // Result dimensions: [feature=1, height=1, batch=1, width=2]
+  Array4D<float> expected_array({{{{2514, 2685}}}});
+  // clang-format on
+  auto expected = Literal::CreateR4FromArray4D<float>(expected_array);
+
+  LiteralTestUtil::ExpectEqual(*expected, *result);
+}
+
 TEST_F(HloEvaluatorTest, Conv2DGeneralDimensions) {
   HloComputation::Builder b(TestName());
 
",0,train
cdb9f312f1a00e3fb90f14d79aca2fa9dcab8f21,tensorflow/tensorflow,"Add field to HLO proto field to enable reversing a convolution filter.

PiperOrigin-RevId: 176149369",window_util.cc,"@@ -44,6 +44,9 @@ namespace window_util {
   if (dim.window_dilation() != 1) {
     StrAppend(&str, "",window_dilation="", dim.window_dilation());
   }
+  if (dim.window_reversal()) {
+    StrAppend(&str, "",window_reversal"");
+  }
   StrAppend(&str, "")"");
   return str;
 }
",0,train
2a85bf4a14cf02f7b9cc6258c750f5f0e9fb385c,tensorflow/tensorflow,"Fix minimal logging build for macos

PiperOrigin-RevId: 315321111
Change-Id: I205b82403e663bc415156cbe7e1d82e3b8866e93",platform.h,"@@ -34,6 +34,7 @@ limitations under the License.
 #define PLATFORM_POSIX_IOS
 #define IS_MOBILE_PLATFORM
 #else
+// If no platform specified, use:
 #define PLATFORM_POSIX
 #endif
 
",0,train
2a85bf4a14cf02f7b9cc6258c750f5f0e9fb385c,tensorflow/tensorflow,"Fix minimal logging build for macos

PiperOrigin-RevId: 315321111
Change-Id: I205b82403e663bc415156cbe7e1d82e3b8866e93",model_test.cc,"@@ -18,10 +18,6 @@ limitations under the License.
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
 #include <fstream>
 #include <iostream>
 
",0,train
ec4f65aa468c6f304d67693850b846a5bc5d059b,tensorflow/tensorflow,"Always validate `node_id` range

PiperOrigin-RevId: 411133308
Change-Id: I917cf026132d2374abdb8d6e06c8a925d031a74c",stats_ops.cc,"@@ -1187,8 +1187,10 @@ class BoostedTreesSparseCalculateBestFeatureSplitOp : public OpKernel {
         f_map.clear();
       }
       previous_node_id = node_id;
-      DCHECK_LE(node_id_first, node_id);
-      DCHECK_LT(node_id, node_id_last);
+      OP_REQUIRES(
+          context, node_id_first <= node_id && node_id < node_id_last,
+          errors::InvalidArgument(""node_id = "", node_id, "" which is not in ["",
+                                  node_id_first, "", "", node_id_last, "")""));
       const int32_t feature_dim = stats_summary_indices(idx, 1);
       const int32_t bucket_id = stats_summary_indices(idx, 2);
       const int32_t stat_dim = stats_summary_indices(idx, 3);
",0,train
74a6cca5d867d37e79ec9d780f2c57b926f07a80,tensorflow/tensorflow,"Removed a linear scan in dtypes.as_dtype

PiperOrigin-RevId: 229152423",dtypes.py,"@@ -535,29 +535,31 @@ _np_qint32 = np.dtype([(""qint32"", np.int32, 1)])
 np_resource = np.dtype([(""resource"", np.ubyte, 1)])
 
 # Standard mappings between types_pb2.DataType values and numpy.dtypes.
-_NP_TO_TF = frozenset([
-    (np.float16, float16),
-    (np.float32, float32),
-    (np.float64, float64),
-    (np.int32, int32),
-    (np.int64, int64),
-    (np.uint8, uint8),
-    (np.uint16, uint16),
-    (np.uint32, uint32),
-    (np.uint64, uint64),
-    (np.int16, int16),
-    (np.int8, int8),
-    (np.complex64, complex64),
-    (np.complex128, complex128),
-    (np.object_, string),
-    (np.bool_, bool),
-    (_np_qint8, qint8),
-    (_np_quint8, quint8),
-    (_np_qint16, qint16),
-    (_np_quint16, quint16),
-    (_np_qint32, qint32),
-    (_np_bfloat16, bfloat16),
-])
+_NP_TO_TF = {
+    np.float16: float16,
+    np.float32: float32,
+    np.float64: float64,
+    np.int32: int32,
+    np.int64: int64,
+    np.uint8: uint8,
+    np.uint16: uint16,
+    np.uint32: uint32,
+    np.uint64: uint64,
+    np.int16: int16,
+    np.int8: int8,
+    np.complex64: complex64,
+    np.complex128: complex128,
+    np.object_: string,
+    np.string_: string,
+    np.unicode_: string,
+    np.bool_: bool,
+    _np_qint8: qint8,
+    _np_quint8: quint8,
+    _np_qint16: qint16,
+    _np_quint16: quint16,
+    _np_qint32: qint32,
+    _np_bfloat16: bfloat16,
+}
 _TF_TO_NP = {
     types_pb2.DT_HALF:
         np.float16,
@@ -664,6 +666,20 @@ _PYTHON_TO_TF = {
     builtins.object: string
 }
 
+_ANY_TO_TF = {}
+_ANY_TO_TF.update(_INTERN_TABLE)
+_ANY_TO_TF.update(_STRING_TO_TF)
+_ANY_TO_TF.update(_PYTHON_TO_TF)
+_ANY_TO_TF.update(_NP_TO_TF)
+
+# Ensure no collisions.
+assert len(_ANY_TO_TF) == sum(len(d) for d in [
+    _INTERN_TABLE,
+    _STRING_TO_TF,
+    _PYTHON_TO_TF,
+    _NP_TO_TF
+])
+
 
 @tf_export(""dtypes.as_dtype"", ""as_dtype"")
 def as_dtype(type_value):
@@ -684,36 +700,16 @@ def as_dtype(type_value):
   if isinstance(type_value, DType):
     return type_value
 
-  try:
-    return _INTERN_TABLE[type_value]
-  except KeyError:
-    pass
-
-  try:
-    return _STRING_TO_TF[type_value]
-  except KeyError:
-    pass
+  if isinstance(type_value, np.dtype):
+    try:
+      return _NP_TO_TF[type_value.type]
+    except KeyError:
+      pass
 
   try:
-    return _PYTHON_TO_TF[type_value]
+    return _ANY_TO_TF[type_value]
   except KeyError:
     pass
 
-  if isinstance(type_value, np.dtype):
-    # The numpy dtype for strings is variable length. We can not compare
-    # dtype with a single constant (np.string does not exist) to decide
-    # dtype is a ""string"" type. We need to compare the dtype.type to be
-    # sure it's a string type.
-    if type_value.type == np.string_ or type_value.type == np.unicode_:
-      return string
-
-  if isinstance(type_value, (type, np.dtype)):
-    for key, val in _NP_TO_TF:
-      try:
-        if key == type_value:
-          return val
-      except TypeError as e:
-        raise TypeError(""Cannot convert {} to a dtype. {}"".format(
-            type_value, e))
-
-  raise TypeError(""Cannot convert value %r to a TensorFlow DType."" % type_value)
+  raise TypeError(
+      ""Cannot convert value %r to a TensorFlow DType."" % type_value)
",0,train
d7527088595cbc89778ee8d1b3e8361be0cb75cf,tensorflow/tensorflow,"Fix ""Converting DataType 'INVALID' to MLIR Type"" bug

PiperOrigin-RevId: 276387998
Change-Id: Ide7dd335e1d1c1463e318ae39de3ac84a9aeeddf",graphdef_to_tfl_flatbuffer.cc,"@@ -106,10 +106,30 @@ Status ConvertGraphDefToTFLiteFlatBuffer(const toco::ModelFlags& model_flags,
   std::vector<double> node_maxs;
   tensorflow::DataType inference_type =
       ConvertIODataTypeToDataType(toco_flags.inference_type());
+
+  // Build a map from placeholder to data types.
+  llvm::StringMap<DataType> placeholder_data_type_map;
+  for (const NodeDef& node_def : input.node()) {
+    if (node_def.op() == ""Placeholder"" && node_def.attr().count(""dtype"") > 0) {
+      placeholder_data_type_map[node_def.name()] =
+          node_def.attr().at(""dtype"").type();
+    }
+  }
+
   for (auto& flag : model_flags.input_arrays()) {
+    // TOCO doesn't require `data_type` to be filled for every input.
+    // If it's not filled, try to get the data type from the placeholder.
+    auto toco_data_type = flag.data_type();
+    DataType data_type;
+    if (toco_data_type == ::toco::IODataType::IO_DATA_TYPE_UNKNOWN &&
+        placeholder_data_type_map.find(flag.name()) !=
+            placeholder_data_type_map.end()) {
+      data_type = placeholder_data_type_map[flag.name()];
+    } else {
+      data_type = ConvertIODataTypeToDataType(toco_data_type);
+    }
     node_names.push_back(flag.name());
-    node_dtypes.push_back(
-        DataType_Name(ConvertIODataTypeToDataType(flag.data_type())));
+    node_dtypes.push_back(DataType_Name(data_type));
     node_shapes.push_back(std::vector<int>(flag.shape().dims().begin(),
                                            flag.shape().dims().end()));
 
",0,train
7448a2b927cda64446a57bca785ac2b58a16cf12,tensorflow/tensorflow,"Don't strip default attributes when sending from master to workers

PiperOrigin-RevId: 287220931
Change-Id: Ide0a4131bd35952ed63129b1112626d3683d885b",master_session.cc,"@@ -31,7 +31,6 @@ limitations under the License.
 #include ""tensorflow/core/framework/allocation_description.pb.h""
 #include ""tensorflow/core/framework/collective.h""
 #include ""tensorflow/core/framework/cost_graph.pb.h""
-#include ""tensorflow/core/framework/graph_def_util.h""
 #include ""tensorflow/core/framework/node_def.pb.h""
 #include ""tensorflow/core/framework/node_def_util.h""
 #include ""tensorflow/core/framework/tensor.h""
@@ -473,8 +472,8 @@ Status MasterSession::ReffedClientGraph::DoRegisterPartitions(
     c->req.set_session_handle(session_handle_);
     c->req.set_create_worker_session_called(!should_deregister_);
     c->req.mutable_graph_def()->Swap(&graph_partitions[part.name]);
-    StripDefaultAttributes(*OpRegistry::Global(),
-                           c->req.mutable_graph_def()->mutable_node());
+    // TODO(b/146354085): Default attributes should be stripped here from
+    // c->req.graph_def(), but this causes some TFX pipelines to fail.
     *c->req.mutable_config_proto() = session_opts_.config;
     *c->req.mutable_graph_options() = session_opts_.config.graph_options();
     *c->req.mutable_debug_options() =
",0,train
79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables.
Change: 146034474",resource_variable_ops.cc,"@@ -230,11 +230,7 @@ REGISTER_KERNEL_BUILDER(Name(""VarIsInitializedOp"").Device(DEVICE_GPU),
 template <typename Device, typename T, typename Index>
 class ResourceGatherOp : public OpKernel {
  public:
-  explicit ResourceGatherOp(OpKernelConstruction* c) : OpKernel(c) {
-    const DataType dt = DataTypeToEnum<T>::v();
-    const DataType index_t = DataTypeToEnum<Index>::v();
-    OP_REQUIRES_OK(c, c->MatchSignature({dt, index_t}, {dt}));
-  }
+  explicit ResourceGatherOp(OpKernelConstruction* c) : OpKernel(c) {}
 
   void Compute(OpKernelContext* c) override {
     Var* v = nullptr;
",0,test
79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables.
Change: 146034474",embedding_ops_test.py,"@@ -162,7 +162,8 @@ def _EmbeddingParams(num_shards,
 def _EmbeddingParamsAsPartitionedVariable(num_shards,
                                           vocab_size,
                                           dtype=dtypes.float32,
-                                          shape=None):
+                                          shape=None,
+                                          use_resource=False):
   p, params, feed_dict = _EmbeddingParams(
       num_shards, vocab_size, dtype=dtype, shape=shape)
   shape = shape or [10]
@@ -171,7 +172,8 @@ def _EmbeddingParamsAsPartitionedVariable(num_shards,
       shape=[vocab_size] + shape,
       initializer=array_ops.concat([params[p_i.name] for p_i in p], 0),
       partitioner=partitioned_variables.min_max_variable_partitioner(
-          max_partitions=num_shards, min_slice_size=1))
+          max_partitions=num_shards, min_slice_size=1),
+      use_resource=use_resource)
   return p, partitioned_variable, params, feed_dict
 
 
@@ -300,6 +302,29 @@ class EmbeddingLookupTest(test.TestCase):
     self.assertAllEqual(np_result, tf_result)
     self.assertShapeEqual(np_result, embedding)
 
+  def testSimpleShardedPartitionedResourceVariable(self):
+    with self.test_session() as sess:
+      num_shards = 2
+      vocab_size = 4
+      p, p_variable, params, _ = _EmbeddingParamsAsPartitionedVariable(
+          num_shards, vocab_size, use_resource=True)
+
+      id_vals = np.array([0, 0])
+      ids = constant_op.constant(list(id_vals), dtype=dtypes.int32)
+      print(""Construct ids"", ids.get_shape())
+      embedding = embedding_ops.embedding_lookup(p_variable, ids)
+      variables.global_variables_initializer().run()
+      params_values = [params[p_i.name] for p_i in p]
+      # Test that the PartitionedVariable components equal the list in p
+      p_var_val = sess.run(list(p_variable))
+      # Actual test
+      print(ops.get_default_graph().as_graph_def())
+      tf_result = embedding.eval()
+    np_result, _, _ = _EmbeddingResult(params, id_vals, num_shards, vocab_size)
+    self.assertAllEqual(params_values, p_var_val)
+    self.assertAllEqual(np_result, tf_result)
+    self.assertShapeEqual(np_result, embedding)
+
   def testShardedModPartitioningInt32Ids(self):
     with self.test_session():
       num_shards = 5
",0,test
79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables.
Change: 146034474",embedding_ops.py,"@@ -33,6 +33,14 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 
 
+def _do_gather(params, ids, validate_indices=True, name=None):
+  """"""Deals with doing gather differently for resource variables.""""""
+  if isinstance(params, resource_variable_ops.ResourceVariable):
+    return params.sparse_read(ids, name=name)
+  return array_ops.gather(
+      params, ids, name=name, validate_indices=validate_indices)
+
+
 def embedding_lookup(params, ids, partition_strategy=""mod"", name=None,
                      validate_indices=True, max_norm=None):
   """"""Looks up `ids` in a list of embedding tensors.
@@ -100,16 +108,15 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None,
     return x
   with ops.name_scope(name, ""embedding_lookup"", params + [ids]) as name:
     np = len(params)  # Number of partitions
-    params = ops.convert_n_to_tensor_or_indexed_slices(params, name=""params"")
+    # Preserve the resource variable status to avoid accidental dense reads.
+    if not any(isinstance(p, resource_variable_ops.ResourceVariable)
+               for p in params):
+      params = ops.convert_n_to_tensor_or_indexed_slices(params, name=""params"")
     if np == 1:
       with ops.colocate_with(params[0]):
-        # TODO(apassos): implement the sharded version as well.
-        if isinstance(params[0], resource_variable_ops.ResourceVariable):
-          ret = params[0].sparse_read(ids, name=name)
-        else:
-          ret = array_ops.gather(params[0], ids, name=name,
-                                 validate_indices=validate_indices)
-      return maybe_normalize(ret)
+        return maybe_normalize(
+            _do_gather(
+                params[0], ids, validate_indices=validate_indices, name=name))
     else:
       ids = ops.convert_to_tensor(ids, name=""ids"")
       flat_ids = array_ops.reshape(ids, [-1])
@@ -169,9 +176,9 @@ def embedding_lookup(params, ids, partition_strategy=""mod"", name=None,
       partitioned_result = []
       for p in xrange(np):
         with ops.colocate_with(params[p]):
-          partitioned_result.append(array_ops.gather(
-              params[p], gather_ids[p],
-              validate_indices=validate_indices))
+          partitioned_result.append(
+              _do_gather(params[p], gather_ids[p],
+                         validate_indices=validate_indices))
       # Stitch these back together
       ret = data_flow_ops.dynamic_stitch(pindices, partitioned_result,
                                          name=name)
",0,test
79a93ac627b9af8ae84a874ce248fe42aac8de36,tensorflow/tensorflow,"Support partitioned embedding lookup for resource variables.
Change: 146034474",resource_variable_ops.py,"@@ -241,9 +241,9 @@ class ResourceVariable(object):
 
   def sparse_read(self, indices, collections=None, trainable=True, name=None):
     """"""Reads the value of this variable sparsely, using `gather`.""""""
-    with ops.name_scope(""Gather"" if name is None else name):
+    with ops.name_scope(""Gather"" if name is None else name) as name:
       value = gen_resource_variable_ops.resource_gather(
-          self._handle, indices, dtype=self._dtype)
+          self._handle, indices, dtype=self._dtype, name=name)
     _register_variable_read(value, collections=collections, trainable=trainable)
     return array_ops.identity(value)
 
",0,test
2f15f9a594730757898ee5ece214135d45de212f,tensorflow/tensorflow,"NFC: Update signature of ConvertDotDimensionNumbers

Planning to add ConvertConvDimensionNumbers in a follow-up change.

PiperOrigin-RevId: 273624510",hlo_function_importer.cc,"@@ -92,13 +92,12 @@ StatusOr<DenseElementsAttr> CreateDenseAttrFromLiteral(ShapedType type,
 
 // Returns whether the instruction is a default dot operation.
 bool DotIsDefault(const HloInstruction* instruction) {
-  auto dot_dimensions = instruction->dot_dimension_numbers();
+  auto dnums = instruction->dot_dimension_numbers();
   DotDimensionNumbers default_dimension_numbers;
   default_dimension_numbers.add_lhs_contracting_dimensions(
       instruction->operand(0)->shape().dimensions_size() == 1 ? 0 : 1);
   default_dimension_numbers.add_rhs_contracting_dimensions(0);
-  return xla::protobuf_util::ProtobufEquals(dot_dimensions,
-                                            default_dimension_numbers);
+  return xla::protobuf_util::ProtobufEquals(dnums, default_dimension_numbers);
 }
 }  // namespace
 
@@ -250,8 +249,8 @@ StatusOr<mlir::Operation*> HloFunctionImporter::ImportInstruction(
         MakeAndReturn(DotOp);
       }
 
-      attributes.push_back(builder_->getNamedAttr(
-          ""dot_dimension_numbers"", ConvertDotDimensionNumbers(instruction)));
+      attributes.push_back(
+          ConvertDotDimensionNumbers(instruction->dot_dimension_numbers()));
       MakeAndReturn(DotGeneralOp);
     }
     case HloOpcode::kCall: {
@@ -581,21 +580,18 @@ mlir::DenseIntElementsAttr HloFunctionImporter::Convert(
       .cast<DenseIntElementsAttr>();
 }
 
-mlir::xla_hlo::DotDimensionNumbers
-HloFunctionImporter::ConvertDotDimensionNumbers(HloInstruction* instruction) {
-  auto dot_dimensions = instruction->dot_dimension_numbers();
+mlir::NamedAttribute HloFunctionImporter::ConvertDotDimensionNumbers(
+    const DotDimensionNumbers& dnums) {
   std::vector<int64_t> rhs_contracting_dimensions(
-      dot_dimensions.rhs_contracting_dimensions().begin(),
-      dot_dimensions.rhs_contracting_dimensions().end());
+      dnums.rhs_contracting_dimensions().begin(),
+      dnums.rhs_contracting_dimensions().end());
   std::vector<int64_t> lhs_contracting_dimensions(
-      dot_dimensions.lhs_contracting_dimensions().begin(),
-      dot_dimensions.lhs_contracting_dimensions().end());
+      dnums.lhs_contracting_dimensions().begin(),
+      dnums.lhs_contracting_dimensions().end());
   std::vector<int64_t> rhs_batch_dimensions(
-      dot_dimensions.rhs_batch_dimensions().begin(),
-      dot_dimensions.rhs_batch_dimensions().end());
+      dnums.rhs_batch_dimensions().begin(), dnums.rhs_batch_dimensions().end());
   std::vector<int64_t> lhs_batch_dimensions(
-      dot_dimensions.lhs_batch_dimensions().begin(),
-      dot_dimensions.lhs_batch_dimensions().end());
+      dnums.lhs_batch_dimensions().begin(), dnums.lhs_batch_dimensions().end());
 
   // Push the attributes into our new DictionaryAttr.
   auto lhs_batch_dims_attr = Convert(lhs_batch_dimensions);
@@ -603,9 +599,10 @@ HloFunctionImporter::ConvertDotDimensionNumbers(HloInstruction* instruction) {
   auto lhs_contracting_dims_attr = Convert(lhs_contracting_dimensions);
   auto rhs_contracting_dims_attr = Convert(rhs_contracting_dimensions);
 
-  return mlir::xla_hlo::DotDimensionNumbers::get(
+  auto attr = mlir::xla_hlo::DotDimensionNumbers::get(
       lhs_batch_dims_attr, rhs_batch_dims_attr, lhs_contracting_dims_attr,
       rhs_contracting_dims_attr, context_);
+  return builder_->getNamedAttr(""dot_dimension_numbers"", attr);
 }
 
 }  // namespace xla
",0,train
2f15f9a594730757898ee5ece214135d45de212f,tensorflow/tensorflow,"NFC: Update signature of ConvertDotDimensionNumbers

Planning to add ConvertConvDimensionNumbers in a follow-up change.

PiperOrigin-RevId: 273624510",hlo_function_importer.h,"@@ -105,9 +105,9 @@ class HloFunctionImporter {
   // Converts Array ref to an DenseIntElementsAttr.
   mlir::DenseIntElementsAttr Convert(llvm::ArrayRef<int64_t> op_dimensions);
 
-  // Converts the dot dimensions to attributes.
-  mlir::xla_hlo::DotDimensionNumbers ConvertDotDimensionNumbers(
-      xla::HloInstruction* instruction);
+  // Converts the dot dimensions to attribute.
+  mlir::NamedAttribute ConvertDotDimensionNumbers(
+      const DotDimensionNumbers& dnums);
 
   mlir::MLIRContext* context_;
   mlir::ModuleOp module_;
",0,train
35614e1eb507a99df2a5d953f0c4d2dfb55efc2c,tensorflow/tensorflow,"Change QuantizeSoftmaxOutput to a template, dropping the unused output_data parameter.

On ARM64, switch uint8_t to use std::round instead of add and truncate, it's faster.

PiperOrigin-RevId: 300854002
Change-Id: Ie3bd951b9c1c7747e13e17be5c2ef59f07f10992",optimized_ops.h,"@@ -4071,16 +4071,20 @@ inline void Softmax(const SoftmaxParams& params,
   }
 }
 
-inline int32_t QuantizeSoftmaxOutput(int8_t* output_data, float prob_rescaled,
-                                     int32_t zero_point) {
+template <typename T>
+inline int32_t QuantizeSoftmaxOutput(float prob_rescaled, int32_t zero_point) {
   const int32_t prob_rnd = static_cast<int32_t>(std::round(prob_rescaled));
   return prob_rnd + zero_point;
 }
 
-inline int32_t QuantizeSoftmaxOutput(uint8_t* output_data, float prob_rescaled,
-                                     int32_t zero_point) {
+#if !__aarch64__
+// With ARM64, rounding is faster than add + truncation.
+template <>
+inline int32_t QuantizeSoftmaxOutput<uint8_t>(float prob_rescaled,
+                                              int32_t zero_point) {
   return static_cast<int32_t>(prob_rescaled + 0.5f);
 }
+#endif
 
 inline void PopulateSoftmaxLookupTable(SoftmaxParams* data, float input_scale,
                                        float beta) {
@@ -4123,7 +4127,7 @@ inline void Softmax(const SoftmaxParams& params,
     for (int j = 0; j < last_dim; ++j) {
       const float prob_rescaled = table_offset[input_data[j]] * inv_sum_exp;
       const int32_t prob_quantized =
-          QuantizeSoftmaxOutput(output_data, prob_rescaled, params.zero_point);
+          QuantizeSoftmaxOutput<T>(prob_rescaled, params.zero_point);
       output_data[j] = static_cast<T>(
           std::max(std::min(clamp_max, prob_quantized), clamp_min));
     }
",0,test
b3b6085d6f81fe6ad47a72e8289ed93f98952e8d,tensorflow/tensorflow,Fix a typo in input_producer documentation,input.py,"@@ -92,7 +92,7 @@ def input_producer(input_tensor, element_shape=None, num_epochs=None,
   """"""Output the rows of `input_tensor` to a queue for an input pipeline.
 
   Args:
-    input_tensor: A tensor with the rows to produce. Must be at
+    input_tensor: A tensor with the rows to produce. Must be at least
       one-dimensional. Must either have a fully-defined shape, or
       `element_shape` must be defined.
     element_shape: (Optional.) A `TensorShape` representing the shape of a
",0,test
f46041c9937309ec09270f83b4153eb71105ce37,tensorflow/tensorflow,"Adds missing functions to LabeledTensor's __init__.py.
Change: 148149131",__init__.py,"@@ -72,6 +72,8 @@ digamma = _core.digamma
 erf = _core.erf
 erfc = _core.erfc
 logical_not = _core.logical_not
+tanh = _core.tanh
+sigmoid = _core.sigmoid
 
 add = _core.add
 sub = _core.sub
",0,train
dfb532cb5de2ea7d067ec42f5b81e02a2148c3ac,tensorflow/tensorflow,"Register GPU kernels for placeholder to make placer happy.
Change: 123984360",constant_op.cc,"@@ -273,5 +273,10 @@ class PlaceholderOp : public OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(Name(""Placeholder"").Device(DEVICE_CPU), PlaceholderOp);
+// The following GPU kernel registration is used to address the situation that
+// a placeholder is added in a GPU device context and soft placement is false.
+// Since a placeholder should never be executed, adding these GPU kernels has
+// no effect on graph execution.
+REGISTER_KERNEL_BUILDER(Name(""Placeholder"").Device(DEVICE_GPU), PlaceholderOp);
 
 }  // namespace tensorflow
",0,train
92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default

- Doing this because a performance regression was observed in some
  cases (need to investigate further).
- The new kernels are now only used if determinism is required or if
  TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set.
- This commit also fixes the determinism test on Windows.",segment_reduction_ops.h,"@@ -25,7 +25,7 @@ namespace tensorflow {
 
 class OpKernelContext;
 
-bool UseNonDeterministicSegmentReductions();
+bool UseDeterministicSegmentReductions();
 bool DisableSegmentReductionOpDeterminismExceptions();
 
 // Type of SparseSegmentReduction operation to perform gradient of.
",0,train
92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default

- Doing this because a performance regression was observed in some
  cases (need to investigate further).
- The new kernels are now only used if determinism is required or if
  TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set.
- This commit also fixes the determinism test on Windows.",segment_reduction_ops_gpu.cu.h,"@@ -712,11 +712,22 @@ void SegmentReductionFunctor<
   const Index input_inner_dim_size = input_total_size / input_outer_dim_size;
   const Index num_segments = output.size() / input_inner_dim_size;
 
+    bool use_deterministic_kernels =
+#if defined(PLATFORM_WINDOWS)
+        // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows CI
+        // build error.
+        false;
+#else
+        UseDeterministicSegmentReductions() ||
+        (OpDeterminismRequired() &&
+         !ReduceOpIsAssociative<ReductionF, T>::value);
+#endif
+
   // TODO(benbarsdell): If there are no performance concerns with the new
   // deterministic kernels, remove this runtime check and only compile the old
   // non-deterministic kernels on Windows (as a workaround for the build failure
   // issue).
-  if (UseNonDeterministicSegmentReductions()) {
+  if (!use_deterministic_kernels) {
     // Set 'output' to initial value.
     GpuLaunchConfig config = GetGpuLaunchConfig(output.size(), d);
     const T InitialValue = InitialValueF()();
@@ -774,8 +785,8 @@ void SegmentReductionFunctor<
             /*indices=*/static_cast<const Index*>(nullptr),
             /*weights=*/static_cast<T*>(nullptr), output.data()));
 #else
-    // Note: Shouldn't reach here because UseNonDeterministicSegmentReductions()
-    // always returns true on Windows.
+    // Note: Shouldn't reach here because use_deterministic_kernels is always
+    // false on Windows.
     OP_REQUIRES(ctx, false,
                 errors::Unimplemented(""Deterministic segment reductions are ""
                                       ""not implemented on Windows.""));
@@ -794,8 +805,19 @@ struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
       return;
     }
 
+    bool use_deterministic_kernels =
+#if defined(PLATFORM_WINDOWS)
+        // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows CI
+        // build error.
+        false;
+#else
+        UseDeterministicSegmentReductions() ||
+        (!ReduceOpIsAssociative<ReductionF, T>::value &&
+         OpDeterminismRequired());
+#endif
+
     bool determinism_requirement_met =
-        !UseNonDeterministicSegmentReductions() ||
+        use_deterministic_kernels ||
         ReduceOpIsAssociative<ReductionF, T>::value ||
         !OpDeterminismRequired() ||
         DisableSegmentReductionOpDeterminismExceptions();
@@ -819,7 +841,7 @@ struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
     // deterministic kernels, remove this runtime check and only compile the old
     // non-deterministic kernels on Windows (as a workaround for the build
     // failure issue).
-    if (UseNonDeterministicSegmentReductions()) {
+    if (!use_deterministic_kernels) {
       // Set 'output' to initial value.
       GPUDevice d = ctx->template eigen_device<GPUDevice>();
       GpuLaunchConfig config = GetGpuLaunchConfig(output.size(), d);
@@ -876,8 +898,8 @@ struct UnsortedSegmentFunctor<GPUDevice, T, Index, InitialValueF, ReductionF> {
               /*segment_ids=*/segment_ids_ptr, /*indices=*/sorted_indices_ptr,
               /*weights=*/static_cast<T*>(nullptr), output.data()));
 #else
-      // Note: Shouldn't reach here because
-      // UseNonDeterministicSegmentReductions() always returns true on Windows.
+      // Note: Shouldn't reach here because use_deterministic_kernels is always
+      // false on Windows.
       OP_REQUIRES(
           ctx, false,
           errors::Unimplemented(""Deterministic unsorted segment reductions are ""
",0,train
92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default

- Doing this because a performance regression was observed in some
  cases (need to investigate further).
- The new kernels are now only used if determinism is required or if
  TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set.
- This commit also fixes the determinism test on Windows.",segment_reduction_ops_gpu_0.cu.cc,"@@ -20,19 +20,19 @@ limitations under the License.
 
 namespace tensorflow {
 
-bool UseNonDeterministicSegmentReductions() {
+bool UseDeterministicSegmentReductions() {
   // See comment below regarding CI build error on Windows.
 #if !defined(PLATFORM_WINDOWS)
   static bool cached_result = [] {
     bool result = false;
     TF_CHECK_OK(tensorflow::ReadBoolFromEnvVar(
-        ""TF_USE_NONDETERMINISTIC_SEGMENT_REDUCTIONS"",
+        ""TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS"",
         /*default_val=*/false, &result));
     return result;
   }();
   return cached_result;
 #else
-  return true;
+  return false;
 #endif
 }
 
",0,train
92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default

- Doing this because a performance regression was observed in some
  cases (need to investigate further).
- The new kernels are now only used if determinism is required or if
  TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set.
- This commit also fixes the determinism test on Windows.",segment_reduction_ops_impl.h,"@@ -296,11 +296,22 @@ class SegmentReductionGPUOp : public AsyncOpKernel {
       OP_REQUIRES_OK_ASYNC(
           context, context->allocate_output(0, output_shape, &output), done);
 
+      bool use_deterministic_kernels =
+#if defined(PLATFORM_WINDOWS)
+          // See comment in segment_reduction_ops_gpu_0.cu.cc regarding Windows
+          // CI build error.
+          false;
+#else
+          UseDeterministicSegmentReductions() ||
+          (!SegmentReductionFunctor::atomic_reduction_is_associative &&
+           OpDeterminismRequired());
+#endif
+
       // The determinism check is here, rather than inside the functor (as it is
       // for the unsorted segment reduction ops) because the done callback
       // (required for OP_REQUIRES_ASYNC) is not available inside the functor.
       bool determinism_requirement_met =
-          !UseNonDeterministicSegmentReductions() ||
+          use_deterministic_kernels ||
           SegmentReductionFunctor::atomic_reduction_is_associative ||
           !OpDeterminismRequired() ||
           DisableSegmentReductionOpDeterminismExceptions();
",0,train
92b294fac35d905f62ac89d66eeaf6e99054f954,tensorflow/tensorflow,"Disable new segment reduction kernels by default

- Doing this because a performance regression was observed in some
  cases (need to investigate further).
- The new kernels are now only used if determinism is required or if
  TF_USE_DETERMINISTIC_SEGMENT_REDUCTIONS=1 is set.
- This commit also fixes the determinism test on Windows.",segment_reduction_ops_deterministic_test.py,"@@ -34,8 +34,14 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import test
 
 
-def UsingNonDeterministicSegmentReductions():
-  return bool(int(os.getenv(""TF_USE_NONDETERMINISTIC_SEGMENT_REDUCTIONS"", ""0"")))
+def PlatformIsWindows():
+  return os.name == 'nt'
+
+
+def DeterministicSegmentReductionsSupported():
+  # See comment in segment_reduction_ops_gpu_0.cu.cc for why deterministic
+  # segment reduction kernels are disabled on Windows.
+  return not PlatformIsWindows()
 
 
 class SegmentReductionDeterminismExceptionsTest(test.TestCase):
@@ -69,7 +75,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase):
         for data_type in [dtypes.float16, dtypes.float32, dtypes.float64]:
           with self.cached_session(force_gpu=True):
             data, segment_ids, _ = self._input(data_type, segment_ids_type)
-            if (UsingNonDeterministicSegmentReductions() and
+            if (not DeterministicSegmentReductionsSupported() and
                 should_throw_for_float):
               with self.assertRaisesRegex(
                   errors_impl.UnimplementedError,
@@ -106,7 +112,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase):
               continue
             data, segment_ids, num_segments = self._input(
                 data_type, segment_ids_type)
-            if (UsingNonDeterministicSegmentReductions() and
+            if (not DeterministicSegmentReductionsSupported() and
                 (data_type != dtypes.int32) and should_throw_for_float):
               with self.assertRaisesRegex(errors_impl.UnimplementedError,
                                           self._UNSORTED_ERROR_MESSAGE):
@@ -129,7 +135,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase):
           with self.cached_session(force_gpu=True):
             data, segment_ids, num_segments = self._input(
                 data_type, segment_ids_type)
-            if UsingNonDeterministicSegmentReductions():
+            if not DeterministicSegmentReductionsSupported():
               with self.assertRaisesRegex(errors_impl.UnimplementedError,
                                           self._UNSORTED_ERROR_MESSAGE):
                 result = op(data, segment_ids, num_segments)
@@ -150,7 +156,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase):
           values, indices, _ = self._input(data_type, segment_ids_type)
           sparse_value = indexed_slices.IndexedSlices(
               values, indices, dense_shape=values.shape)
-          if UsingNonDeterministicSegmentReductions():
+          if not DeterministicSegmentReductionsSupported():
             with self.assertRaisesRegex(errors_impl.UnimplementedError,
                                         self._UNSORTED_ERROR_MESSAGE):
               # convert_to_tensor with IndexedSlices uses unsorted_segment_sum
@@ -174,7 +180,7 @@ class SegmentReductionDeterminismExceptionsTest(test.TestCase):
             tape.watch(params)
             op_output = array_ops.gather(params, indices)
           gradient = tape.gradient(op_output, params)
-          if UsingNonDeterministicSegmentReductions():
+          if not DeterministicSegmentReductionsSupported():
             with self.assertRaisesRegex(errors_impl.UnimplementedError,
                                         self._UNSORTED_ERROR_MESSAGE):
               # convert_to_tensor on IndexedSlices
",0,train
264a4b7f20d3654bd29e9b335d6ddfe6115ac63b,tensorflow/tensorflow,Updated examples,image_ops_impl.py,"@@ -1948,10 +1948,9 @@ def random_hue(image, max_delta, seed=None):
 
   Usage Example:
   ```python
-  >>> x = tf.constant([[[1.0, 2.0, 3.0]]])
-  >>> y = tf.image.random_hue(x, max_delta=0.1)
-  >>> print(y.numpy())
-  [[[1.        2.5... 3.       ]]]
+  >> import tensorflow as tf
+  >> x = tf.random.normal(shape=(256, 256, 3))
+  >> y = tf.image.random_hue(x, max_delta=0.1)
   ```
   
   Args:
@@ -2943,11 +2942,9 @@ def rgb_to_yiq(images):
   
   Usage Example:
     ```python
-    >>> import tensorflow as tf
-    >>> x = tf.constant([[[2.0, 5.0, 3.0]]])
-    >>> y = tf.image.rgb_to_yiq(x)
-    >>> print(y.numpy())
-    [[[ 3.875     -1.14... -1.25...]]]
+    >> import tensorflow as tf
+    >> x = tf.random.normal(shape=(256, 256, 3))
+    >> y = tf.image.rgb_to_yiq(x)
     ```
 
   Args:
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",executor.cc,"@@ -1824,8 +1824,8 @@ void ExecutorState::DumpState() {
 void ExecutorState::Finish() {
   mu_.lock();
   auto status = status_;
-  auto done_cb = done_cb_;
-  auto runner = runner_;
+  auto done_cb = std::move(done_cb_);
+  auto runner = std::move(runner_);
   mu_.unlock();
   delete this;
   CHECK(done_cb != nullptr);
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",process_util.cc,"@@ -51,19 +51,21 @@ void SchedClosure(std::function<void()> closure) {
     const uint64 id = port::Tracing::UniqueId();
     port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure,
                                id);
-    std::function<void()> wrapper = [closure, id]() {
-      port::Tracing::ScopedActivity region(
-          port::Tracing::EventCategory::kRunClosure, id);
-      closure();
-    };
-    Env::Default()->SchedClosure(wrapper);
+    std::function<void()> wrapper = std::bind(
+        [id](std::function<void()> closure) {
+          port::Tracing::ScopedActivity region(
+              port::Tracing::EventCategory::kRunClosure, id);
+          closure();
+        },
+        std::move(closure));
+    Env::Default()->SchedClosure(std::move(wrapper));
   } else {
-    Env::Default()->SchedClosure(closure);
+    Env::Default()->SchedClosure(std::move(closure));
   }
 }
 
 void SchedNonBlockingClosureAfter(int64 micros, std::function<void()> closure) {
-  Env::Default()->SchedClosureAfter(micros, closure);
+  Env::Default()->SchedClosureAfter(micros, std::move(closure));
 }
 
 }  // namespace tensorflow
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",base_rendezvous_mgr.cc,"@@ -64,13 +64,17 @@ void BaseRendezvousMgr::RecvLocalAsync(int64 step_id,
                                        const Rendezvous::ParsedKey& parsed,
                                        Rendezvous::DoneCallback done) {
   BaseRemoteRendezvous* rendez = FindOrCreate(step_id);
-  rendez->RecvLocalAsync(
-      parsed, [rendez, done](const Status& s, const Rendezvous::Args& send_args,
-                             const Rendezvous::Args& recv_args, const Tensor& v,
-                             bool dead) {
+  using namespace std::placeholders;
+  Rendezvous::DoneCallback done_cb = std::bind(
+      [rendez](Rendezvous::DoneCallback done,
+               // Begin unbound arguments.
+               const Status& s, const Rendezvous::Args& send_args,
+               const Rendezvous::Args& recv_args, const Tensor& v, bool dead) {
         rendez->Unref();
         done(s, send_args, recv_args, v, dead);
-      });
+      },
+      std::move(done), _1, _2, _3, _4, _5);
+  rendez->RecvLocalAsync(parsed, std::move(done_cb));
 }
 
 Status BaseRendezvousMgr::RecvLocal(int64 step_id,
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",grpc_call.h,"@@ -183,7 +183,7 @@ class Call : public UntypedCall<Service> {
   // call is cancelled by the client.
   void SetCancelCallback(std::function<void()> callback) {
     mutex_lock l(mu_);
-    cancel_callback_ = callback;
+    cancel_callback_ = std::move(callback);
   }
 
   // Clears any cancellation callback that has been registered for this call.
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",grpc_client_cq_tag.h,"@@ -29,7 +29,7 @@ namespace tensorflow {
 class GrpcClientCQTag {
  public:
   GrpcClientCQTag(::grpc::ClientContext* context, StatusCallback cb)
-      : context_(context), cb_(cb) {}
+      : context_(context), cb_(std::move(cb)) {}
   ~GrpcClientCQTag() { delete context_; }
 
   void OnCompleted(bool ok) {
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",grpc_remote_worker.cc,"@@ -46,41 +46,44 @@ class GrpcRemoteWorker : public WorkerInterface {
                       GetStatusResponse* response,
                       StatusCallback done) override {
     IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncGetStatus,
-                 done);
+                 std::move(done));
   }
 
   void RegisterGraphAsync(const RegisterGraphRequest* request,
                           RegisterGraphResponse* response,
                           StatusCallback done) override {
     IssueRequest(request, response,
-                 &grpc::WorkerService::Stub::AsyncRegisterGraph, done);
+                 &grpc::WorkerService::Stub::AsyncRegisterGraph,
+                 std::move(done));
   }
 
   void DeregisterGraphAsync(const DeregisterGraphRequest* request,
                             DeregisterGraphResponse* response,
                             StatusCallback done) override {
     IssueRequest(request, response,
-                 &grpc::WorkerService::Stub::AsyncDeregisterGraph, done);
+                 &grpc::WorkerService::Stub::AsyncDeregisterGraph,
+                 std::move(done));
   }
 
   void RunGraphAsync(CallOptions* call_opts, const RunGraphRequest* request,
                      RunGraphResponse* response, StatusCallback done) override {
     IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncRunGraph,
-                 done, call_opts);
+                 std::move(done), call_opts);
   }
 
   void CleanupGraphAsync(const CleanupGraphRequest* request,
                          CleanupGraphResponse* response,
                          StatusCallback done) override {
     IssueRequest(request, response,
-                 &grpc::WorkerService::Stub::AsyncCleanupGraph, done);
+                 &grpc::WorkerService::Stub::AsyncCleanupGraph,
+                 std::move(done));
   }
 
   void CleanupAllAsync(const CleanupAllRequest* request,
                        CleanupAllResponse* response,
                        StatusCallback done) override {
     IssueRequest(request, response, &grpc::WorkerService::Stub::AsyncCleanupAll,
-                 done);
+                 std::move(done));
   }
 
   void RecvTensorAsync(CallOptions* call_opts, const RecvTensorRequest* request,
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",rpc_rendezvous_mgr.cc,"@@ -127,16 +127,20 @@ class RpcRecvTensorCall : public BaseRecvTensorCall {
 
   // Start the main RecvTensor call, checking for an async abort.
   void StartRTCall(std::function<void()> recv_done) {
+    using namespace std::placeholders;
+    StatusCallback cb = std::bind(
+        [this](std::function<void()> recv_done,
+               // Begin unbound arguments.
+               const Status& s) {
+          if (!s.ok()) {
+            mutex_lock l(mu_);
+            status_.Update(s);
+          }
+          recv_done();
+        },
+        std::move(recv_done), _1);
     wi_->RecvTensorAsync(&opts_, &req_, &resp_,
-                         nullptr /* TensorBufAllocator */,
-                         // done callback
-                         [this, recv_done](const Status& s) {
-                           if (!s.ok()) {
-                             mutex_lock l(mu_);
-                             status_.Update(s);
-                           }
-                           recv_done();
-                         });
+                         nullptr /* TensorBufAllocator */, std::move(cb));
   }
 
   string src_worker_;
",0,train
8f6a17f052ba0709641082016bf3f53886078ba4,tensorflow/tensorflow,"Reduce some allocations on grpc code paths.

-Use std::move when assigning std::function to reduce some simple allocations.'

-Use std::bind to avoid copy of std::function in lambda statements.
Change: 129654152",sendrecv_ops.cc,"@@ -110,23 +110,27 @@ void RecvOp::ComputeAsync(OpKernelContext* ctx, DoneCallback done) {
   Rendezvous::Args args;
   args.device_context = ctx->op_device_context();
   args.alloc_attrs = ctx->output_alloc_attr(0);
-  DoneCallback done_cb = std::move(done);
-  ctx->rendezvous()->RecvAsync(
-      parsed, args,
-      [ctx, done_cb](const Status& s, const Rendezvous::Args& send_args,
-                     const Rendezvous::Args& recv_args, const Tensor& val,
-                     bool is_dead) {
+  using namespace std::placeholders;
+  Rendezvous::DoneCallback done_cb = std::bind(
+      [ctx](DoneCallback done,
+            // Begin unbound arguments.
+            const Status& s, const Rendezvous::Args& send_args,
+            const Rendezvous::Args& recv_args, const Tensor& val,
+            bool is_dead) {
         ctx->SetStatus(s);
         if (s.ok()) {
-          // 'ctx' allocates the output tensor of the expected type.  The
-          // runtime checks whether the tensor received here is the same type.
+          // 'ctx' allocates the output tensor of the expected type.
+          // The runtime checks whether the tensor received here is
+          // the same type.
           if (!is_dead) {
             ctx->set_output(0, val);
           }
           *ctx->is_output_dead() = is_dead;
         }
-        done_cb();
-      });
+        done();
+      },
+      std::move(done), _1, _2, _3, _4, _5);
+  ctx->rendezvous()->RecvAsync(parsed, args, std::move(done_cb));
 }
 
 REGISTER_KERNEL_BUILDER(Name(""_Recv"").Device(DEVICE_CPU), RecvOp);
",0,train
d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method.

This also makes the TpuExecutable in tpu_on_demand_compiler.cc
subclass TpuExecutableInterface, and implements the fingerprint()
method for future use by JAX. I didn't implement it for the
TpuExecutable class in tpu_executable.h, since TF doesn't need this
functionality (yet?), but it shouldn't be too hard.

PiperOrigin-RevId: 330842613
Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_on_demand_compiler.cc,"@@ -29,6 +29,7 @@ limitations under the License.
 #include ""tensorflow/stream_executor/tpu/c_api_decl.h""
 #include ""tensorflow/stream_executor/tpu/proto_helper.h""
 #include ""tensorflow/stream_executor/tpu/status_helper.h""
+#include ""tensorflow/stream_executor/tpu/tpu_executable_interface.h""
 #include ""tensorflow/stream_executor/tpu/tpu_executor.h""
 #include ""tensorflow/stream_executor/tpu/tpu_executor_c_api.h""
 #include ""tensorflow/stream_executor/tpu/tpu_platform.h""
@@ -97,11 +98,11 @@ void XLA_HloModuleConfig_Free(XLA_HloModuleConfig* module_config) {
   }
 }
 
-class TpuExecutable : public Executable {
+class TpuExecutable : public TpuExecutableInterface {
  public:
   TpuExecutable(SE_Executable* se_executable,
                 std::shared_ptr<HloModule> hlo_module)
-      : Executable(std::move(hlo_module), nullptr, nullptr),
+      : TpuExecutableInterface(std::move(hlo_module), nullptr, nullptr),
         se_executable_(se_executable) {}
 
   ~TpuExecutable() override {
@@ -192,7 +193,31 @@ class TpuExecutable : public Executable {
     return output;
   }
 
+  absl::string_view fingerprint() const override {
+    const char* data;
+    size_t size;
+    ExecutorApiFn()->TpuExecutable_FingerprintFn(se_executable_, &data, &size);
+    return absl::string_view(data, size);
+  }
+
  private:
+  Status LoadProgramAndEnqueueToStream(
+      const ServiceExecutableRunOptions& run_options,
+      absl::Span<const stream_executor::DeviceMemoryBase> arguments,
+      stream_executor::DeviceMemoryBase result,
+      absl::optional<stream_executor::DeviceMemoryBase>
+          cross_program_prefetch_addr) override {
+    LOG(FATAL) << ""LoadProgramAndEnqueueToStream unimplemented"";
+  }
+
+  Shape HostShapeToDeviceShape(const Shape& host_shape) override {
+    LOG(FATAL) << ""HostShapeToDeviceShape unimplemented"";
+  }
+
+  int64 ShapeSize(const Shape& shape) override {
+    LOG(FATAL) << ""ShapeSize unimplemented"";
+  }
+
   SE_Executable* se_executable_;
 };
 
",0,train
d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method.

This also makes the TpuExecutable in tpu_on_demand_compiler.cc
subclass TpuExecutableInterface, and implements the fingerprint()
method for future use by JAX. I didn't implement it for the
TpuExecutable class in tpu_executable.h, since TF doesn't need this
functionality (yet?), but it shouldn't be too hard.

PiperOrigin-RevId: 330842613
Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable.cc,"@@ -113,4 +113,9 @@ int64 TpuExecutable::ShapeSize(const Shape& shape) {
   return size;
 }
 
+absl::string_view TpuExecutable::fingerprint() const {
+  // TODO(skye): the fingerprint can be plumbed through via core_program_
+  LOG(FATAL) << ""TpuExecutable::fingerprint() unimplemented"";
+}
+
 }  // namespace xla
",0,train
d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method.

This also makes the TpuExecutable in tpu_on_demand_compiler.cc
subclass TpuExecutableInterface, and implements the fingerprint()
method for future use by JAX. I didn't implement it for the
TpuExecutable class in tpu_executable.h, since TF doesn't need this
functionality (yet?), but it shouldn't be too hard.

PiperOrigin-RevId: 330842613
Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable.h,"@@ -46,6 +46,8 @@ class TpuExecutable : public TpuExecutableInterface {
 
   const XLA_TpuProgram* core_program() const { return core_program_; }
 
+  absl::string_view fingerprint() const override;
+
  private:
   Status LoadProgramAndEnqueueToStream(
       const ServiceExecutableRunOptions& run_options,
",0,train
d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method.

This also makes the TpuExecutable in tpu_on_demand_compiler.cc
subclass TpuExecutableInterface, and implements the fingerprint()
method for future use by JAX. I didn't implement it for the
TpuExecutable class in tpu_executable.h, since TF doesn't need this
functionality (yet?), but it shouldn't be too hard.

PiperOrigin-RevId: 330842613
Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executable_interface.h,"@@ -80,6 +80,8 @@ class TpuExecutableInterface : public Executable {
       absl::optional<stream_executor::DeviceMemoryBase>
           cross_program_prefetch_addr) = 0;
 
+  virtual absl::string_view fingerprint() const = 0;
+
  protected:
   virtual Shape HostShapeToDeviceShape(const Shape& host_shape) = 0;
 
",0,train
d59bdf549306f2746bf45c06161aeba6151b092d,tensorflow/tensorflow,"Add TpuExecutableInterface::fingerprint() virtual method.

This also makes the TpuExecutable in tpu_on_demand_compiler.cc
subclass TpuExecutableInterface, and implements the fingerprint()
method for future use by JAX. I didn't implement it for the
TpuExecutable class in tpu_executable.h, since TF doesn't need this
functionality (yet?), but it shouldn't be too hard.

PiperOrigin-RevId: 330842613
Change-Id: I592068c7b1110e0ae32b241e3e6c5a7b121f3e0f",tpu_executor_c_api.h,"@@ -300,6 +300,10 @@ TFTPU_CAPI_EXPORT void TpuExecutable_ExecuteAsyncOnStream(
     SE_HloExecutionProfile* hlo_execution_profile, SE_ExecutionOutput* output,
     SE_Status* status);
 
+TFTPU_CAPI_EXPORT void TpuExecutable_Fingerprint(SE_Executable* executable,
+                                                 const char** fingerprint,
+                                                 size_t* size);
+
 TFTPU_CAPI_EXPORT void TpuExecutable_Free(SE_Executable*);
 
 // Converts an XLA `Shape` into its equivalent TPU `Shape` representation.
@@ -445,6 +449,7 @@ struct TfTpu_ExecutorApiFn {
   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_Compile);
   TFTPU_ADD_FN_IN_STRUCT(TpuCompiler_ShapeSize);
   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_ExecuteAsyncOnStream);
+  TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Fingerprint);
   TFTPU_ADD_FN_IN_STRUCT(TpuExecutable_Free);
 
   TFTPU_ADD_FN_IN_STRUCT(XlaShapeToTpuShapeRepresentation);
",0,train
57f64fe469364417cfc6755c754abb54c2e3756b,tensorflow/tensorflow,revert unwanted typo,util_test.py,"@@ -785,7 +785,7 @@ class FillTriangularTest(test.TestCase):
 @test_util.with_c_api
 class ReduceWeightedLogSumExp(test.TestCase):
 
-  def _reduce_weighted_logsumexp(self, logx, w, axis, keepdims=False):
+  def _reduce_weighted_logsumexp(self, logx, w, axis, keep_dims=False):
     m = np.max(logx, axis=axis, keepdims=True)
     sum_ = np.sum(w * np.exp(logx - m), axis=axis, keepdims=keep_dims)
     sgn = np.sign(sum_)
",0,train
f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout.

PiperOrigin-RevId: 229631446",layers_test.py,"@@ -1356,7 +1356,7 @@ class DropoutTest(test.TestCase):
     with self.cached_session():
       images = np.random.uniform(size=(5, height, width, 3))
       output = _layers.dropout(images)
-      self.assertEqual(output.op.name, 'Dropout/dropout_1/mul')
+      self.assertEqual(output.op.name, 'Dropout/dropout_1/mul_1')
       output.get_shape().assert_is_compatible_with(
           ops.convert_to_tensor(images).get_shape())
 
",0,train
f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout.

PiperOrigin-RevId: 229631446",parse_layer_parameters.py,"@@ -27,7 +27,8 @@ from tensorflow.python.platform import tf_logging as logging
 _UNCHANGED_RF_LAYER_OPS = [
     ""Add"", ""BiasAdd"", ""Cast"", ""Ceil"", ""ConcatV2"", ""Const"", ""Floor"",
     ""FusedBatchNorm"", ""Identity"", ""Log"", ""Mul"", ""Pow"", ""RealDiv"", ""Relu"",
-    ""Relu6"", ""Round"", ""Rsqrt"", ""Softplus"", ""Sub"", ""VariableV2"", ""LRN""
+    ""Relu6"", ""Round"", ""Rsqrt"", ""Softplus"", ""Sub"", ""VariableV2"", ""LRN"",
+    ""GreaterEqual""
 ]
 
 # Different ways in which padding modes may be spelled.
@@ -276,11 +277,11 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False):
     kernel_size_x, kernel_size_y = _conv_kernel_size(node, name_to_node)
     # Compute the padding for this node separately for each direction.
     total_padding_x, padding_x = _padding_size_conv_pool(
-        node, kernel_size_x, stride_x, input_resolution[1]
-        if input_resolution is not None else None)
+        node, kernel_size_x, stride_x,
+        input_resolution[1] if input_resolution is not None else None)
     total_padding_y, padding_y = _padding_size_conv_pool(
-        node, kernel_size_y, stride_y, input_resolution[0]
-        if input_resolution is not None else None)
+        node, kernel_size_y, stride_y,
+        input_resolution[0] if input_resolution is not None else None)
   elif node.op == ""Pad"":
     # Kernel and stride are simply 1 in this case.
     kernel_size_x = 1
@@ -294,11 +295,11 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False):
     kernel_size_x, kernel_size_y = _pool_kernel_size(node, name_to_node)
     # Compute the padding for this node separately for each direction.
     total_padding_x, padding_x = _padding_size_conv_pool(
-        node, kernel_size_x, stride_x, input_resolution[1]
-        if input_resolution is not None else None)
+        node, kernel_size_x, stride_x,
+        input_resolution[1] if input_resolution is not None else None)
     total_padding_y, padding_y = _padding_size_conv_pool(
-        node, kernel_size_y, stride_y, input_resolution[0]
-        if input_resolution is not None else None)
+        node, kernel_size_y, stride_y,
+        input_resolution[0] if input_resolution is not None else None)
   elif node.op in _UNCHANGED_RF_LAYER_OPS:
     # These nodes do not modify the RF parameters.
     kernel_size_x = 1
@@ -320,7 +321,7 @@ def get_layer_params(node, name_to_node, input_resolution=None, force=False):
       total_padding_y = None
       padding_y = None
     else:
-      raise ValueError(""Unknown layer for operation '%s': %s"" % (node.name,
-                                                                 node.op))
+      raise ValueError(
+          ""Unknown layer for operation '%s': %s"" % (node.name, node.op))
   return (kernel_size_x, kernel_size_y, stride_x, stride_y, padding_x,
           padding_y, total_padding_x, total_padding_y)
",0,train
f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout.

PiperOrigin-RevId: 229631446",topology_test.py,"@@ -358,17 +358,17 @@ class TopologyConstructionTest(keras_parameterized.TestCase):
     x = keras.layers.Dropout(0.5)(x, training=True)
     model = keras.models.Model(inp, x)
     # Would be `dropout/cond/Merge` by default
-    self.assertTrue(model.output.op.name.endswith('dropout/mul'))
+    self.assertTrue(model.output.op.name.endswith('dropout/mul_1'))
 
     # Test that argument is kept when applying the model
     inp2 = keras.layers.Input(shape=(2,))
     out2 = model(inp2)
-    self.assertTrue(out2.op.name.endswith('dropout/mul'))
+    self.assertTrue(out2.op.name.endswith('dropout/mul_1'))
 
     # Test that argument is kept after loading a model
     config = model.get_config()
     model = keras.models.Model.from_config(config)
-    self.assertTrue(model.output.op.name.endswith('dropout/mul'))
+    self.assertTrue(model.output.op.name.endswith('dropout/mul_1'))
 
   def test_node_construction(self):
     # test basics
",0,train
f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout.

PiperOrigin-RevId: 229631446",wrappers_test.py,"@@ -159,7 +159,7 @@ class TimeDistributedTest(test.TestCase):
       np.random.seed(1234)
       x = keras.layers.Input(shape=(3, 2))
       y = keras.layers.TimeDistributed(
-          keras.layers.Dropout(.999))(x, training=True)
+          keras.layers.Dropout(.9999))(x, training=True)
       model = keras.models.Model(x, y)
       y = model.predict(np.random.random((10, 3, 2)))
       self.assertAllClose(np.mean(y), 0., atol=1e-1, rtol=1e-1)
",0,train
f98110b559362b086409438b00f5b73c9d870b60,tensorflow/tensorflow,"[TF] Use compare and cast instead of floor for dropout.

PiperOrigin-RevId: 229631446",nn_ops.py,"@@ -3292,15 +3292,13 @@ def dropout_v2(x, rate, noise_shape=None, seed=None, name=None):  # pylint: disa
         return x
 
     noise_shape = _get_noise_shape(x, noise_shape)
-
-    keep_prob = 1 - rate
-    # uniform [keep_prob, 1.0 + keep_prob)
-    random_tensor = keep_prob
-    random_tensor += random_ops.random_uniform(
+    # Sample a uniform distribution on [0.0, 1.0) and select values larger than
+    # rate.
+    random_tensor = random_ops.random_uniform(
         noise_shape, seed=seed, dtype=x.dtype)
-    # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
-    binary_tensor = math_ops.floor(random_tensor)
-    ret = math_ops.divide(x, keep_prob) * binary_tensor
+    keep_prob = 1 - rate
+    ret = (1 / keep_prob) * math_ops.cast(keep_prob >= random_tensor,
+                                          x.dtype) * x
     if not context.executing_eagerly():
       ret.set_shape(x.get_shape())
     return ret
",0,train
7ad1f3d479eaf46042c5254487cb74f7143010cd,tensorflow/tensorflow,"Refactor cross_device_ops so that when indexed slices are present in a batch, batched all-reduce can still be done on dense gradients.

PiperOrigin-RevId: 233695231",cross_device_ops.py,"@@ -269,6 +269,8 @@ class CrossDeviceOps(object):
       ValueError: if `value_destination_pairs` is not a list or a tuple of
         tuples of PerReplica objects and destinations
     """"""
+    # TODO(yuefengz): if destinations are different, split into several
+    # `_batch_reduce` invocations.
     if not _validate_value_destination_pairs(value_destination_pairs):
       # If the first element of each pair is a tensor, we try to turn it into a
       # PerReplica object.
@@ -374,8 +376,10 @@ class ReductionToOneDevice(CrossDeviceOps):
     super(ReductionToOneDevice, self).__init__()
 
   def reduce_implementation(self, reduce_op, per_replica_value, destinations):
-    assert check_destinations(destinations)
-    devices = get_devices_from(destinations)
+    if check_destinations(destinations):
+      devices = get_devices_from(destinations)
+    else:
+      devices = get_devices_from(per_replica_value)
     reduce_to_device = self.reduce_to_device or devices[0]
     logging.log_first_n(
         logging.INFO,
@@ -653,29 +657,15 @@ class AllReduceCrossDeviceOps(CrossDeviceOps):
     self._num_packs = num_packs
     self._agg_small_grads_max_bytes = agg_small_grads_max_bytes
     self._agg_small_grads_max_group = agg_small_grads_max_group
+    self._simple_cross_replica_ops = ReductionToOneDevice()
     super(AllReduceCrossDeviceOps, self).__init__()
 
   def reduce_implementation(self, reduce_op, per_replica_value, destinations):
-    contains_indexed_slices = cross_device_utils.contains_indexed_slices(
-        per_replica_value)
-    if (_devices_match(per_replica_value, destinations)
-        and not context.executing_eagerly()
-        and not contains_indexed_slices):
+    if _devices_match(per_replica_value, destinations):
       return self._batch_all_reduce(reduce_op, [per_replica_value])[0]
     else:
-      if contains_indexed_slices:
-        logging.log_first_n(
-            logging.WARN,
-            ""Efficient allreduce is not supported for IndexedSlices."", 10)
-
-      if check_destinations(destinations):
-        devices = get_devices_from(destinations)
-      else:
-        devices = get_devices_from(per_replica_value)
-      reduce_to_device = devices[0]
-      reduced = _simple_reduce(per_replica_value, reduce_to_device,
-                               math_ops.add_n, reduce_op)
-      return self.broadcast(reduced, destinations)
+      return self._simple_cross_replica_ops.reduce(reduce_op, per_replica_value,
+                                                   destinations)
 
   def batch_reduce_implementation(self, reduce_op, value_destination_pairs):
     all_devices_match = _all_devices_match(value_destination_pairs)
@@ -699,14 +689,31 @@ class AllReduceCrossDeviceOps(CrossDeviceOps):
 
   def _batch_all_reduce(self, reduce_op, per_replica_values):
     """"""All-reduce algorithm in a batch.""""""
+    dense_values, dense_indices, sparse_values, sparse_indices = (
+        cross_device_utils.split_by_sparsity(per_replica_values))
+    if dense_values:
+      dense_results = self._do_batch_all_reduce(reduce_op, dense_values)
+    else:
+      dense_results = []
+    if sparse_values:
+      sparse_results = self._do_batch_all_reduce_sparse(reduce_op,
+                                                        sparse_values)
+    else:
+      sparse_results = []
+    return cross_device_utils.stitch_values(((dense_results, dense_indices),
+                                             (sparse_results, sparse_indices)))
+
+  def _do_batch_all_reduce(self, reduce_op, dense_values):
+    """"""Run batch all-reduces.""""""
     logging.log_first_n(
         logging.INFO, ""batch_all_reduce invoked for batches size = %d with ""
         ""algorithm = %s, num_packs = %d, agg_small_grads_max_bytes = %d and ""
         ""agg_small_grads_max_group = %d"" %
-        (len(per_replica_values), self._all_reduce_alg, self._num_packs,
+        (len(dense_values), self._all_reduce_alg, self._num_packs,
          self._agg_small_grads_max_bytes, self._agg_small_grads_max_group), 10)
-    destinations = per_replica_values[0].devices
-    grouped = _group_value_by_device(per_replica_values)
+
+    destinations = dense_values[0].devices
+    grouped = _group_value_by_device(dense_values)
 
     device_grad_packs, tensor_packer = _pack_tensors(
         grouped, self._num_packs, self._agg_small_grads_max_bytes,
@@ -727,7 +734,18 @@ class AllReduceCrossDeviceOps(CrossDeviceOps):
               destinations, device_grad_packs))
 
     reduced = _unpack_tensors(reduced, tensor_packer)
-    return _ungroup_and_make_mirrored(reduced, per_replica_values[0], reduce_op)
+    return _ungroup_and_make_mirrored(reduced, dense_values[0], reduce_op)
+
+  def _do_batch_all_reduce_sparse(self, reduce_op, sparse_values):
+    """"""Run batch all-reduce for sparse values.""""""
+    logging.log_first_n(
+        logging.WARN,
+        ""Efficient allreduce is not supported for %d IndexedSlices"" %
+        len(sparse_values), 10)
+    # Use `sparse_values` as destinations to do all-reduces. It is effectively
+    # an allgather under the hood but not an efficient one.
+    return self._simple_cross_replica_ops.batch_reduce(
+        reduce_op, zip(sparse_values, sparse_values))
 
 
 # For compatibility with code using the old name of `AllReduceCrossDeviceOps`.
",0,train
7ad1f3d479eaf46042c5254487cb74f7143010cd,tensorflow/tensorflow,"Refactor cross_device_ops so that when indexed slices are present in a batch, batched all-reduce can still be done on dense gradients.

PiperOrigin-RevId: 233695231",cross_device_utils.py,"@@ -681,3 +681,58 @@ def contains_indexed_slices(value):
     return contains_indexed_slices(value.values)
   else:
     return False
+
+
+def is_indexed_slices(value):
+  if isinstance(value, ops.IndexedSlices):
+    return True
+  assert isinstance(value, value_lib.DistributedValues)
+  return all([isinstance(v, ops.IndexedSlices) for v in value.values])
+
+
+def split_by_sparsity(values):
+  """"""Split values into dense and sparse values.
+
+  Args:
+    values: a list of tensors or `PerReplica`s.
+
+  Returns:
+    Four lists:
+      a list of dense values, a list of their indices in `values` and
+      a list of sparse values, a list of their indices in `values`.
+  """"""
+  dense_values = []
+  dense_indices = []
+  sparse_values = []
+  sparse_indices = []
+  for i, v in enumerate(values):
+    if is_indexed_slices(v):
+      sparse_values.append(v)
+      sparse_indices.append(i)
+    else:
+      dense_values.append(v)
+      dense_indices.append(i)
+  return dense_values, dense_indices, sparse_values, sparse_indices
+
+
+def stitch_values(values_and_indices_list):
+  """"""Stitch values together according to their indices.
+
+  Args:
+    values_and_indices_list: a list of tuples of values and indices indicating
+      the values and positions in the returned list.
+
+  Returns:
+    a stitched list of values.
+  """"""
+  length = 0
+  for values_and_indices in values_and_indices_list:
+    length += len(values_and_indices[0])
+
+  result = [None] * length
+  for values_and_indices in values_and_indices_list:
+    if values_and_indices and values_and_indices[0]:
+      for v, i in zip(*values_and_indices):
+        assert result[i] is None
+        result[i] = v
+  return result
",0,train
5278fa03a9e703d1e414ccebd858f7fdf22dbba5,tensorflow/tensorflow,"Make quant_delay work even if user didn't create global step.

PiperOrigin-RevId: 174937793",quantize.py,"@@ -387,7 +387,7 @@ class _QuantizeContext(object):
 
     if delay_requested and self.quant_delay and self.quant_delay > 0:
       activate_quant = math_ops.greater_equal(
-          training_util.get_global_step(),
+          training_util.get_or_create_global_step(),
           self.quant_delay,
           name=scope + '/activate_quant')
       quant = control_flow_ops.cond(
",0,train
f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes
consumed by the reader.

PiperOrigin-RevId: 172634455",zlib_buffers_test.cc,"@@ -68,25 +68,25 @@ void TestAllCombinations(CompressionOptions input_options,
     for (auto input_buf_size : InputBufferSizes()) {
       for (auto output_buf_size : OutputBufferSizes()) {
         std::unique_ptr<WritableFile> file_writer;
-        TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
+        TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer));
         string result;
 
         ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                              output_options);
-        TF_CHECK_OK(out.Init());
+        TF_ASSERT_OK(out.Init());
 
-        TF_CHECK_OK(out.Append(StringPiece(data)));
-        TF_CHECK_OK(out.Close());
-        TF_CHECK_OK(file_writer->Flush());
-        TF_CHECK_OK(file_writer->Close());
+        TF_ASSERT_OK(out.Append(StringPiece(data)));
+        TF_ASSERT_OK(out.Close());
+        TF_ASSERT_OK(file_writer->Flush());
+        TF_ASSERT_OK(file_writer->Close());
 
         std::unique_ptr<RandomAccessFile> file_reader;
-        TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
+        TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader));
         std::unique_ptr<RandomAccessInputStream> input_stream(
             new RandomAccessInputStream(file_reader.get()));
         ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
                            input_options);
-        TF_EXPECT_OK(in.ReadNBytes(data.size(), &result));
+        TF_ASSERT_OK(in.ReadNBytes(data.size(), &result));
         EXPECT_EQ(result, data);
       }
     }
@@ -118,24 +118,24 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
   string actual_result;
   string expected_result;
 
-  TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
+  TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer));
   ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                        output_options);
-  TF_CHECK_OK(out.Init());
+  TF_ASSERT_OK(out.Init());
 
   for (int i = 0; i < num_writes; i++) {
-    TF_CHECK_OK(out.Append(StringPiece(data)));
+    TF_ASSERT_OK(out.Append(StringPiece(data)));
     if (with_flush) {
-      TF_CHECK_OK(out.Flush());
+      TF_ASSERT_OK(out.Flush());
     }
     strings::StrAppend(&expected_result, data);
   }
-  TF_CHECK_OK(out.Close());
-  TF_CHECK_OK(file_writer->Flush());
-  TF_CHECK_OK(file_writer->Close());
+  TF_ASSERT_OK(out.Close());
+  TF_ASSERT_OK(file_writer->Flush());
+  TF_ASSERT_OK(file_writer->Close());
 
   std::unique_ptr<RandomAccessFile> file_reader;
-  TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
+  TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader));
   std::unique_ptr<RandomAccessInputStream> input_stream(
       new RandomAccessInputStream(file_reader.get()));
   ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
@@ -143,7 +143,7 @@ void TestMultipleWrites(uint8 input_buf_size, uint8 output_buf_size,
 
   for (int i = 0; i < num_writes; i++) {
     string decompressed_output;
-    TF_EXPECT_OK(in.ReadNBytes(data.size(), &decompressed_output));
+    TF_ASSERT_OK(in.ReadNBytes(data.size(), &decompressed_output));
     strings::StrAppend(&actual_result, decompressed_output);
   }
 
@@ -170,19 +170,19 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
 
   string data = GenTestString(10);
   std::unique_ptr<WritableFile> file_writer;
-  TF_CHECK_OK(env->NewWritableFile(fname, &file_writer));
+  TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer));
   string result;
   ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
                        output_options);
-  TF_CHECK_OK(out.Init());
+  TF_ASSERT_OK(out.Init());
 
-  TF_CHECK_OK(out.Append(StringPiece(data)));
-  TF_CHECK_OK(out.Close());
-  TF_CHECK_OK(file_writer->Flush());
-  TF_CHECK_OK(file_writer->Close());
+  TF_ASSERT_OK(out.Append(StringPiece(data)));
+  TF_ASSERT_OK(out.Close());
+  TF_ASSERT_OK(file_writer->Flush());
+  TF_ASSERT_OK(file_writer->Close());
 
   std::unique_ptr<RandomAccessFile> file_reader;
-  TF_CHECK_OK(env->NewRandomAccessFile(fname, &file_reader));
+  TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader));
   std::unique_ptr<RandomAccessInputStream> input_stream(
       new RandomAccessInputStream(file_reader.get()));
   ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
@@ -192,5 +192,129 @@ TEST(ZlibInputStream, FailsToReadIfWindowBitsAreIncompatible) {
   CHECK(read_status.error_message().find(""inflate() failed"") != string::npos);
 }
 
+void WriteCompressedFile(Env* env, const string& fname, int input_buf_size,
+                         int output_buf_size,
+                         const CompressionOptions& output_options,
+                         const string& data) {
+  std::unique_ptr<WritableFile> file_writer;
+  TF_ASSERT_OK(env->NewWritableFile(fname, &file_writer));
+
+  ZlibOutputBuffer out(file_writer.get(), input_buf_size, output_buf_size,
+                       output_options);
+  TF_ASSERT_OK(out.Init());
+
+  TF_ASSERT_OK(out.Append(StringPiece(data)));
+  TF_ASSERT_OK(out.Close());
+  TF_ASSERT_OK(file_writer->Flush());
+  TF_ASSERT_OK(file_writer->Close());
+}
+
+void TestTell(CompressionOptions input_options,
+              CompressionOptions output_options) {
+  Env* env = Env::Default();
+  string fname = testing::TmpDir() + ""/zlib_buffers_test"";
+  for (auto file_size : NumCopies()) {
+    string data = GenTestString(file_size);
+    for (auto input_buf_size : InputBufferSizes()) {
+      for (auto output_buf_size : OutputBufferSizes()) {
+        // Write the compressed file.
+        WriteCompressedFile(env, fname, input_buf_size, output_buf_size,
+                            output_options, data);
+
+        // Boiler-plate to set up ZlibInputStream.
+        std::unique_ptr<RandomAccessFile> file_reader;
+        TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader));
+        std::unique_ptr<RandomAccessInputStream> input_stream(
+            new RandomAccessInputStream(file_reader.get()));
+        ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
+                           input_options);
+
+        string first_half(data, 0, data.size() / 2);
+        string bytes_read;
+
+        // Read the first half of the uncompressed file and expect that Tell()
+        // returns half the uncompressed length of the file.
+        TF_ASSERT_OK(in.ReadNBytes(first_half.size(), &bytes_read));
+        EXPECT_EQ(in.Tell(), first_half.size());
+        EXPECT_EQ(bytes_read, first_half);
+
+        // Read the remaining half of the uncompressed file and expect that
+        // Tell() points past the end of file.
+        string second_half;
+        TF_ASSERT_OK(
+            in.ReadNBytes(data.size() - first_half.size(), &second_half));
+        EXPECT_EQ(in.Tell(), data.size());
+        bytes_read.append(second_half);
+
+        // Expect that the file is correctly read.
+        EXPECT_EQ(bytes_read, data);
+      }
+    }
+  }
+}
+
+void TestSkipNBytes(CompressionOptions input_options,
+                    CompressionOptions output_options) {
+  Env* env = Env::Default();
+  string fname = testing::TmpDir() + ""/zlib_buffers_test"";
+  for (auto file_size : NumCopies()) {
+    string data = GenTestString(file_size);
+    for (auto input_buf_size : InputBufferSizes()) {
+      for (auto output_buf_size : OutputBufferSizes()) {
+        // Write the compressed file.
+        WriteCompressedFile(env, fname, input_buf_size, output_buf_size,
+                            output_options, data);
+
+        // Boiler-plate to set up ZlibInputStream.
+        std::unique_ptr<RandomAccessFile> file_reader;
+        TF_ASSERT_OK(env->NewRandomAccessFile(fname, &file_reader));
+        std::unique_ptr<RandomAccessInputStream> input_stream(
+            new RandomAccessInputStream(file_reader.get()));
+        ZlibInputStream in(input_stream.get(), input_buf_size, output_buf_size,
+                           input_options);
+
+        size_t data_half_size = data.size() / 2;
+        string second_half(data, data_half_size, data.size() - data_half_size);
+
+        // Skip past the first half of the file and expect Tell() returns
+        // correctly.
+        TF_ASSERT_OK(in.SkipNBytes(data_half_size));
+        EXPECT_EQ(in.Tell(), data_half_size);
+
+        // Expect that second half is read correctly and Tell() returns past
+        // end of file after reading complete file.
+        string bytes_read;
+        TF_ASSERT_OK(in.ReadNBytes(second_half.size(), &bytes_read));
+        EXPECT_EQ(bytes_read, second_half);
+        EXPECT_EQ(in.Tell(), data.size());
+      }
+    }
+  }
+}
+
+TEST(ZlibInputStream, TellDefaultOptions) {
+  TestTell(CompressionOptions::DEFAULT(), CompressionOptions::DEFAULT());
+}
+
+TEST(ZlibInputStream, TellRawDeflate) {
+  TestTell(CompressionOptions::RAW(), CompressionOptions::RAW());
+}
+
+TEST(ZlibInputStream, TellGzip) {
+  TestTell(CompressionOptions::GZIP(), CompressionOptions::GZIP());
+}
+
+TEST(ZlibInputStream, SkipNBytesDefaultOptions) {
+  TestSkipNBytes(CompressionOptions::DEFAULT(), CompressionOptions::DEFAULT());
+}
+
+TEST(ZlibInputStream, SkipNBytesRawDeflate) {
+  TestSkipNBytes(CompressionOptions::RAW(), CompressionOptions::RAW());
+}
+
+TEST(ZlibInputStream, SkipNBytesGzip) {
+  TestSkipNBytes(CompressionOptions::GZIP(), CompressionOptions::GZIP());
+}
+
 }  // namespace io
 }  // namespace tensorflow
",0,test
f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes
consumed by the reader.

PiperOrigin-RevId: 172634455",zlib_inputstream.cc,"@@ -32,7 +32,8 @@ ZlibInputStream::ZlibInputStream(
       z_stream_input_(new Bytef[input_buffer_capacity_]),
       z_stream_output_(new Bytef[output_buffer_capacity_]),
       zlib_options_(zlib_options),
-      z_stream_(new z_stream) {
+      z_stream_(new z_stream),
+      bytes_read_(0) {
   InitZlibBuffer();
 }
 
@@ -45,6 +46,7 @@ ZlibInputStream::~ZlibInputStream() {
 Status ZlibInputStream::Reset() {
   TF_RETURN_IF_ERROR(input_stream_->Reset());
   InitZlibBuffer();
+  bytes_read_ = 0;
   return Status::OK();
 }
 
@@ -127,6 +129,7 @@ size_t ZlibInputStream::ReadBytesFromCache(size_t bytes_to_read,
     result->append(next_unread_byte_, can_read_bytes);
     next_unread_byte_ += can_read_bytes;
   }
+  bytes_read_ += can_read_bytes;
   return can_read_bytes;
 }
 
@@ -170,8 +173,7 @@ Status ZlibInputStream::ReadNBytes(int64 bytes_to_read, string* result) {
   return Status::OK();
 }
 
-// TODO(srbs): Implement this.
-int64 ZlibInputStream::Tell() const { return -1; }
+int64 ZlibInputStream::Tell() const { return bytes_read_; }
 
 Status ZlibInputStream::Inflate() {
   int error = inflate(z_stream_.get(), zlib_options_.flush_mode);
",0,test
f5ea388e48a38b935ebd36442f756c8974b7ce3f,tensorflow/tensorflow,"Implement ZlibInputStream::Tell() by keeping track of the number of bytes
consumed by the reader.

PiperOrigin-RevId: 172634455",zlib_inputstream.h,"@@ -132,6 +132,9 @@ class ZlibInputStream : public InputStreamInterface {
   // Returns the size of [next_unread_byte_, z_stream_->next_out)
   size_t NumUnreadBytes() const;
 
+  // Number of *uncompressed* bytes that have been read from this stream.
+  int64 bytes_read_;
+
   TF_DISALLOW_COPY_AND_ASSIGN(ZlibInputStream);
 };
 
",0,test
f0a968651119a7dd17e727664c4741eaf737e839,tensorflow/tensorflow,Linter fixes,retrain.py,"@@ -41,7 +41,6 @@ The subfolder names are important, since they define what label is applied to
 each image, but the filenames themselves don't matter. Once your images are
 prepared, you can run the training with a command like this:
 
-
 ```bash
 bazel build tensorflow/examples/image_retraining:retrain && \
 bazel-bin/tensorflow/examples/image_retraining/retrain \
@@ -70,12 +69,14 @@ on resource-limited platforms, you can try the `--architecture` flag with a
 Mobilenet model. For example:
 
 Run floating-point version of mobilenet:
+
 ```bash
 python tensorflow/examples/image_retraining/retrain.py \
     --image_dir ~/flower_photos --architecture mobilenet_1.0_224
 ```
 
 Run quantized version of mobilenet:
+
 ```bash
 python tensorflow/examples/image_retraining/retrain.py \
     --image_dir ~/flower_photos/   --architecture mobilenet_1.0_224_quantized
@@ -98,8 +99,10 @@ tensorboard --logdir /tmp/retrain_logs
 
 To use with Tensorflow Serving:
 
-tensorflow_model_server --port=9000 --model_name=inception --model_base_path=/tmp/saved_models/
-
+```bash
+tensorflow_model_server --port=9000 --model_name=inception \
+    --model_base_path=/tmp/saved_models/
+```
 """"""
 from __future__ import absolute_import
 from __future__ import division
@@ -1026,24 +1029,25 @@ def export_model(sess, architecture, saved_model_dir):
   inputs = {'image': tf.saved_model.utils.build_tensor_info(in_image)}
 
   out_classes = sess.graph.get_tensor_by_name('final_result:0')
-  outputs = {'prediction': tf.saved_model.utils.build_tensor_info(out_classes)}
+  outputs = {'prediction': 
+             tf.saved_model.utils.build_tensor_info(out_classes)}
 
   signature = tf.saved_model.signature_def_utils.build_signature_def(
-    inputs=inputs,
-    outputs=outputs,
-    method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
-  )
+      inputs=inputs,
+      outputs=outputs,
+      method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME)
 
   legacy_init_op = tf.group(tf.tables_initializer(), name='legacy_init_op')
 
   # Save out the SavedModel.
   builder = tf.saved_model.builder.SavedModelBuilder(saved_model_dir)
   builder.add_meta_graph_and_variables(
-    sess, [tf.saved_model.tag_constants.SERVING],
-    signature_def_map={
-      tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: signature
-    },
-    legacy_init_op=legacy_init_op)
+      sess, [tf.saved_model.tag_constants.SERVING],
+      signature_def_map = {
+          tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: 
+          signature
+      },
+      legacy_init_op=legacy_init_op)
   builder.save()
 
 
",0,train
efd51f0b45f62399f1ad7a44348e928bfdeaf1c7,tensorflow/tensorflow,"Add an example of using tf.learn's random forest on mnist.
Change: 128472012",random_forest_mnist.py,"@@ -0,0 +1,78 @@
+# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the ""License"");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an ""AS IS"" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+""""""A stand-alone example for tf.learn's random forest model on mnist.""""""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import tempfile
+
+import tensorflow as tf
+
+from tensorflow.contrib.learn.python.learn.estimators import random_forest
+from tensorflow.examples.tutorials.mnist import input_data
+
+flags = tf.app.flags
+FLAGS = flags.FLAGS
+
+flags.DEFINE_string('model_dir', '', 'Base directory for output models.')
+flags.DEFINE_string('data_dir', '/tmp/data/', 'Directory for storing data')
+
+flags.DEFINE_integer('train_steps', 1000, 'Number of training steps.')
+flags.DEFINE_string('batch_size', 1000,
+                    'Number of examples in a training batch.')
+flags.DEFINE_integer('num_trees', 100, 'Number of trees in the forest.')
+flags.DEFINE_integer('max_nodes', 1000, 'Max total nodes in a single tree.')
+
+
+def build_estimator(model_dir):
+  """"""Build an estimator.""""""
+  params = tf.contrib.tensor_forest.python.tensor_forest.ForestHParams(
+      num_classes=10, num_features=784,
+      num_trees=FLAGS.num_trees, max_nodes=FLAGS.max_nodes)
+  return random_forest.TensorForestEstimator(params, model_dir=model_dir)
+
+
+def train_and_eval():
+  """"""Train and evaluate the model.""""""
+  model_dir = tempfile.mkdtemp() if not FLAGS.model_dir else FLAGS.model_dir
+  print('model directory = %s' % model_dir)
+
+  estimator = build_estimator(model_dir)
+
+  # TensorForest's LossMonitor allows training to terminate early if the
+  # forest is no longer growing.
+  early_stopping_rounds = 100
+  check_every_n_steps = 100
+  monitor = random_forest.LossMonitor(early_stopping_rounds,
+                                      check_every_n_steps)
+
+  mnist = input_data.read_data_sets(FLAGS.data_dir, one_hot=False)
+
+  estimator.fit(x=mnist.train.images, y=mnist.train.labels,
+                batch_size=FLAGS.batch_size, monitors=[monitor])
+
+  results = estimator.evaluate(x=mnist.test.images, y=mnist.test.labels,
+                               batch_size=FLAGS.batch_size)
+  for key in sorted(results):
+    print('%s: %s' % (key, results[key]))
+
+
+def main(_):
+  train_and_eval()
+
+
+if __name__ == '__main__':
+  tf.app.run()
",0,train
f3f05c8fd6ab935a614337af033f413e262db301,tensorflow/tensorflow,"[XLA] Remove useless log message when dumping HLO GraphDef.

This produces too much output that is not helpful.
Change: 155212076",hlo_tfgraph_builder.cc,"@@ -68,9 +68,8 @@ void CleanNodeName(string* name) {
 }
 
 Status HloTfGraphBuilder::AddComputation(const HloComputation& computation) {
-  LOG(INFO) << ""Adding computation "" << computation.name();
+  VLOG(2) << ""Adding computation "" << computation.name();
   for (auto embedded : computation.MakeEmbeddedComputationsList()) {
-    LOG(INFO) << ""Adding embedded computation "" << embedded->name();
     for (auto& instruction : embedded->instructions()) {
       TF_RETURN_IF_ERROR(AddInstruction(instruction.get()));
     }
",0,train
cb9ba66ffcca6857c823cad05550296bf213aafb,tensorflow/tensorflow,"Fix crash of GFile in python 3.7

This fix tries to address the issue raised in 27276 where
in Python 3.7, opening a zip file (of GFile) will results in
the error of
```
    bytes = self.zip.open(key)
  File ""/usr/lib64/python3.7/zipfile.py"", line 1480, in open
    self._fpclose, self._lock, lambda: self._writing)
  File ""/usr/lib64/python3.7/zipfile.py"", line 722, in __init__
    self.seekable = file.seekable
AttributeError: 'GFile' object has no attribute 'seekable'
```

The issue is that Python 3.7 adds seekable check:
https://github.com/python/cpython/commit/066df4fd454d6ff9be66e80b2a65995b10af174f

This fix adds `seekable()` and returns True, as GFile is indeed seekable.

This fix fixes 27276

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",file_io.py,"@@ -246,6 +246,10 @@ class FileIO(object):
         pywrap_tensorflow.Set_TF_Status_from_Status(status, ret_status)
     self._writable_file = None
 
+  def seekable(self):
+    """"""Returns True as FileIO supports random access ops of seek()/tell()""""""
+    return True
+
 
 @tf_export(v1=[""gfile.Exists""])
 def file_exists(filename):
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",convert_matrix_diag_v2_to_v1.cc,"@@ -0,0 +1,101 @@
+/* Copyright 2019 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+#include ""tensorflow/core/lib/core/errors.h""
+#include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/lite/toco/graph_transformations/graph_transformations.h""
+#include ""tensorflow/lite/toco/model.h""
+#include ""tensorflow/lite/toco/tooling_util.h""
+
+namespace toco {
+
+::tensorflow::Status ConvertMatrixDiagV2ToV1::Run(Model* model,
+                                                  std::size_t op_index,
+                                                  bool* modified) {
+  *modified = false;
+  auto it = model->operators.begin() + op_index;
+  const auto* op = it->get();
+  if (op->type != OperatorType::kMatrixDiagV2) {
+    return ::tensorflow::Status::OK();
+  }
+
+  if (op->inputs.size() != 5) {
+    return tensorflow::errors::InvalidArgument(
+        ""The input size of op %s should be 5"", LogName(*op));
+  }
+
+  const auto& input_k = model->GetArray(op->inputs[1]);
+  const auto& input_num_rows = model->GetArray(op->inputs[2]);
+  const auto& input_num_cols = model->GetArray(op->inputs[3]);
+  const auto& input_padding_value = model->GetArray(op->inputs[4]);
+
+  if (!input_k.buffer || !input_num_rows.buffer || !input_num_cols.buffer ||
+      !input_padding_value.buffer) {
+    return ::tensorflow::Status::OK();
+  }
+
+  if (input_k.GetBuffer<ArrayDataType::kInt32>().data.size() != 1 ||
+      input_num_rows.GetBuffer<ArrayDataType::kInt32>().data.size() != 1 ||
+      input_num_cols.GetBuffer<ArrayDataType::kInt32>().data.size() != 1) {
+    return tensorflow::errors::InvalidArgument(
+        ""Array for argument k / num_rows / num_cols of op "", LogName(*op),
+        "" should contains exact one element"");
+  }
+
+  int k = input_k.GetBuffer<ArrayDataType::kInt32>().data[0];
+  int num_rows = input_num_rows.GetBuffer<ArrayDataType::kInt32>().data[0];
+  int num_cols = input_num_cols.GetBuffer<ArrayDataType::kInt32>().data[0];
+  const auto& padding_value_vector =
+      input_padding_value.GetBuffer<ArrayDataType::kUint8>().data;
+
+  if (k != 0) {
+    return tensorflow::errors::InvalidArgument(
+        ""parameter k of op "", LogName(*op),
+        "" is expected to be 0, other values are not supported currently"");
+  }
+
+  if (num_rows != -1) {
+    return tensorflow::errors::InvalidArgument(
+        ""parameter num_rows of op "", LogName(*op),
+        "" is expected to be -1, other values are not supported currently"");
+  }
+
+  if (num_cols != -1) {
+    return tensorflow::errors::InvalidArgument(
+        ""parameter num_cols of op "", LogName(*op),
+        "" is expected to be -1, other values are not supported currently"");
+  }
+  for (auto byte : padding_value_vector) {
+    if (byte != 0) {
+      return tensorflow::errors::InvalidArgument(
+          ""parameter padding_value of op "", LogName(*op),
+          "" is expected to be 0, other values are not supported currently"");
+    }
+  }
+
+  auto* matrix_diag_op = new MatrixDiagOperator;
+  matrix_diag_op->inputs.push_back(op->inputs[0]);
+  matrix_diag_op->outputs.push_back(op->outputs[0]);
+
+  AddMessageF(""Replacing %s with %s"", LogName(*op), LogName(*matrix_diag_op));
+
+  // Replace the operator in the graph.
+  model->operators.emplace(it, matrix_diag_op);
+  DeleteOpAndArrays(model, op);
+
+  *modified = true;
+  return ::tensorflow::Status::OK();
+}
+
+}  // namespace toco
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",graph_transformations.h,"@@ -123,13 +123,14 @@ inline void RunGraphTransformations(
 
 // List of all graph transformations
 DECLARE_GRAPH_TRANSFORMATION(ConvertExpandDimsToReshape)
+DECLARE_GRAPH_TRANSFORMATION(ConvertMatrixDiagV2ToV1)
 DECLARE_GRAPH_TRANSFORMATION(ConvertPureConvToDepthwise)
+DECLARE_GRAPH_TRANSFORMATION(ConvertReorderAxes)
 DECLARE_GRAPH_TRANSFORMATION(ConvertSqueezeToReshape)
 DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialAddNToAdd)
 DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialPackToReshape)
 DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTileToConcat)
 DECLARE_GRAPH_TRANSFORMATION(ConvertTrivialTransposeToReshape)
-DECLARE_GRAPH_TRANSFORMATION(ConvertReorderAxes)
 DECLARE_GRAPH_TRANSFORMATION(EnsureBiasVectors)
 DECLARE_GRAPH_TRANSFORMATION(FuseActivationFunctions)
 DECLARE_GRAPH_TRANSFORMATION(FuseBinaryIntoFollowingAffine)
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",propagate_fixed_sizes.cc,"@@ -2426,6 +2426,10 @@ void ProcessMatrixSetDiagOperator(Model* model, MatrixSetDiagOperator* op) {
       // The sizes of the outputs are only known in runtime based on the input.
       // Ignore shape progapation here and defer that to the interpreter.
       break;
+    case OperatorType::kMatrixDiagV2:
+      // MatrixDiagV2 operators are converted to MatrixDiag, after which their
+      // shapes are propagated.
+      break;
     default:
       // Unimplemented, another graph transformation should drop it.
       LOG(FATAL) << ""Unhandled operator type "" << OperatorTypeName(op->type);
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",import_tensorflow.cc,"@@ -2516,6 +2516,7 @@ ConverterMapType GetTensorFlowNodeConverterMap() {
       {""LogSoftmax"", ConvertSimpleOperator<LogSoftmaxOperator, 1, 1>},
       {""MatMul"", ConvertMatMulOperator},
       {""MatrixDiag"", ConvertSimpleOperator<MatrixDiagOperator, 1, 1>},
+      {""MatrixDiagV2"", ConvertSimpleOperator<MatrixDiagV2Operator, 5, 1>},
       {""MatrixSetDiag"", ConvertSimpleOperator<MatrixSetDiagOperator, 2, 1>},
       {""Max"", ConvertReduceOperator<TensorFlowMaxOperator>},
       {""MaxPool"", ConvertMaxPoolOperator},
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",model.h,"@@ -172,7 +172,8 @@ enum class OperatorType : uint8 {
   kElu,
   kReverseSequence,
   kMatrixDiag,
-  kMatrixSetDiag
+  kMatrixSetDiag,
+  kMatrixDiagV2,
 };
 
 // Helper to deal with TensorFlow arrays using a different ordering of
@@ -2109,6 +2110,14 @@ struct MatrixDiagOperator : Operator {
   MatrixDiagOperator() : Operator(OperatorType::kMatrixDiag) {}
 };
 
+// Matrix Diag Operator V2:
+// Construct a batched diagonal tensor with given batched diagonal values.
+// Not fully supported; contains 4 extra inputs compared to MatrixDiag. Supports
+// default parameter settings, which perform the same as MatrixDiag.
+struct MatrixDiagV2Operator : Operator {
+  MatrixDiagV2Operator() : Operator(OperatorType::kMatrixDiagV2) {}
+};
+
 // Matrix Set Diag Operator:
 // Construct a batched diagonal tensor with given input and diagonal values.
 // Input is a rank (k+1) tensor of values.
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",toco_tooling.cc,"@@ -54,6 +54,7 @@ void MakeGeneralGraphTransformationsSet(
     GraphTransformationsSet* transformations) {
   CHECK(transformations->empty());
   transformations->Add(new ConvertExpandDimsToReshape);
+  transformations->Add(new ConvertMatrixDiagV2ToV1);
   transformations->Add(new ConvertSqueezeToReshape);
   transformations->Add(new ConvertTrivialAddNToAdd);
   transformations->Add(new ConvertTrivialPackToReshape);
",0,train
ec87f72b9a80dee361c9c522c896edbda257c2f5,tensorflow/tensorflow,"support MatrixDiagV2 op

PiperOrigin-RevId: 256070251",tooling_util.cc,"@@ -447,6 +447,7 @@ const char* OperatorTypeName(OperatorType type) {
     HANDLE_OPERATORTYPENAME_CASE(ReverseSequence)
     HANDLE_OPERATORTYPENAME_CASE(MatrixDiag)
     HANDLE_OPERATORTYPENAME_CASE(MatrixSetDiag)
+    HANDLE_OPERATORTYPENAME_CASE(MatrixDiagV2)
     default:
       LOG(FATAL) << ""Unhandled op type"";
 #undef HANDLE_OPERATORTYPENAME_CASE
",0,train
da8e7314544aa39d85d9bb111645077d1692ae05,tensorflow/tensorflow,"Fixed flaky test by increasing grace duration.

PiperOrigin-RevId: 335535674
Change-Id: Idff0caaedf9585f19ab15d51ecaa5a0495bf337c",profiler_client_test.cc,"@@ -125,7 +125,7 @@ TEST(RemoteProfilerSession, LongDuration) {
 
   absl::Time approx_start = absl::Now();
   // Empirically determined value.
-  absl::Duration grace = absl::Seconds(2);
+  absl::Duration grace = absl::Seconds(20);
   absl::Duration max_duration = duration + grace;
   const absl::Time deadline = approx_start + max_duration;
 
",0,train
da8e7314544aa39d85d9bb111645077d1692ae05,tensorflow/tensorflow,"Fixed flaky test by increasing grace duration.

PiperOrigin-RevId: 335535674
Change-Id: Idff0caaedf9585f19ab15d51ecaa5a0495bf337c",remote_profiler_session_manager_test.cc,"@@ -100,7 +100,8 @@ TEST(RemoteProfilerSessionManagerTest, LongSession) {
   auto server = StartServer(duration, &service_addresses);
   options.add_service_addresses(service_addresses);
   absl::Time approx_start = absl::Now();
-  absl::Duration grace = absl::Seconds(2);
+  // Empirically determined value.
+  absl::Duration grace = absl::Seconds(20);
   absl::Duration max_duration = duration + grace;
   options.set_max_session_duration_ms(absl::ToInt64Milliseconds(max_duration));
   options.set_session_creation_timestamp_ns(absl::ToUnixNanos(approx_start));
",0,train
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",local_client.cc,"@@ -180,8 +180,8 @@ StatusOr<ScopedShapedBuffer> LocalExecutable::Run(
   if (executable_->dumping_snapshot()) {
     return ExecuteAndDump(&options_and_stream.first, arguments);
   }
-  return executable_->ExecuteOnStreamWrapper(
-      &options_and_stream.first, run_options.execution_profile(), arguments);
+  return executable_->ExecuteOnStreamWrapper(&options_and_stream.first,
+                                             arguments);
 }
 
 StatusOr<ScopedShapedBuffer> LocalExecutable::RunAsync(
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",cpu_executable.cc,"@@ -194,13 +194,13 @@ Status CpuExecutable::ExecuteComputeFunction(
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
-  {
-    tensorflow::mutex_lock lock(mutex_);
+  if (run_options->execution_profile()) {
     const double nanoseconds = (end_micros - start_micros) * 1000.0;
-    execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
+    run_options->execution_profile()->set_compute_time_ns(
+        std::max(nanoseconds, 1.0));
     // If hlo profiling was disabled then the cycle count is left empty.
     if (hlo_execution_profile) {
-      execution_profile_.set_compute_cycle_count(
+      run_options->execution_profile()->set_compute_cycle_count(
           hlo_execution_profile->total_cycles_executed(
               *module().entry_computation()));
     }
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",executable.cc,"@@ -61,10 +61,11 @@ StatusOr<std::vector<ScopedShapedBuffer>> Executable::ExecuteOnStreams(
 }
 
 StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
-    const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
+    const ServiceExecutableRunOptions* run_options,
     absl::Span<const ShapedBuffer* const> arguments) {
   se::Stream* stream = run_options->stream();
   std::unique_ptr<se::Timer> timer;
+  ExecutionProfile* profile = run_options->run_options().execution_profile();
   if (profile != nullptr) {
     timer.reset(new se::Timer(stream->parent()));
     stream->InitTimer(timer.get()).ThenStartTimer(timer.get());
@@ -102,11 +103,6 @@ StatusOr<ScopedShapedBuffer> Executable::ExecuteOnStreamWrapper(
     VLOG(1) << ""done with block-host-until-done"";
 
     // Merge in run-time profile information from execution_profile.
-    //
-    // TODO(b/71713097): This is buggy -- even though the mutex takes care of
-    // C++ level races, some other concurrent ExecuteOnStreamWrapper call could
-    // have rewritten the execution_profile before we get to it.
-    profile->MergeFrom(execution_profile());
 
     // Overall execution time (in nanoseconds) from the executor timer.
     if (stream->ok()) {
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",executable.h,"@@ -171,6 +171,7 @@ class Executable {
   // called explicitly for other (async, for example) variants after the stream
   // has completed.
   virtual Status PopulateExecutionProfile(
+      ExecutionProfile* execution_profile,
       HloExecutionProfile* hlo_execution_profile, se::Stream* stream) {
     return Status::OK();
   }
@@ -179,16 +180,9 @@ class Executable {
   // timer for the execution, sets up HLO profiling if enabled, and fills in the
   // given ExecutionProfile if non-null.
   StatusOr<ScopedShapedBuffer> ExecuteOnStreamWrapper(
-      const ServiceExecutableRunOptions* run_options, ExecutionProfile* profile,
+      const ServiceExecutableRunOptions* run_options,
       absl::Span<const ShapedBuffer* const> arguments);
 
-  // Returns the ExecutionProfile from executing on the device. This includes
-  // the number of cycles taken for the computation or the compilation time.
-  ExecutionProfile execution_profile() const {
-    tensorflow::mutex_lock lock(mutex_);
-    return execution_profile_;
-  }
-
   const HloProfilePrinterData& hlo_profile_printer_data() const {
     CHECK(hlo_profiling_enabled());
     return *hlo_profile_printer_data_;
@@ -233,11 +227,6 @@ class Executable {
   HloProto const* hlo_proto() const { return hlo_proto_.get(); }
 
  protected:
-  mutable tensorflow::mutex mutex_;
-
-  // Execution profile data on the device.
-  ExecutionProfile execution_profile_ GUARDED_BY(mutex_);
-
   // HloModule this was compiled from. BufferAssignment keeps pointers to
   // HloInstructions owned by the HloModule so we need to keep the HloModule
   // around.
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",gpu_executable.cc,"@@ -207,17 +207,20 @@ Status GpuExecutable::ExecuteThunks(
     }
   }
 
+  // FinishExecution() blocks until main_stream has completed if profiling is
+  // enabled; we therefore do not need to defer profile collection onto a
+  // stream.
   profiler.FinishExecution();
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
-  {
-    tensorflow::mutex_lock lock(mutex_);
+  if (run_options->run_options().execution_profile()) {
+    ExecutionProfile* profile = run_options->run_options().execution_profile();
     const double nanoseconds = (end_micros - start_micros) * 1000.0;
-    execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
+    profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
 
     // If hlo profiling was disabled then the cycle count is left empty.
     if (do_profile) {
-      execution_profile_.set_compute_cycle_count(
+      profile->set_compute_cycle_count(
           hlo_execution_profile->total_cycles_executed(
               *module().entry_computation()));
     }
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",hlo_runner.cc,"@@ -208,13 +208,13 @@ StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
   ServiceExecutableRunOptions service_run_options =
       GetServiceRunOptionsForDevice(backend().default_device_ordinal(), &stream,
                                     nullptr, RunId());
+  service_run_options.mutable_run_options()->set_execution_profile(profile);
 
   TF_ASSIGN_OR_RETURN(std::unique_ptr<Executable> executable,
                       CreateExecutable(std::move(module), run_hlo_passes));
   TF_ASSIGN_OR_RETURN(
       ScopedShapedBuffer retval,
-      executable->ExecuteOnStreamWrapper(&service_run_options,
-                                         /*profile=*/profile, arguments));
+      executable->ExecuteOnStreamWrapper(&service_run_options, arguments));
   TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
   return std::move(retval);
 }
@@ -244,11 +244,11 @@ StatusOr<ScopedShapedBuffer> HloRunner::ExecuteWithDeviceBuffers(
   ServiceExecutableRunOptions service_run_options =
       GetServiceRunOptionsForDevice(backend().default_device_ordinal(), &stream,
                                     nullptr, RunId());
+  service_run_options.mutable_run_options()->set_execution_profile(profile);
 
   TF_ASSIGN_OR_RETURN(
       ScopedShapedBuffer retval,
-      executable->ExecuteOnStreamWrapper(&service_run_options,
-                                         /*profile=*/profile, arguments));
+      executable->ExecuteOnStreamWrapper(&service_run_options, arguments));
   TF_RETURN_IF_ERROR(stream.BlockHostUntilDone());
   return std::move(retval);
 }
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",executable.cc,"@@ -113,10 +113,10 @@ StatusOr<ScopedShapedBuffer> InterpreterExecutable::ExecuteOnStream(
 
   uint64 end_micros = tensorflow::Env::Default()->NowMicros();
 
-  {
-    tensorflow::mutex_lock lock(mutex_);
+  ExecutionProfile* profile = run_options->run_options().execution_profile();
+  if (profile) {
     const double nanoseconds = (end_micros - start_micros) * 1000.0;
-    execution_profile_.set_compute_time_ns(std::max(nanoseconds, 1.0));
+    profile->set_compute_time_ns(std::max(nanoseconds, 1.0));
   }
 
   return std::move(result);
",0,test
ab8b627e0b269ef2a4d1859fb62b80f3f1eea345,tensorflow/tensorflow,"[XLA] Clean up execution_profile usage and make it thread-safe.

Currently a mutable execution_profile is attached to xla::Executable. This isn't thread safe, since the same Executable may be invoked concurrently. Instead, clients already have the ability to pass in their own ExecutionProfile via the ExecutableRunOptions; update that one instead.

Simplify APIs that accepted both an ExecutionProfile and an ExecutableRunOptions given the latter contains a pointer to an instance of the former.

PiperOrigin-RevId: 261570194",service.cc,"@@ -451,6 +451,11 @@ Service::ExecuteParallelAndRegisterResult(
       options.set_intra_op_thread_pool(
           backend->eigen_intra_op_thread_pool_device());
       options.set_device_assignment(&device_assignment);
+      // Use run-time profile information from execution_profile on the 0th
+      // device.
+      if (i == 0) {
+        options.set_execution_profile(profile);
+      }
       ServiceExecutableRunOptions run_options(options,
                                               backend->StreamBorrower());
 
@@ -490,10 +495,6 @@ Service::ExecuteParallelAndRegisterResult(
     uint64 nanoseconds =
         *std::max_element(timer_nanoseconds.begin(), timer_nanoseconds.end());
 
-    // Merge in run-time profile information from execution_profile on the
-    // zeroth device.
-    profile->MergeFrom(executables[0]->execution_profile());
-
     // Overall execution time (in nanoseconds) from the executor timer.
     profile->set_compute_and_transfer_time_ns(nanoseconds);
 
@@ -546,13 +547,13 @@ StatusOr<GlobalDataHandle> Service::ExecuteAndRegisterResult(
     options.set_intra_op_thread_pool(
         backend->eigen_intra_op_thread_pool_device());
     options.set_device_assignment(&device_assignment);
+    options.set_execution_profile(profile);
     run_options.emplace_back(options, backend->StreamBorrower());
   }
 
   if (options_.number_of_replicas() == 1) {
-    TF_ASSIGN_OR_RETURN(
-        auto result, executable->ExecuteOnStreamWrapper(&run_options[0],
-                                                        profile, arguments[0]));
+    TF_ASSIGN_OR_RETURN(auto result, executable->ExecuteOnStreamWrapper(
+                                         &run_options[0], arguments[0]));
     return allocation_tracker_.Register(std::move(result), result_tag);
   }
 
",0,test
40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs

PiperOrigin-RevId: 376219678
Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",index_lookup.py,"@@ -26,6 +26,7 @@ from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import tensor_spec
+from tensorflow.python.framework import tensor_util
 from tensorflow.python.keras import backend
 from tensorflow.python.keras.engine import base_preprocessing_layer
 from tensorflow.python.keras.layers.preprocessing import category_encoding
@@ -377,16 +378,14 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
 
     # The MutableHashTable data will not be sorted, so we will create a inverted
     # lookup here, and use that to lookup a range of indices [0, vocab_size).
-    keys, values = self._table_handler.data()
-    if self.invert:
-      index_to_token = zip(keys, values)
-    else:
-      index_to_token = zip(values, keys)
-    lookup = collections.defaultdict(lambda: self.oov_token, index_to_token)
+    keys, values = self._table.export()
+    vocab, indices = (values, keys) if self.invert else (keys, values)
+    lookup = collections.defaultdict(
+        lambda: self.oov_token,
+        zip(indices.numpy(), self._tensor_vocab_to_numpy(vocab)))
     vocab = [lookup[x] for x in range(self.vocabulary_size())]
     if self.mask_token is not None and self.output_mode == INT:
       vocab[0] = self.mask_token
-
     return vocab
 
   def vocabulary_size(self):
@@ -441,9 +440,10 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
     it.
 
     Args:
-      vocabulary: An array of hashable tokens.
-      idf_weights: An array of inverse document frequency weights with equal
-        length to vocab. Only necessary if the layer output_mode is TF_IDF.
+      vocabulary: An array, numpy array, or tensor of hashable tokens.
+      idf_weights: An array, numpy array, or tensor of inverse document
+        frequency weights with equal length to vocab. Only necessary if the
+        layer output_mode is TF_IDF.
 
     Raises:
       ValueError: If there are too many inputs, the inputs do not match, or
@@ -452,6 +452,7 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
         called. This happens when `""multi_hot""`, `""count""`, and `""tfidf""` modes,
         if `pad_to_max_tokens` is False and the layer itself has already been
         called.
+      RuntimeError: If a tensor vocabulary is passed outside of eager execution.
     """"""
     if self._has_static_table:
       raise RuntimeError(""Layer {} was created with a static file-based table ""
@@ -470,6 +471,21 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
                          ""False, the vocabulary cannot be changed after the ""
                          ""layer is called."".format(self.output_mode))
 
+    if not context.executing_eagerly() and (tensor_util.is_tensor(vocabulary) or
+                                            tensor_util.is_tensor(idf_weights)):
+      raise RuntimeError(
+          ""Cannot set a tensor vocabulary on {} layer {} when not executing ""
+          ""eagerly. Create this layer or call `set_vocabulary` outside of ""
+          ""any `tf.function`s and with eager execution enabled."".format(
+              self.__class__.__name__, self.name))
+
+    # TODO(mattdangerw): for better performance we should rewrite this entire
+    # function to operate on tensors and convert vocabulary to a tensor here.
+    if tensor_util.is_tensor(vocabulary):
+      vocabulary = self._tensor_vocab_to_numpy(vocabulary)
+    if tensor_util.is_tensor(idf_weights):
+      idf_weights = idf_weights.numpy()
+
     oov_start = self._oov_start_index()
     token_start = self._token_start_index()
     should_have_mask = (oov_start > 0)
@@ -658,6 +674,11 @@ class IndexLookup(base_preprocessing_layer.CombinerPreprocessingLayer):
   def _trackable_saved_model_saver(self):
     return layer_serialization.IndexLookupLayerSavedModelSaver(self)
 
+  # Override points for IntegerLookup and StringLookup.
+  def _tensor_vocab_to_numpy(self, vocabulary):
+    """"""Converts a tensor vocabulary to a numpy vocabulary.""""""
+    return vocabulary.numpy()
+
 
 class _IndexLookupAccumulator(
     collections.namedtuple(""Accumulator"",
",0,train
40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs

PiperOrigin-RevId: 376219678
Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",integer_lookup_test.py,"@@ -26,6 +26,8 @@ from tensorflow.python import keras
 from tensorflow.python import tf2
 
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import errors_impl
@@ -525,6 +527,17 @@ class IntegerLookupVocabularyTest(
         "".*HashTable has different value for same key.*42.*""):
       _ = integer_lookup.IntegerLookup(vocabulary=vocab_path)
 
+  def test_tensor_vocab(self):
+    vocab_data = [-1, 42, 1138, 725, 1729]
+    vocab_tensor = constant_op.constant(vocab_data, dtypes.int64)
+    layer = integer_lookup.IntegerLookup(vocabulary=vocab_tensor)
+    returned_vocab = layer.get_vocabulary()
+    self.assertAllEqual(vocab_data, returned_vocab)
+    self.assertAllEqual(layer.vocabulary_size(), 5)
+    fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor))
+    with self.assertRaisesRegex(RuntimeError, ""Cannot set a tensor vocabulary""):
+      fn()
+
 
 @keras_parameterized.run_all_keras_modes(always_skip_v1=True)
 class IntegerLookupErrorTest(keras_parameterized.TestCase,
",0,train
40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs

PiperOrigin-RevId: 376219678
Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",string_lookup.py,"@@ -15,6 +15,8 @@
 """"""Keras string lookup preprocessing layer.""""""
 # pylint: disable=g-classes-have-attributes
 
+import numpy as np
+
 from tensorflow.python.framework import dtypes
 from tensorflow.python.keras.engine import base_preprocessing_layer
 from tensorflow.python.keras.layers.preprocessing import index_lookup
@@ -298,10 +300,6 @@ class StringLookup(index_lookup.IndexLookup):
     base_config = super(StringLookup, self).get_config()
     return dict(list(base_config.items()) + list(config.items()))
 
-  def get_vocabulary(self):
-    vocab = super(StringLookup, self).get_vocabulary()
-    return [compat.as_text(x, self.encoding) for x in vocab]
-
   def set_vocabulary(self, vocabulary, idf_weights=None):
     if isinstance(vocabulary, str):
       if self.output_mode == index_lookup.TF_IDF:
@@ -315,3 +313,8 @@ class StringLookup(index_lookup.IndexLookup):
       vocabulary = table_utils.get_vocabulary_from_file(vocabulary,
                                                         self.encoding)
     super().set_vocabulary(vocabulary, idf_weights=idf_weights)
+
+  # Overriden methods from IndexLookup.
+  def _tensor_vocab_to_numpy(self, vocabulary):
+    vocabulary = vocabulary.numpy()
+    return np.array([compat.as_text(x, self.encoding) for x in vocabulary])
",0,train
40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs

PiperOrigin-RevId: 376219678
Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",string_lookup_test.py,"@@ -21,6 +21,8 @@ import numpy as np
 from tensorflow.python import keras
 
 from tensorflow.python.data.ops import dataset_ops
+from tensorflow.python.eager import def_function
+from tensorflow.python.framework import constant_op
 from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import errors
 from tensorflow.python.framework import errors_impl
@@ -365,6 +367,16 @@ class StringLookupVocabularyTest(keras_parameterized.TestCase,
     output_data = model.predict(input_array)
     self.assertAllEqual(expected_output, output_data)
 
+  def test_tensor_vocab(self):
+    vocab_data = [""[UNK]"", ""wind"", ""and"", ""fire""]
+    vocab_tensor = constant_op.constant(vocab_data)
+    layer = string_lookup.StringLookup(vocabulary=vocab_tensor)
+    returned_vocab = layer.get_vocabulary()
+    self.assertAllEqual(vocab_data, returned_vocab)
+    self.assertAllEqual(layer.vocabulary_size(), 4)
+    fn = def_function.function(lambda: layer.set_vocabulary(vocab_tensor))
+    with self.assertRaisesRegex(RuntimeError, ""Cannot set a tensor vocabulary""):
+      fn()
 
 if __name__ == ""__main__"":
   test.main()
",0,train
40ea8ce73d0b38f07260845057b5b0e2bdb2ac17,tensorflow/tensorflow,"Lookup layers allow tensor input vocabs

PiperOrigin-RevId: 376219678
Change-Id: Idcfad5f5eb3619785472f10d445af52dfbe14345",table_utils.py,"@@ -51,10 +51,6 @@ class TableHandler(object):
         oov_tokens = [oov_tokens]
       self.oov_tokens = math_ops.cast(oov_tokens, table._value_dtype)  # pylint: disable=protected-access
 
-  def data(self):
-    keys, values = self.table.export()
-    return (keys.numpy(), values.numpy())
-
   def table_size(self):
     return self.table.size().numpy()
 
",0,train
995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself.

PiperOrigin-RevId: 229423249",TypeSupport.h,"@@ -143,6 +143,9 @@ class TypeUniquer {
 public:
   /// Lookup key for storage types.
   struct TypeLookupKey {
+    /// The known derived kind for the storage.
+    unsigned kind;
+
     /// The known hash value of the key.
     unsigned hashValue;
 
@@ -170,18 +173,12 @@ public:
 
     // Generate an equality function for the derived storage.
     std::function<bool(const TypeStorage *)> isEqual =
-        [kind, &derivedKey](const TypeStorage *existing) {
-          // Check that these type storages have the same kind.
-          if (kind != existing->getKind())
-            return false;
-          // Generate a key from the derived storage and compare it to the
-          // current key.
-          auto *derivedStorage = static_cast<const ImplType *>(existing);
-          return derivedStorage->getKey() == derivedKey;
+        [&derivedKey](const TypeStorage *existing) {
+          return static_cast<const ImplType &>(*existing) == derivedKey;
         };
 
     // Lookup an existing type with the given key.
-    TypeStorage *storage = lookup(TypeLookupKey{hashValue, isEqual});
+    TypeStorage *storage = lookup(TypeLookupKey{kind, hashValue, isEqual});
     if (storage)
       return T(storage);
 
",0,test
995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself.

PiperOrigin-RevId: 229423249",Types.h,"@@ -84,8 +84,8 @@ struct UnknownTypeStorage;
 ///      * The key type must have a llvm::DenseMapInfo specialization for
 ///        hashing.
 ///
-///    - Provide a method, 'KeyTy getKey() const', to construct the key type
-///      from an existing storage instance.
+///    - Provide a method, 'bool operator==(const KeyTy &) const', to
+///      compare the storage instance against an instance of the key type.
 ///
 ///    - Provide a construction method:
 ///        'DerivedStorage *construct(TypeStorageAllocator &, const KeyTy &key)'
",0,test
995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself.

PiperOrigin-RevId: 229423249",LLVMDialect.h,"@@ -43,7 +43,7 @@ namespace mlir {
 namespace LLVM {
 
 namespace detail {
-class LLVMTypeStorage;
+struct LLVMTypeStorage;
 }
 
 class LLVMType : public mlir::Type::TypeBase<LLVMType, mlir::Type,
",0,test
995eec62445a677df9d7d1268143ae54fc7d6285,tensorflow/tensorflow,"Change derived type storage objects to define an 'operator==(const KeyTy &)' instead of converting to the KeyTy. This allows for handling cases where the KeyTy does not provide an equality operator on itself.

PiperOrigin-RevId: 229423249",TypeDetail.h,"@@ -40,9 +40,9 @@ struct UnknownTypeStorage : public TypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = std::pair<Identifier, StringRef>;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return std::make_pair(dialectNamespace, typeData); }
+  bool operator==(const KeyTy &key) const {
+    return key == KeyTy(dialectNamespace, typeData);
+  }
 
   static UnknownTypeStorage *construct(TypeStorageAllocator &allocator,
                                        const KeyTy &key) {
@@ -64,9 +64,7 @@ struct IntegerTypeStorage : public TypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = unsigned;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return width; }
+  bool operator==(const KeyTy &key) const { return key == width; }
 
   static IntegerTypeStorage *construct(TypeStorageAllocator &allocator,
                                        KeyTy bitwidth) {
@@ -86,9 +84,9 @@ struct FunctionTypeStorage : public TypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = std::pair<ArrayRef<Type>, ArrayRef<Type>>;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return KeyTy(getInputs(), getResults()); }
+  bool operator==(const KeyTy &key) const {
+    return key == KeyTy(getInputs(), getResults());
+  }
 
   /// Construction.
   static FunctionTypeStorage *construct(TypeStorageAllocator &allocator,
@@ -125,9 +123,7 @@ struct VectorOrTensorTypeStorage : public TypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = Type;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return elementType; }
+  bool operator==(const KeyTy &key) const { return key == elementType; }
 
   Type elementType;
 };
@@ -141,9 +137,9 @@ struct VectorTypeStorage : public VectorOrTensorTypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = std::pair<ArrayRef<int>, Type>;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return KeyTy(getShape(), elementType); }
+  bool operator==(const KeyTy &key) const {
+    return key == KeyTy(getShape(), elementType);
+  }
 
   /// Construction.
   static VectorTypeStorage *construct(TypeStorageAllocator &allocator,
@@ -171,9 +167,9 @@ struct RankedTensorTypeStorage : public VectorOrTensorTypeStorage {
 
   /// The hash key used for uniquing.
   using KeyTy = std::pair<ArrayRef<int>, Type>;
-
-  /// Convert to the key type.
-  KeyTy getKey() const { return KeyTy(getShape(), elementType); }
+  bool operator==(const KeyTy &key) const {
+    return key == KeyTy(getShape(), elementType);
+  }
 
   /// Construction.
   static RankedTensorTypeStorage *construct(TypeStorageAllocator &allocator,
@@ -194,14 +190,14 @@ struct RankedTensorTypeStorage : public VectorOrTensorTypeStorage {
 };
 
 struct UnrankedTensorTypeStorage : public VectorOrTensorTypeStorage {
-  UnrankedTensorTypeStorage(Type elementTy)
-      : VectorOrTensorTypeStorage(elementTy) {}
+  using VectorOrTensorTypeStorage::KeyTy;
+  using VectorOrTensorTypeStorage::VectorOrTensorTypeStorage;
 
   /// Construction.
   static UnrankedTensorTypeStorage *construct(TypeStorageAllocator &allocator,
                                               Type elementTy) {
-    auto *result = allocator.allocate<UnrankedTensorTypeStorage>();
-    return new (result) UnrankedTensorTypeStorage(elementTy);
+    return new (allocator.allocate<UnrankedTensorTypeStorage>())
+        UnrankedTensorTypeStorage(elementTy);
   }
 };
 
@@ -217,10 +213,8 @@ struct MemRefTypeStorage : public TypeStorage {
   // MemRefs are uniqued based on their shape, element type, affine map
   // composition, and memory space.
   using KeyTy = std::tuple<ArrayRef<int>, Type, ArrayRef<AffineMap>, unsigned>;
-
-  /// Convert to the key type.
-  KeyTy getKey() const {
-    return KeyTy(getShape(), elementType, getAffineMaps(), memorySpace);
+  bool operator==(const KeyTy &key) const {
+    return key == KeyTy(getShape(), elementType, getAffineMaps(), memorySpace);
   }
 
   /// Construction.
",0,test
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",acceleration_test_list.cc,"@@ -214,6 +214,7 @@ TypesGatherOpTest/Float32Int32,29
 TypesGatherOpTest/Int32Int32,29
 TypesGatherOpTest/Uint8Int32,29
 TypesGatherOpTest/Int8Int32,29
+-TypesGatherOpTest/.*Int16.*
 
 # hashtable_lookup_test
 # All test excepted the string one should be accelerated
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",gather.cc,"@@ -61,6 +61,7 @@ TfLiteStatus Prepare(TfLiteContext* context, TfLiteNode* node) {
     case kTfLiteFloat32:
     case kTfLiteUInt8:
     case kTfLiteInt8:
+    case kTfLiteInt16:
     case kTfLiteInt64:
     case kTfLiteInt32:
     case kTfLiteBool:
@@ -143,6 +144,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         return Gather<uint8_t, int32_t>(*params, input, positions, output);
       case kTfLiteInt8:
         return Gather<int8_t, int32_t>(*params, input, positions, output);
+      case kTfLiteInt16:
+        return Gather<int16_t, int32_t>(*params, input, positions, output);
       case kTfLiteInt32:
         return Gather<int32_t, int32_t>(*params, input, positions, output);
       case kTfLiteInt64:
@@ -165,6 +168,8 @@ TfLiteStatus Eval(TfLiteContext* context, TfLiteNode* node) {
         return Gather<uint8_t, int64_t>(*params, input, positions, output);
       case kTfLiteInt8:
         return Gather<int8_t, int64_t>(*params, input, positions, output);
+      case kTfLiteInt16:
+        return Gather<int16_t, int64_t>(*params, input, positions, output);
       case kTfLiteInt32:
         return Gather<int32_t, int64_t>(*params, input, positions, output);
       case kTfLiteInt64:
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",gather_test.cc,"@@ -272,6 +272,24 @@ TEST(TypesGatherOpTest, Int8Int64) {
   EXPECT_THAT(m.GetOutput<int8_t>(), ElementsAreArray({14, 15, -13, -120}));
 }
 
+TEST(TypesGatherOpTest, Int16Int32) {
+  GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT32, {2}});
+  m.SetInput<int16_t>({-13, -32000, 0, 32500});
+  m.SetPositions<int32_t>({1, 0});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int16_t>(), ElementsAreArray({0, 32500, -13, -32000}));
+}
+
+TEST(TypesGatherOpTest, Int16Int64) {
+  GatherOpModel m({TensorType_INT16, {2, 2}}, {TensorType_INT64, {2}});
+  m.SetInput<int16_t>({-13, -32000, 0, 32500});
+  m.SetPositions<int64_t>({1LL, 0LL});
+  m.Invoke();
+
+  EXPECT_THAT(m.GetOutput<int16_t>(), ElementsAreArray({0, 32500, -13, -32000}));
+}
+
 TEST(TypesGatherOpTest, Int64Int32) {
   GatherOpModel m({TensorType_INT64, {2, 2}}, {TensorType_INT32, {2}});
   m.SetInput<int64_t>({-(1LL << 34), 134LL, 14LL, 15LL});
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",register.cc,"@@ -131,7 +131,7 @@ BuiltinOpResolver::BuiltinOpResolver() {
   AddBuiltin(BuiltinOperator_DEPTH_TO_SPACE, Register_DEPTH_TO_SPACE());
   AddBuiltin(BuiltinOperator_GATHER, Register_GATHER(),
              /* min_version = */ 1,
-             /* max_version = */ 3);
+             /* max_version = */ 4);
   AddBuiltin(BuiltinOperator_TRANSPOSE, Register_TRANSPOSE(),
              /* min_version = */ 1,
              /* max_version = */ 4);
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",op_version.cc,"@@ -80,6 +80,7 @@ std::string GetMinimumRuntimeVersionForModel(const Model& model) {
           {{OperatorType::kGather, 1}, ""1.6.0""},
           {{OperatorType::kGather, 2}, ""1.14.0""},
           {{OperatorType::kGather, 3}, ""1.15.0""},
+          {{OperatorType::kGather, 4}, kPendingReleaseOpVersion},
           {{OperatorType::kGatherNd, 1}, ""1.14.0""},
           {{OperatorType::kGatherNd, 2}, kPendingReleaseOpVersion},
           {{OperatorType::kSvdf, 1}, ""1.5.0""},
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",operator_property.cc,"@@ -191,7 +191,6 @@ OperatorProperty GetOperatorProperty(const ModelT* model, int subgraph_index,
       property.outputs = {{0, {}}};
       property.restrict_same_input_output_scale = true;
       property.version = 2;
-      property.quantizable_int16 = false;
       break;
     case BuiltinOperator_HARD_SWISH: {
       property.inputs = {{0, {}}};
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",op_version.cc,"@@ -176,6 +176,9 @@ int GetBuiltinOperatorVersion(const OpSignature& op_sig) {
       return 1;
 
     case BuiltinOperator_GATHER:
+      if (op_sig.input_types.at(0) == TensorType_INT16) {
+        return 4;
+      }
       // If the op takes bool input, it is version 3.
       if (op_sig.input_types.at(0) == TensorType_BOOL) {
         return 3;
",0,train
4b6a394e951090e8ffb3770badfef3ab0b293d23,tensorflow/tensorflow,"Added GATHER operator for 16x8.

Implementation, tests, versioning are added.

Change-Id: I87ffb816994b07770419979e45ce14a73b569bf9",runtime_version.cc,"@@ -109,6 +109,7 @@ std::string FindMinimumRuntimeVersionForOp(tflite::BuiltinOperator op_code,
               {{BuiltinOperator_GATHER, 1}, ""1.6.0""},
               {{BuiltinOperator_GATHER, 2}, ""1.14.0""},
               {{BuiltinOperator_GATHER, 3}, ""1.15.0""},
+              {{BuiltinOperator_GATHER, 4}, kPendingReleaseVersion},
               {{BuiltinOperator_GATHER_ND, 1}, ""1.14.0""},
               {{BuiltinOperator_GATHER_ND, 2}, ""2.3.0""},
               {{BuiltinOperator_HASHTABLE_LOOKUP, 1}, ""1.5.0""},
",0,train
4f3444ce5650831c7af364f5829ba1aa96e4a643,tensorflow/tensorflow,Print driver version.,gpu_cudamallocasync_allocator.cc,"@@ -129,10 +129,17 @@ GpuCudaMallocAsyncAllocator::GpuCudaMallocAsyncAllocator(
   if (auto status =
           cuDeviceGetAttribute(&cuda_malloc_async_supported,
                                CU_DEVICE_ATTRIBUTE_MEMORY_POOLS_SUPPORTED,
-                               platform_device_id.value()))
+                               platform_device_id.value())) {
+    int driverVersion;
+    if (auto status2 = cuDriverGetVersion(&driverVersion)) {
+      LOG(ERROR) << ""Error while fetching driver version: ""
+             << GetCudaErrorMessage(status2);
+    }
     LOG(FATAL)  // Crash OK.
         << ""On device: "" << platform_device_id.value()
+        << "" Current driver: "" << driverVersion
         << "". Failed to get device attribute : "" << GetCudaErrorMessage(status);
+  }
   if (!cuda_malloc_async_supported)
     LOG(FATAL)  // Crash OK.
         << ""TF_GPU_ALLOCATOR=cuda_malloc_async isn't currently supported on ""
",0,train
6a7a93e83c0957ca7fa916cece93bf21a3f33902,tensorflow/tensorflow,"Distributed runtime bfloat16 casting.
Change: 133062008",master_session.cc,"@@ -948,7 +948,19 @@ Status MasterSession::DoRunWithLocalExecution(CallOptions* opts,
     }
   };
   popts.control_flow_added = false;
-  // TODO(mrry): Enable DT_BFLOAT16 casting.
+  const bool enable_bfloat16_sendrecv =
+      session_opts_.config.graph_options().enable_bfloat16_sendrecv();
+  popts.should_cast = [enable_bfloat16_sendrecv](const Edge* e) {
+    if (e->IsControlEdge()) {
+      return DT_FLOAT;
+    }
+    DataType dtype = BaseType(e->src()->output_type(e->src_output()));
+    if (enable_bfloat16_sendrecv && dtype == DT_FLOAT) {
+      return DT_BFLOAT16;
+    } else {
+      return dtype;
+    }
+  };
   // TODO(mrry): Enable recv scheduling.
   TF_RETURN_IF_ERROR(rcg->RegisterPartitions(env_, popts, func_def_lib_));
 
",0,train
cf5ebf814eb9414c40d8c5323c322d498c7f0eed,tensorflow/tensorflow,"Adds BatchMatMul's c++ grad.
Change: 121382365",math_grad.cc,"@@ -511,9 +511,12 @@ Status MinGrad(const AttrSlice& attrs, FunctionDef* g) {
 }
 REGISTER_OP_GRADIENT(""Min"", MinGrad);
 
-static Status MatMulGradHelper(FunctionDef* g, const string& x0, bool tx0,
-                               const string& x1, bool tx1, const string& y0,
-                               bool ty0, const string& y1, bool ty1) {
+static Status MatMulGradHelper(FunctionDef* g, const string& opname,
+                               const string& attr_adj_x,
+                               const string& attr_adj_y, const string& x0,
+                               bool ax0, const string& x1, bool ax1,
+                               const string& y0, bool ay0, const string& y1,
+                               bool ay1) {
   *g = FDH::Define(
       // Arg defs
       {""x: T"", ""y: T"", ""dz: T""},
@@ -524,18 +527,20 @@ static Status MatMulGradHelper(FunctionDef* g, const string& x0, bool tx0,
       // Nodes
       {
           {{""dx""},
-           ""MatMul"",
+           opname,
            {x0, x1},
-           {{""T"", ""$T""}, {""transpose_a"", tx0}, {""transpose_b"", tx1}}},
+           {{""T"", ""$T""}, {attr_adj_x, ax0}, {attr_adj_y, ax1}}},
           {{""dy""},
-           ""MatMul"",
+           opname,
            {y0, y1},
-           {{""T"", ""$T""}, {""transpose_a"", ty0}, {""transpose_b"", ty1}}},
+           {{""T"", ""$T""}, {attr_adj_x, ay0}, {attr_adj_y, ay1}}},
       });
   return Status::OK();
 }
 
-Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+Status MatMulGradCommon(const string& opname, const string& attr_adj_x,
+                        const string& attr_adj_y, const AttrSlice& attrs,
+                        FunctionDef* g) {
   DataType T;
   TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""T"", &T));
   if (T == DT_COMPLEX64) {
@@ -544,24 +549,36 @@ Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
   }
   bool ta;
   bool tb;
-  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""transpose_a"", &ta));
-  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, ""transpose_b"", &tb));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_x, &ta));
+  TF_RETURN_IF_ERROR(GetNodeAttr(attrs, attr_adj_y, &tb));
   if (!ta && !tb) {
-    return MatMulGradHelper(g, ""dz"", false, ""y"", true, ""x"", true, ""dz"", false);
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""dz"", false, ""y"",
+                            true, ""x"", true, ""dz"", false);
   }
   if (!ta && tb) {
-    return MatMulGradHelper(g, ""dz"", false, ""y"", false, ""dz"", true, ""x"", false);
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""dz"", false, ""y"",
+                            false, ""dz"", true, ""x"", false);
   }
   if (ta && !tb) {
-    return MatMulGradHelper(g, ""y"", false, ""dz"", true, ""x"", false, ""dz"", false);
+    return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""y"", false, ""dz"",
+                            true, ""x"", false, ""dz"", false);
   }
   CHECK(ta && tb);
-  return MatMulGradHelper(g, ""y"", true, ""dz"", true, ""dz"", true, ""x"", true);
+  return MatMulGradHelper(g, opname, attr_adj_x, attr_adj_y, ""y"", true, ""dz"",
+                          true, ""dz"", true, ""x"", true);
+}
+
+Status MatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  return MatMulGradCommon(""MatMul"", ""transpose_a"", ""transpose_b"", attrs, g);
 }
 REGISTER_OP_GRADIENT(""MatMul"", MatMulGrad);
 
+Status BatchMatMulGrad(const AttrSlice& attrs, FunctionDef* g) {
+  return MatMulGradCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", attrs, g);
+}
+REGISTER_OP_GRADIENT(""BatchMatMul"", BatchMatMulGrad);
+
 // REGISTER_OP_GRADIENT(""SparseMatMul"", SparseMatMulGrad);
-// REGISTER_OP_GRADIENT(""BatchMatMul"", BatchMatMulGrad);
 
 // Comparison ops.
 REGISTER_OP_NO_GRADIENT(""Less"");
",0,train
cf5ebf814eb9414c40d8c5323c322d498c7f0eed,tensorflow/tensorflow,"Adds BatchMatMul's c++ grad.
Change: 121382365",math_grad_test.cc,"@@ -209,14 +209,16 @@ class MathGradTest : public ::testing::Test {
     *di = outputs[1];
   }
 
-  Tensor MatMul(const Tensor& x, bool tx, const Tensor& y, bool ty) {
+  Tensor MatMulCommon(const string& opname, const string& attr_adj_x,
+                      const string& attr_adj_y, const Tensor& x, bool ax,
+                      const Tensor& y, bool ay) {
     auto T = x.dtype();
     auto gdef = test::function::GDef(
         {
             f::NDef(""x"", ""Placeholder"", {}, {{""dtype"", T}}),
             f::NDef(""y"", ""Placeholder"", {}, {{""dtype"", T}}),
-            f::NDef(""z"", ""MatMul"", {""x"", ""y""},
-                    {{""T"", T}, {""transpose_a"", tx}, {""transpose_b"", ty}}),
+            f::NDef(""z"", opname, {""x"", ""y""},
+                    {{""T"", T}, {attr_adj_x, ax}, {attr_adj_y, ay}}),
         },
         {});
     auto sess = NewSession();
@@ -229,8 +231,17 @@ class MathGradTest : public ::testing::Test {
     return outputs[0];
   }
 
-  void MatMulGrad(const Tensor& x, bool tx, const Tensor& y, bool ty,
-                  Tensor* dx, Tensor* dy) {
+  Tensor MatMul(const Tensor& x, bool ax, const Tensor& y, bool ay) {
+    return MatMulCommon(""MatMul"", ""transpose_a"", ""transpose_b"", x, ax, y, ay);
+  }
+
+  Tensor BatchMatMul(const Tensor& x, bool ax, const Tensor& y, bool ay) {
+    return MatMulCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", x, ax, y, ay);
+  }
+
+  void MatMulGradCommon(const string& opname, const string& attr_adj_x,
+                        const string& attr_adj_y, const Tensor& x, bool ax,
+                        const Tensor& y, bool ay, Tensor* dx, Tensor* dy) {
     const DataType T = x.dtype();
     auto adef = [T](const string& name) {  // E.g., x:float, dy:double
       return strings::StrCat(name, "":"", DataTypeString(T));
@@ -240,9 +251,9 @@ class MathGradTest : public ::testing::Test {
         FDH::Define(""Test"", {adef(""x""), adef(""y"")}, {adef(""l"")}, {},
                     {
                         {{""z""},
-                         ""MatMul"",
+                         opname,
                          {""x"", ""y""},
-                         {{""T"", T}, {""transpose_a"", tx}, {""transpose_b"", ty}}},
+                         {{""T"", T}, {attr_adj_x, ax}, {attr_adj_y, ay}}},
                         FDH::Const(""zero"", 0),
                         FDH::Const(""one"", 1),
                         {{""r""}, ""Rank"", {""z""}, {{""T"", T}}},
@@ -289,6 +300,18 @@ class MathGradTest : public ::testing::Test {
     *dy = outputs[1];
   }
 
+  void MatMulGrad(const Tensor& x, bool ax, const Tensor& y, bool ay,
+                  Tensor* dx, Tensor* dy) {
+    return MatMulGradCommon(""MatMul"", ""transpose_a"", ""transpose_b"", x, ax, y,
+                            ay, dx, dy);
+  }
+
+  void BatchMatMulGrad(const Tensor& x, bool ax, const Tensor& y, bool ay,
+                       Tensor* dx, Tensor* dy) {
+    return MatMulGradCommon(""BatchMatMul"", ""adj_x"", ""adj_y"", x, ax, y, ay, dx,
+                            dy);
+  }
+
   void SelectGrad(const Tensor& c, const Tensor& x, const Tensor& y, Tensor* dc,
                   Tensor* dx, Tensor* dy) {
     auto T = DT_FLOAT;
@@ -829,6 +852,54 @@ TEST_F(MathGradTest, MatMul_11) {
   test::ExpectClose(dy, MatMul(dz, true, x, true));
 }
 
+TEST_F(MathGradTest, BatchMatMul_00) {
+  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+                                 TensorShape({1, 2, 3}));
+  auto y = test::AsTensor<float>({-1.f, .5f, 2.f}, TensorShape({1, 3, 1}));
+  Tensor dx;
+  Tensor dy;
+  BatchMatMulGrad(x, false, y, false, &dx, &dy);
+  auto dz = test::AsTensor<float>({1.f, 1.f}, TensorShape({1, 2, 1}));
+  test::ExpectClose(dx, BatchMatMul(dz, false, y, true));
+  test::ExpectClose(dy, BatchMatMul(x, true, dz, false));
+}
+
+TEST_F(MathGradTest, BatchMatMul_01) {
+  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+                                 TensorShape({1, 2, 3}));
+  auto y = test::AsTensor<float>({-1.f, .5f, 2.f}, TensorShape({1, 1, 3}));
+  Tensor dx;
+  Tensor dy;
+  BatchMatMulGrad(x, false, y, true, &dx, &dy);
+  auto dz = test::AsTensor<float>({1.f, 1.f}, TensorShape({1, 2, 1}));
+  test::ExpectClose(dx, BatchMatMul(dz, false, y, false));
+  test::ExpectClose(dy, BatchMatMul(dz, true, x, false));
+}
+
+TEST_F(MathGradTest, BatchMatMul_10) {
+  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+                                 TensorShape({1, 3, 2}));
+  auto y = test::AsTensor<float>({-1.f, .5f, 2.f}, TensorShape({1, 3, 1}));
+  Tensor dx;
+  Tensor dy;
+  BatchMatMulGrad(x, true, y, false, &dx, &dy);
+  auto dz = test::AsTensor<float>({1.f, 1.f}, TensorShape({1, 2, 1}));
+  test::ExpectClose(dx, BatchMatMul(y, false, dz, true));
+  test::ExpectClose(dy, BatchMatMul(x, false, dz, false));
+}
+
+TEST_F(MathGradTest, BatchMatMul_11) {
+  auto x = test::AsTensor<float>({1.f, 2.f, 3.f, 4.f, 5.f, 6.f},
+                                 TensorShape({1, 3, 2}));
+  auto y = test::AsTensor<float>({-1.f, .5f, 2.f}, TensorShape({1, 1, 3}));
+  Tensor dx;
+  Tensor dy;
+  BatchMatMulGrad(x, true, y, true, &dx, &dy);
+  auto dz = test::AsTensor<float>({1.f, 1.f}, TensorShape({1, 2, 1}));
+  test::ExpectClose(dx, BatchMatMul(y, true, dz, true));
+  test::ExpectClose(dy, BatchMatMul(dz, true, x, true));
+}
+
 TEST_F(MathGradTest, Sum_dim0) {
   auto x = test::AsTensor<float>({-3.f, -2.f, -1.f, 1.f, 2.f, 3.f},
                                  TensorShape({2, 3}));
",0,train
006d228201a1e9e140aa0651a59c51d3396a2d12,tensorflow/tensorflow,"Fixed the typo in RunConfig pydoc.

PiperOrigin-RevId: 187498424",run_config.py,"@@ -345,7 +345,7 @@ class RunConfig(object):
       os.environ['TF_CONFIG'] = json.dumps(
           {'cluster': cluster,
            'task': {'type': 'worker', 'index': 1}})
-      config = ClusterConfig()
+      config = RunConfig()
       assert config.master == 'host4:2222'
       assert config.task_id == 1
       assert config.num_ps_replicas == 2
@@ -363,7 +363,7 @@ class RunConfig(object):
       os.environ['TF_CONFIG'] = json.dumps(
           {'cluster': cluster,
            'task': {'type': 'chief', 'index': 0}})
-      config = ClusterConfig()
+      config = RunConfig()
       assert config.master == 'host0:2222'
       assert config.task_id == 0
       assert config.num_ps_replicas == 2
@@ -381,7 +381,7 @@ class RunConfig(object):
       os.environ['TF_CONFIG'] = json.dumps(
           {'cluster': cluster,
            'task': {'type': 'evaluator', 'index': 0}})
-      config = ClusterConfig()
+      config = RunConfig()
       assert config.master == ''
       assert config.evaluator_master == ''
       assert config.task_id == 0
",0,train
58986fcacaa10f039e5518a9b29a3d9dd51a6a41,tensorflow/tensorflow,"Fixes a race condition in device_set.

The mutable device vectors recently introduced were updated in
unguarded functions, generating race conditions.

PiperOrigin-RevId: 293716863
Change-Id: I28da290862e3e51a8558bacab7a8fc5c2d4a2173",device_set.cc,"@@ -32,6 +32,7 @@ DeviceSet::DeviceSet() {}
 DeviceSet::~DeviceSet() {}
 
 void DeviceSet::AddDevice(Device* device) {
+  mutex_lock l(devices_mu_);
   devices_.push_back(device);
   prioritized_devices_.clear();
   prioritized_device_types_.clear();
@@ -104,21 +105,6 @@ void DeviceSet::SortPrioritizedDeviceTypeVector(
   std::sort(vector->begin(), vector->end(), device_sort);
 }
 
-const PrioritizedDeviceTypeVector& DeviceSet::prioritized_device_types() const {
-  if (prioritized_device_types_.size() == devices_.size()) {
-    return prioritized_device_types_;
-  }
-
-  std::set<DeviceType> seen;
-  for (const std::pair<Device*, int32>& p : prioritized_devices()) {
-    DeviceType t(p.first->device_type());
-    if (seen.insert(t).second) {
-      prioritized_device_types_.emplace_back(t, p.second);
-    }
-  }
-  return prioritized_device_types_;
-}
-
 void DeviceSet::SortPrioritizedDeviceVector(PrioritizedDeviceVector* vector) {
   auto device_sort = [](const std::pair<Device*, int32>& a,
                         const std::pair<Device*, int32>& b) {
@@ -140,19 +126,46 @@ void DeviceSet::SortPrioritizedDeviceVector(PrioritizedDeviceVector* vector) {
   std::sort(vector->begin(), vector->end(), device_sort);
 }
 
-const PrioritizedDeviceVector& DeviceSet::prioritized_devices() const {
-  if (prioritized_devices_.size() == devices_.size()) {
-    return prioritized_devices_;
+namespace {
+
+void UpdatePrioritizedVectors(
+    const std::vector<Device*>& devices,
+    PrioritizedDeviceVector* prioritized_devices,
+    PrioritizedDeviceTypeVector* prioritized_device_types) {
+  if (prioritized_devices->size() != devices.size()) {
+    for (Device* d : devices) {
+      prioritized_devices->emplace_back(
+          d, DeviceSet::DeviceTypeOrder(DeviceType(d->device_type())));
+    }
+    DeviceSet::SortPrioritizedDeviceVector(prioritized_devices);
   }
 
-  for (Device* d : devices_) {
-    prioritized_devices_.emplace_back(
-        d, DeviceSet::DeviceTypeOrder(DeviceType(d->device_type())));
+  if (prioritized_device_types != nullptr &&
+      prioritized_device_types->size() != devices.size()) {
+    std::set<DeviceType> seen;
+    for (const std::pair<Device*, int32>& p : *prioritized_devices) {
+      DeviceType t(p.first->device_type());
+      if (seen.insert(t).second) {
+        prioritized_device_types->emplace_back(t, p.second);
+      }
+    }
   }
+}
 
-  DeviceSet::SortPrioritizedDeviceVector(&prioritized_devices_);
+}  // namespace
 
+const PrioritizedDeviceVector& DeviceSet::prioritized_devices() const {
+  mutex_lock l(devices_mu_);
+  UpdatePrioritizedVectors(devices_, &prioritized_devices_,
+                           /* prioritized_device_types */ nullptr);
   return prioritized_devices_;
 }
 
+const PrioritizedDeviceTypeVector& DeviceSet::prioritized_device_types() const {
+  mutex_lock l(devices_mu_);
+  UpdatePrioritizedVectors(devices_, &prioritized_devices_,
+                           &prioritized_device_types_);
+  return prioritized_device_types_;
+}
+
 }  // namespace tensorflow
",0,train
58986fcacaa10f039e5518a9b29a3d9dd51a6a41,tensorflow/tensorflow,"Fixes a race condition in device_set.

The mutable device vectors recently introduced were updated in
unguarded functions, generating race conditions.

PiperOrigin-RevId: 293716863
Change-Id: I28da290862e3e51a8558bacab7a8fc5c2d4a2173",device_set.h,"@@ -38,7 +38,7 @@ class DeviceSet {
   ~DeviceSet();
 
   // Does not take ownership of 'device'.
-  void AddDevice(Device* device);
+  void AddDevice(Device* device) LOCKS_EXCLUDED(devices_mu_);
 
   // Set the device designated as the ""client"".  This device
   // must also be registered via AddDevice().
@@ -69,14 +69,16 @@ class DeviceSet {
 
   // Return the prioritized list of devices in this set.
   // Devices are prioritized first by `DeviceTypeOrder`, then by name.
-  const PrioritizedDeviceVector& prioritized_devices() const;
+  const PrioritizedDeviceVector& prioritized_devices() const
+      LOCKS_EXCLUDED(devices_mu_);
 
   // Return the prioritized list of unique device types in this set.
   //
   // The list will be ordered by decreasing priority. The priorities (the second
   // element in the list's `std::pair<DeviceType, int32>`) will be initialized
   // to the value of `DeviceTypeOrder` for the device types.
-  const PrioritizedDeviceTypeVector& prioritized_device_types() const;
+  const PrioritizedDeviceTypeVector& prioritized_device_types() const
+      LOCKS_EXCLUDED(devices_mu_);
 
   // An order to sort by device types according to system-determined
   // priority.
@@ -103,16 +105,19 @@ class DeviceSet {
       PrioritizedDeviceTypeVector* vector);
 
  private:
+  mutable mutex devices_mu_;
+
   // Not owned.
   std::vector<Device*> devices_;
 
   // Cached prioritized vector, created on-the-fly when
   // prioritized_devices() is called.
-  mutable PrioritizedDeviceVector prioritized_devices_;
+  mutable PrioritizedDeviceVector prioritized_devices_ GUARDED_BY(devices_mu_);
 
   // Cached prioritized vector, created on-the-fly when
   // prioritized_device_types() is called.
-  mutable PrioritizedDeviceTypeVector prioritized_device_types_;
+  mutable PrioritizedDeviceTypeVector prioritized_device_types_
+      GUARDED_BY(devices_mu_);
 
   // Fullname -> device* for device in devices_.
   std::unordered_map<string, Device*> device_by_name_;
",0,train
792efc53cb091fcb5229202290cc51505bcb9634,tensorflow/tensorflow,"Fix FIFOQueue usage in tf.function

Ensures that the queue resource is always created in the eager context and captured by any tf.functions, and that it is owned by the eager context regardless of where it was created.

PiperOrigin-RevId: 285479831
Change-Id: Ife6d46aded4da09ed89306550f28fd71f5673966",fifo_queue_test.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+import gc
 import random
 import time
 
@@ -34,9 +35,11 @@ from tensorflow.python.framework import errors_impl
 from tensorflow.python.framework import ops
 from tensorflow.python.framework import tensor_shape
 from tensorflow.python.framework import test_util
+from tensorflow.python.module import module
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import control_flow_ops
 from tensorflow.python.ops import data_flow_ops
+from tensorflow.python.ops import gen_resource_variable_ops
 from tensorflow.python.platform import test
 from tensorflow.python.util import compat
 
@@ -137,6 +140,42 @@ class FIFOQueueTest(test.TestCase):
     self.assertAllEqual(self.evaluate(q2.dequeue()), 2)
     self.assertAllEqual(self.evaluate(q.dequeue()), 1)
 
+  def testQueueInFunction(self):
+
+    class _M(module.Module):
+
+      def __init__(self):
+        self.q1 = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()])
+        self.q2 = None
+
+      @def_function.function
+      def uses_queues(self, x):
+        if self.q2 is None:
+          self.q2 = data_flow_ops.FIFOQueue(10, [dtypes_lib.int32], shapes=[()])
+        self.q2.enqueue(x)
+        self.q2.enqueue(x + 3)
+        self.q1.enqueue(self.q2.dequeue())
+
+    m = _M()
+    self.evaluate(m.uses_queues(constant_op.constant(2)))
+    self.assertAllEqual(2, self.evaluate(m.q1.dequeue()))
+    self.assertAllEqual(5, self.evaluate(m.q2.dequeue()))
+    if context.executing_eagerly():
+      q1_handle = m.q1.queue_ref
+      q2_handle = m.q2.queue_ref
+      del m
+      gc.collect()
+      # If executing eagerly, deleting the Module should clean up the queue
+      # resources.
+      with self.assertRaisesRegexp(errors_impl.NotFoundError,
+                                   r""Resource .* does not exist.""):
+        gen_resource_variable_ops.destroy_resource_op(
+            q1_handle, ignore_lookup_error=False)
+      with self.assertRaisesRegexp(errors_impl.NotFoundError,
+                                   r""Resource .* does not exist.""):
+        gen_resource_variable_ops.destroy_resource_op(
+            q2_handle, ignore_lookup_error=False)
+
   def testEnqueueDictWithoutNames(self):
     q = data_flow_ops.FIFOQueue(10, dtypes_lib.float32)
     with self.assertRaisesRegexp(ValueError, ""must have names""):
@@ -332,11 +371,11 @@ class FIFOQueueTest(test.TestCase):
       q.enqueue_many((7, [[1, 2], [3, 4], [5, 6]]))
 
   def testEnqueueManyEmptyTypeConversion(self):
+    q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), (
+        (), ()))
 
     @def_function.function
     def _f():
-      q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), (
-          (), ()))
       enq = q.enqueue_many(([], []))
       self.assertEqual(dtypes_lib.int32, enq.inputs[1].dtype)
       self.assertEqual(dtypes_lib.float32, enq.inputs[2].dtype)
@@ -344,12 +383,11 @@ class FIFOQueueTest(test.TestCase):
     _f()
 
   def testEnqueueWrongType(self):
+    q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), (
+        (), ()))
 
     @def_function.function
     def _f():
-      q = data_flow_ops.FIFOQueue(10, (dtypes_lib.int32, dtypes_lib.float32), (
-          (), ()))
-
       with self.assertRaises(ValueError):
         q.enqueue((array_ops.placeholder(dtypes_lib.int32),
                    array_ops.placeholder(dtypes_lib.int32)))
",0,train
792efc53cb091fcb5229202290cc51505bcb9634,tensorflow/tensorflow,"Fix FIFOQueue usage in tf.function

Ensures that the queue resource is always created in the eager context and captured by any tf.functions, and that it is owned by the eager context regardless of where it was created.

PiperOrigin-RevId: 285479831
Change-Id: Ife6d46aded4da09ed89306550f28fd71f5673966",data_flow_ops.py,"@@ -171,7 +171,7 @@ class QueueBase(object):
     else:
       self._names = None
     self._queue_ref = queue_ref
-    if context.executing_eagerly():
+    if isinstance(queue_ref, ops.EagerTensor):
       if context.context().scope_name:
         self._name = context.context().scope_name
       else:
@@ -754,12 +754,13 @@ class FIFOQueue(QueueBase):
     dtypes = _as_type_list(dtypes)
     shapes = _as_shape_list(shapes, dtypes)
     names = _as_name_list(names, dtypes)
-    queue_ref = gen_data_flow_ops.fifo_queue_v2(
-        component_types=dtypes,
-        shapes=shapes,
-        capacity=capacity,
-        shared_name=_shared_name(shared_name),
-        name=name)
+    with ops.init_scope():
+      queue_ref = gen_data_flow_ops.fifo_queue_v2(
+          component_types=dtypes,
+          shapes=shapes,
+          capacity=capacity,
+          shared_name=_shared_name(shared_name),
+          name=name)
 
     super(FIFOQueue, self).__init__(dtypes, shapes, names, queue_ref)
 
",0,train
d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants.

PiperOrigin-RevId: 226266973",callbacks.py,"@@ -43,6 +43,7 @@ from tensorflow.python.ops import variables
 from tensorflow.python.platform import tf_logging as logging
 from tensorflow.python.summary import summary as tf_summary
 from tensorflow.python.training import saver
+from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util.tf_export import keras_export
 
 try:
@@ -51,11 +52,6 @@ except ImportError:
   requests = None
 
 
-_TRAIN = 'train'
-_TEST = 'test'
-_PREDICT = 'predict'
-
-
 # pylint: disable=protected-access
 def configure_callbacks(callbacks,
                         model,
@@ -66,7 +62,7 @@ def configure_callbacks(callbacks,
                         samples=None,
                         verbose=1,
                         count_mode='steps',
-                        mode=_TRAIN):
+                        mode=ModeKeys.TRAIN):
   """"""Configures callbacks for use in various training loops.
 
   Arguments:
@@ -79,8 +75,8 @@ def configure_callbacks(callbacks,
       samples: Number of training samples.
       verbose: int, 0 or 1. Keras logging verbosity to pass to ProgbarLogger.
       count_mode: One of 'steps' or 'samples'. Per-batch or per-sample count.
-      mode: String. One of 'train', 'test', or 'predict'. Which loop mode to
-        configure callbacks for.
+      mode: String. One of ModeKeys.TRAIN, ModeKeys.TEST, or ModeKeys.PREDICT.
+        Which loop mode to configure callbacks for.
 
   Returns:
       Instance of CallbackList used to control all Callbacks.
@@ -93,7 +89,7 @@ def configure_callbacks(callbacks,
     callbacks = []
 
   # Add additional callbacks during training.
-  if mode == _TRAIN:
+  if mode == ModeKeys.TRAIN:
     model.history = History()
     stateful_metric_names = None
     if hasattr(model, 'metrics_names'):
@@ -113,7 +109,7 @@ def configure_callbacks(callbacks,
   callback_metrics = []
   # When we have deferred build scenario with iterator input, we will compile
   # when we standardize first batch of data.
-  if mode != _PREDICT and hasattr(model, 'metrics_names'):
+  if mode != ModeKeys.PREDICT and hasattr(model, 'metrics_names'):
     callback_metrics = copy.copy(model.metrics_names)
     if do_validation:
       callback_metrics += ['val_' + n for n in model.metrics_names]
@@ -148,7 +144,7 @@ def _is_generator_like(data):
 
 def make_logs(model, logs, outputs, mode, prefix=''):
   """"""Computes logs for sending to `on_batch_end` methods.""""""
-  if mode in {_TRAIN, _TEST}:
+  if mode in {ModeKeys.TRAIN, ModeKeys.TEST}:
     if hasattr(model, 'metrics_names'):
       for label, output in zip(model.metrics_names, outputs):
         logs[prefix + label] = output
@@ -220,27 +216,27 @@ class CallbackList(object):
 
   def _call_begin_hook(self, mode):
     """"""Helper function for on_{train|test|predict}_begin methods.""""""
-    if mode == _TRAIN:
+    if mode == ModeKeys.TRAIN:
       self.on_train_begin()
-    elif mode == _TEST:
+    elif mode == ModeKeys.TEST:
       self.on_test_begin()
     else:
       self.on_predict_begin()
 
   def _call_end_hook(self, mode):
     """"""Helper function for on_{train|test|predict}_end methods.""""""
-    if mode == _TRAIN:
+    if mode == ModeKeys.TRAIN:
       self.on_train_end()
-    elif mode == _TEST:
+    elif mode == ModeKeys.TEST:
       self.on_test_end()
     else:
       self.on_predict_end()
 
   def on_batch_begin(self, batch, logs=None):
-    self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs)
 
   def on_batch_end(self, batch, logs=None):
-    self._call_batch_hook(_TRAIN, 'end', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs)
 
   def on_epoch_begin(self, epoch, logs=None):
     """"""Calls the `on_epoch_begin` methods of its callbacks.
@@ -280,7 +276,7 @@ class CallbackList(object):
         logs: dict. Has keys `batch` and `size` representing the current batch
           number and the size of the batch.
     """"""
-    self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TRAIN, 'begin', batch, logs=logs)
 
   def on_train_batch_end(self, batch, logs=None):
     """"""Calls the `on_train_batch_end` methods of its callbacks.
@@ -289,7 +285,7 @@ class CallbackList(object):
         batch: integer, index of batch within the current epoch.
         logs: dict. Metric results for this batch.
     """"""
-    self._call_batch_hook(_TRAIN, 'end', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TRAIN, 'end', batch, logs=logs)
 
   def on_test_batch_begin(self, batch, logs=None):
     """"""Calls the `on_test_batch_begin` methods of its callbacks.
@@ -299,7 +295,7 @@ class CallbackList(object):
         logs: dict. Has keys `batch` and `size` representing the current batch
           number and the size of the batch.
     """"""
-    self._call_batch_hook(_TEST, 'begin', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TEST, 'begin', batch, logs=logs)
 
   def on_test_batch_end(self, batch, logs=None):
     """"""Calls the `on_test_batch_end` methods of its callbacks.
@@ -308,7 +304,7 @@ class CallbackList(object):
         batch: integer, index of batch within the current epoch.
         logs: dict. Metric results for this batch.
     """"""
-    self._call_batch_hook(_TEST, 'end', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.TEST, 'end', batch, logs=logs)
 
   def on_predict_batch_begin(self, batch, logs=None):
     """"""Calls the `on_predict_batch_begin` methods of its callbacks.
@@ -318,7 +314,7 @@ class CallbackList(object):
         logs: dict. Has keys `batch` and `size` representing the current batch
           number and the size of the batch.
     """"""
-    self._call_batch_hook(_PREDICT, 'begin', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.PREDICT, 'begin', batch, logs=logs)
 
   def on_predict_batch_end(self, batch, logs=None):
     """"""Calls the `on_predict_batch_end` methods of its callbacks.
@@ -327,7 +323,7 @@ class CallbackList(object):
         batch: integer, index of batch within the current epoch.
         logs: dict. Metric results for this batch.
     """"""
-    self._call_batch_hook(_PREDICT, 'end', batch, logs=logs)
+    self._call_batch_hook(ModeKeys.PREDICT, 'end', batch, logs=logs)
 
   def on_train_begin(self, logs=None):
     """"""Calls the `on_train_begin` methods of its callbacks.
",0,train
d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants.

PiperOrigin-RevId: 226266973",training.py,"@@ -2057,7 +2057,7 @@ class Model(Network):
       # Gets network outputs. Does not update weights.
       # Does update the network states.
       kwargs = getattr(self, '_function_kwargs', {})
-      with K.name_scope('predict'):
+      with K.name_scope(ModeKeys.PREDICT):
         self.predict_function = K.function(
             inputs,
             self.outputs,
",0,train
d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants.

PiperOrigin-RevId: 226266973",training_arrays.py,"@@ -41,7 +41,7 @@ except ImportError:
 
 
 def _get_model_feed(model, mode):
-  if mode == 'predict':
+  if mode == ModeKeys.PREDICT:
     feed = model._feed_inputs
   else:
     feed = (
@@ -85,7 +85,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
     inputs: List or dict of model inputs.
     targets: Optional list of model targets.
     sample_weights: Optional list of sample weight arrays.
-    mode: One of 'train'/'test'/'predict'.
+    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
 
   Returns:
     Feed values for the model in the given mode.
@@ -111,7 +111,8 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
   targets = targets or []
   sample_weights = sample_weights or []
   ins = inputs + targets + sample_weights
-  if mode == 'train' and not isinstance(K.symbolic_learning_phase(), int):
+  if mode == ModeKeys.TRAIN and not isinstance(K.symbolic_learning_phase(),
+                                               int):
     ins += [True]
   return ins
 
@@ -138,10 +139,10 @@ def model_iteration(model,
                     initial_epoch=0,
                     steps_per_epoch=None,
                     validation_steps=None,
-                    mode='train',
+                    mode=ModeKeys.TRAIN,
                     validation_in_fit=False,
                     **kwargs):
-  """"""Loop function for arrays of data with modes 'train'/'test'/'predict'.
+  """"""Loop function for arrays of data with modes TRAIN/TEST/PREDICT.
 
   Arguments:
       model: Keras Model instance.
@@ -165,7 +166,7 @@ def model_iteration(model,
         the default value of `None`.
       validation_steps: Number of steps to run validation for (only if doing
         validation from data tensors). Ignored with the default value of `None`.
-      mode: One of 'train'/'test'/'predict'.
+      mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
       validation_in_fit: DEPRECATED: if true, then this method is invoked from
         within training iteration (for validation). In this case, do not copy
         weights when using a tf.distribute.Strategy. The input is deprecated as
@@ -174,9 +175,9 @@ def model_iteration(model,
       **kwargs: Additional arguments for backwards compatibility.
 
   Returns:
-      - In 'train' mode: `History` object.
-      - In 'test' mode: Evaluation metrics.
-      - In 'predict' mode: Outputs of the Model called on inputs.
+      - In TRAIN mode: `History` object.
+      - In TEST mode: Evaluation metrics.
+      - In PREDICT mode: Outputs of the Model called on inputs.
 
   Raises:
       ValueError: in case of invalid arguments.
@@ -186,7 +187,7 @@ def model_iteration(model,
     steps_per_epoch = kwargs['steps']
 
   _validate_arguments(steps_per_epoch, validation_steps, kwargs)
-  if mode == 'train':
+  if mode == ModeKeys.TRAIN:
     _print_train_info(inputs, val_inputs, steps_per_epoch, verbose)
 
   # Enter DistributionStrategy scope.
@@ -230,7 +231,7 @@ def model_iteration(model,
         indices_for_conversion_to_dense.append(i)
 
   # Select aggregation method.
-  if mode == 'predict':
+  if mode == ModeKeys.PREDICT:
     aggregator = training_utils.OutputsAggregator(use_steps,
                                                   num_samples_or_steps)
   else:
@@ -364,14 +365,14 @@ def model_iteration(model,
           steps_per_epoch=validation_steps,
           callbacks=callbacks,
           verbose=0,
-          mode='test',
+          mode=ModeKeys.TEST,
           validation_in_fit=True)
       if not isinstance(val_results, list):
         val_results = [val_results]
       epoch_logs = cbks.make_logs(
           model, epoch_logs, val_results, mode, prefix='val_')
 
-    if mode == 'train':
+    if mode == ModeKeys.TRAIN:
       # Epochs only apply to `fit`.
       callbacks.on_epoch_end(epoch, epoch_logs)
       progbar.on_epoch_end(epoch, epoch_logs)
@@ -385,12 +386,14 @@ def model_iteration(model,
           model, model._distributed_model, mode)
     scope.__exit__(None, None, None)
 
-  if mode == 'train':
+  if mode == ModeKeys.TRAIN:
     return model.history
   return results
 
 
 # For backwards compatibility for internal users of these loops.
-fit_loop = functools.partial(model_iteration, mode='train')
-test_loop = functools.partial(model_iteration, mode='test', shuffle=False)
-predict_loop = functools.partial(model_iteration, mode='predict', shuffle=False)
+fit_loop = functools.partial(model_iteration, mode=ModeKeys.TRAIN)
+test_loop = functools.partial(
+    model_iteration, mode=ModeKeys.TEST, shuffle=False)
+predict_loop = functools.partial(
+    model_iteration, mode=ModeKeys.PREDICT, shuffle=False)
",0,train
d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants.

PiperOrigin-RevId: 226266973",training_distributed.py,"@@ -19,7 +19,6 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
-import enum  # pylint: disable=g-bad-import-order
 import numpy as np
 
 from tensorflow.python.distribute import distribute_lib
@@ -38,13 +37,10 @@ from tensorflow.python.keras.utils.generic_utils import Progbar
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import math_ops
 from tensorflow.python.platform import tf_logging as logging
+from tensorflow.python.training.mode_keys import ModeKeys
 from tensorflow.python.util import nest
 
 
-class _Mode(enum.Enum):
-  TRAIN = 'train'
-  TEST = 'test'
-  PREDICT = 'predict'
 # TODO(priyag, sourabhbajaj): Refactor this file to address code duplication.
 
 
@@ -100,10 +96,10 @@ def experimental_fit_loop(model,
     if model._compile_distribution:
       clone_model_on_replicas(model, current_strategy,
                               make_callback_model=True, inputs=inputs,
-                              targets=targets, mode=_Mode.TRAIN)
+                              targets=targets, mode=ModeKeys.TRAIN)
     else:
       _build_distributed_network(model, current_strategy, inputs,
-                                 targets, mode=_Mode.TRAIN)
+                                 targets, mode=ModeKeys.TRAIN)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
@@ -215,7 +211,7 @@ def experimental_fit_loop(model,
         # the weights back to the original model before we can run validation.
         with current_strategy.scope():
           _copy_weights_to_original_model(
-              model, model._distributed_model_train, 'train')
+              model, model._distributed_model_train, ModeKeys.TRAIN)
 
       val_outs = experimental_test_loop(  # pylint: disable=undefined-variable
           model,
@@ -237,7 +233,7 @@ def experimental_fit_loop(model,
     # Copy the weights back from the replicated model to the original model.
     with current_strategy.scope():
       _copy_weights_to_original_model(model, model._distributed_model_train,
-                                      'train')
+                                      ModeKeys.TRAIN)
   scope.__exit__(None, None, None)
   return model.history
 
@@ -281,10 +277,10 @@ def experimental_test_loop(model,
     if model._compile_distribution:
       clone_model_on_replicas(model, current_strategy,
                               make_callback_model=False, inputs=inputs,
-                              targets=targets, mode=_Mode.TEST)
+                              targets=targets, mode=ModeKeys.TEST)
     else:
       _build_distributed_network(model, current_strategy, inputs,
-                                 targets, mode=_Mode.TEST)
+                                 targets, mode=ModeKeys.TEST)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
@@ -397,10 +393,10 @@ def experimental_predict_loop(model, iterator, verbose=0, steps=None):
     if model._compile_distribution:
       clone_model_on_replicas(model, current_strategy,
                               make_callback_model=False, inputs=inputs,
-                              mode=_Mode.PREDICT)
+                              mode=ModeKeys.PREDICT)
     else:
       _build_distributed_network(model, current_strategy, inputs,
-                                 mode=_Mode.PREDICT)
+                                 mode=ModeKeys.PREDICT)
 
     (grouped_inputs, grouped_outputs, grouped_updates,
      grouped_session_args) = current_strategy.extended.call_for_each_replica(
@@ -535,7 +531,7 @@ def _build_network_on_replica(model, inputs=None, targets=None, mode=None):
   if isinstance(targets, tuple):
     targets = nest.flatten(targets)
 
-  if mode == _Mode.PREDICT:
+  if mode == ModeKeys.PREDICT:
     _custom_compile_for_predict(updated_model)
   else:
     updated_model.compile(
@@ -557,11 +553,11 @@ def _build_distributed_network(model, strategy, inputs=None, targets=None,
     distributed_model = strategy.extended.call_for_each_replica(
         _build_network_on_replica,
         args=(model, inputs, targets, mode))
-    if mode is _Mode.TRAIN:
+    if mode is ModeKeys.TRAIN:
       model._distributed_model_train = distributed_model
-    elif mode is _Mode.TEST:
+    elif mode is ModeKeys.TEST:
       model._distributed_model_test = distributed_model
-    elif mode is _Mode.PREDICT:
+    elif mode is ModeKeys.PREDICT:
       model._distributed_model_predict = distributed_model
     else:
       model._distributed_model = distributed_model
@@ -594,7 +590,7 @@ def _clone_and_build_model(model, inputs=None, targets=None, mode=None):
 
   if isinstance(targets, tuple):
     targets = nest.flatten(targets)
-  if mode == _Mode.PREDICT:
+  if mode == ModeKeys.PREDICT:
     _custom_compile_for_predict(cloned_model)
   else:
     cloned_model.compile(
@@ -615,11 +611,11 @@ def clone_model_on_replicas(model, strategy, make_callback_model=False,
   with K.get_graph().as_default(), strategy.scope():
     distributed_model = strategy.extended.call_for_each_replica(
         _clone_and_build_model, args=(model, inputs, targets, mode))
-    if mode is _Mode.TRAIN:
+    if mode is ModeKeys.TRAIN:
       model._distributed_model_train = distributed_model
-    elif mode is _Mode.TEST:
+    elif mode is ModeKeys.TEST:
       model._distributed_model_test = distributed_model
-    elif mode is _Mode.PREDICT:
+    elif mode is ModeKeys.PREDICT:
       model._distributed_model_predict = distributed_model
     else:
       model._distributed_model = distributed_model
@@ -659,7 +655,7 @@ def _make_execution_function(model, mode):
   if not model._distributed_model:
     if model._compile_distribution:
       clone_model_on_replicas(
-          model, strategy, make_callback_model=(mode == 'train'))
+          model, strategy, make_callback_model=(mode == ModeKeys.TRAIN))
     else:
       _build_distributed_network(model, strategy)
 
@@ -674,7 +670,7 @@ def _make_execution_function(model, mode):
      grouped_session_args) = strategy.extended.call_for_each_replica(
          _per_device_function, args=(model._distributed_model,))
 
-    if mode == 'train':
+    if mode == ModeKeys.TRAIN:
       # Initialize the variables in the replicated model. This is necessary for
       # multi-worker training because on some workers, initialization is not
       # needed. This method does initialization or waiting for initialization
@@ -692,7 +688,7 @@ def _make_execution_function(model, mode):
          grouped_outputs,
          grouped_updates,
          grouped_session_args,
-         with_loss_tensor=(mode != 'predict'))
+         with_loss_tensor=(mode != ModeKeys.PREDICT))
 
     return K.function(
         all_inputs,
@@ -708,7 +704,7 @@ def _make_eager_execution_function(model, mode):
   if not model._distributed_model:
     if model._compile_distribution:
       clone_model_on_replicas(
-          model, strategy, make_callback_model=(mode == 'train'))
+          model, strategy, make_callback_model=(mode == ModeKeys.TRAIN))
     else:
       _build_distributed_network(model, strategy)
 
@@ -732,7 +728,7 @@ def _make_eager_execution_function(model, mode):
         strategy,
         grouped_inputs,
         grouped_outputs,
-        with_loss_tensor=(mode != 'predict'))
+        with_loss_tensor=(mode != ModeKeys.PREDICT))
 
     return K.function(
         all_inputs,
@@ -748,7 +744,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
     inputs: List or dict of model inputs.
     targets: Optional list of model targets.
     sample_weights: Optional list of sample weight arrays.
-    mode: One of 'train'/'test'/'predict'.
+    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
 
   Returns:
     Feed values for the model in the given mode.
@@ -758,7 +754,7 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
   inputs = distributed_training_utils.flatten_perdevice_values(strategy, inputs)
   targets = distributed_training_utils.flatten_perdevice_values(
       strategy, targets)
-  if mode == 'predict':
+  if mode == ModeKeys.PREDICT:
     sample_weights = []
     targets = []
   else:
@@ -766,7 +762,8 @@ def _prepare_feed_values(model, inputs, targets, sample_weights, mode):
         None for _ in range(len(model.outputs) * strategy.num_replicas_in_sync)
     ]
   ins = inputs + targets + sample_weights
-  if mode == 'train' and not isinstance(K.symbolic_learning_phase(), int):
+  if mode == ModeKeys.TRAIN and not isinstance(K.symbolic_learning_phase(),
+                                               int):
     ins += [True]
   return ins
 
@@ -785,7 +782,7 @@ def _copy_weights_to_distributed_model(original_model, grouped_model):
 
 def _copy_weights_to_original_model(model, grouped_model, mode):
   """"""Copies weights from first distributed model back to original model.""""""
-  if model._distribution_strategy and mode == 'train':
+  if model._distribution_strategy and mode == ModeKeys.TRAIN:
     updated_weights = model._distribution_strategy.unwrap(
         grouped_model)[0].get_weights()
     model.set_weights(updated_weights)
@@ -793,7 +790,7 @@ def _copy_weights_to_original_model(model, grouped_model, mode):
 
 def _per_device_aggregate_batch(batch_outs, model, mode):
   """"""Aggregates the per-device batch-level outputs from a distributed step.""""""
-  if model._distribution_strategy is not None and mode == 'predict':
+  if model._distribution_strategy is not None and mode == ModeKeys.PREDICT:
     total_batch_outs = []
     for i in range(len(model.outputs)):
       num_replicas = model._distribution_strategy.num_replicas_in_sync
",0,train
d8e755a0da01068e60a797efaf76df71b65bacbb,tensorflow/tensorflow,"Replace mode string literals 'train', 'test', and 'predict' with ModeKeys constants.

PiperOrigin-RevId: 226266973",training_generator.py,"@@ -52,10 +52,10 @@ def model_iteration(model,
                     use_multiprocessing=False,
                     shuffle=False,
                     initial_epoch=0,
-                    mode='train',
+                    mode=ModeKeys.TRAIN,
                     batch_size=None,
                     **kwargs):
-  """"""Loop function for arrays of data with modes 'train'/'test'/'predict'.
+  """"""Loop function for arrays of data with modes TRAIN/TEST/PREDICT.
 
   Arguments:
       model: Keras Model instance.
@@ -90,16 +90,16 @@ def model_iteration(model,
         `None`.
       initial_epoch: Epoch at which to start training (useful for resuming a
         previous training run).
-      mode: One of 'train'/'test'/'predict'.
+      mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
       batch_size: Integer batch size or None if unknown. Will only be used if
         `data` is in NumPy/Tensor format.
       **kwargs: Additional arguments for backwards compatibility. `steps` is
         accepted as an alias for `steps_per_epoch`.
 
   Returns:
-      - In 'train' mode: `History` object.
-      - In 'test' mode: Evaluation metrics.
-      - In 'predict' mode: Outputs of the Model called on inputs.
+      - In TRAIN mode: `History` object.
+      - In TEST mode: Evaluation metrics.
+      - In PREDICT mode: Outputs of the Model called on inputs.
 
   Raises:
       ValueError: in case of invalid arguments.
@@ -152,14 +152,14 @@ def model_iteration(model,
   progbar.params = callbacks.params
   progbar.params['verbose'] = verbose
 
-  if mode == 'predict':
+  if mode == ModeKeys.PREDICT:
     aggregator = training_utils.OutputsAggregator(True, steps_per_epoch)
   else:
     aggregator = training_utils.MetricsAggregator(True, steps_per_epoch)
 
   if should_set_learning_phase:
     old_learning_phase = backend.learning_phase()
-    backend.set_learning_phase(1 if mode == 'train' else 0)
+    backend.set_learning_phase(1 if mode == ModeKeys.TRAIN else 0)
 
   callbacks.model.stop_training = False
   callbacks._call_begin_hook(mode)
@@ -226,14 +226,14 @@ def model_iteration(model,
           max_queue_size=max_queue_size,
           callbacks=callbacks,
           verbose=0,
-          mode='test')
+          mode=ModeKeys.TEST)
 
       if not isinstance(val_results, list):
         val_results = [val_results]
       epoch_logs = cbks.make_logs(
           model, epoch_logs, val_results, mode, prefix='val_')
 
-    if mode == 'train':
+    if mode == ModeKeys.TRAIN:
       # Epochs only apply to `fit`.
       callbacks.on_epoch_end(epoch, epoch_logs)
       progbar.on_epoch_end(epoch, epoch_logs)
@@ -246,17 +246,17 @@ def model_iteration(model,
   if should_set_learning_phase:
     backend.set_learning_phase(old_learning_phase)
 
-  if mode == 'train':
+  if mode == ModeKeys.TRAIN:
     return model.history
   return results
 
 
 # Maintain compatibility with the existing names.
-fit_generator = functools.partial(model_iteration, mode='train')
+fit_generator = functools.partial(model_iteration, mode=ModeKeys.TRAIN)
 evaluate_generator = functools.partial(
-    model_iteration, mode='test', shuffle=False)
+    model_iteration, mode=ModeKeys.TEST, shuffle=False)
 predict_generator = functools.partial(
-    model_iteration, mode='predict', shuffle=False)
+    model_iteration, mode=ModeKeys.PREDICT, shuffle=False)
 
 
 def _get_next_batch(output_generator, mode):
@@ -268,7 +268,7 @@ def _get_next_batch(output_generator, mode):
     logging.warning('Your dataset iterator ran out of data.')
     return None
   if not isinstance(generator_output, tuple):
-    if mode == 'predict':
+    if mode == ModeKeys.PREDICT:
       # Always wrap in a tuple.
       return (generator_output,)
     else:
@@ -307,7 +307,7 @@ def _validate_arguments(is_sequence, use_multiprocessing, workers,
       `keras.utils.data_utils.Sequence` object or Eager Iterator or Dataset.
     validation_steps: Total number of steps (batches of samples) before
       declaring validation finished.
-    mode: One of 'train'/'test'/'predict'.
+    mode: One of ModeKeys.TRAIN/ModeKeys.TEST/ModeKeys.PREDICT.
     kwargs: Additional arguments for backwards compatibility.
 
   Raises:
@@ -323,7 +323,7 @@ def _validate_arguments(is_sequence, use_multiprocessing, workers,
                     ' class.'))
 
   if steps_per_epoch is None:
-    arg_name = 'steps_per_epoch' if mode == 'train' else 'steps'
+    arg_name = 'steps_per_epoch' if mode == ModeKeys.TRAIN else 'steps'
     raise ValueError('Please specify the number of steps via the '
                      '`{}` argument.'.format(arg_name))
 
@@ -429,11 +429,11 @@ def _make_enqueued_generator(generator,
 
 def _make_execution_function(model, mode, class_weight=None):
   """"""Makes function to run one step of model execution.""""""
-  if mode == 'train':
+  if mode == ModeKeys.TRAIN:
     if not context.executing_eagerly():
       model._make_fit_function()
     f = functools.partial(model.train_on_batch, class_weight=class_weight)
-  elif mode == 'test':
+  elif mode == ModeKeys.TEST:
     if not context.executing_eagerly():
       model._make_eval_function()
     f = model.test_on_batch
@@ -446,7 +446,7 @@ def _make_execution_function(model, mode, class_weight=None):
     f = predict_on_batch
 
   # Maintain stateful metrics across batch-level calls.
-  if mode != 'predict':
+  if mode != ModeKeys.PREDICT:
     f = functools.partial(f, reset_metrics=False)
 
   return f
",0,train
9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules.

Also simplify tests in gpu_hlo_schedule_test.

PiperOrigin-RevId: 357989058
Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_compiler.cc,"@@ -520,9 +520,9 @@ GpuCompiler::RunHloPassesAndBufferAssignement(
 
   std::unique_ptr<StreamAssignment> stream_assignment =
       AssignStreams(*hlo_module);
-  TF_ASSIGN_OR_RETURN(
-      std::unique_ptr<GpuHloSchedule> hlo_schedule,
-      GpuHloSchedule::Build(*hlo_module, *stream_assignment, pointer_size_));
+  TF_ASSIGN_OR_RETURN(std::unique_ptr<GpuHloSchedule> hlo_schedule,
+                      GpuHloSchedule::Build(hlo_module.get(),
+                                            *stream_assignment, pointer_size_));
 
   auto buffer_size_bytes_function =
       [this](const BufferValue& buffer_value) -> int64 {
@@ -565,7 +565,7 @@ static Status CompileModuleToLlvmIrImpl(
       AssignStreams(*hlo_module);
   TF_ASSIGN_OR_RETURN(
       std::unique_ptr<GpuHloSchedule> hlo_schedule,
-      GpuHloSchedule::Build(*hlo_module, *stream_assignment, pointer_size));
+      GpuHloSchedule::Build(hlo_module, *stream_assignment, pointer_size));
 
   auto buffer_size_bytes_function =
       [pointer_size](const BufferValue& buffer_value) -> int64 {
",0,train
9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules.

Also simplify tests in gpu_hlo_schedule_test.

PiperOrigin-RevId: 357989058
Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule.cc,"@@ -190,30 +190,29 @@ GpuHloSchedule::GpuHloSchedule() {}
 
 /* static */
 StatusOr<std::unique_ptr<GpuHloSchedule>> GpuHloSchedule::Build(
-    const HloModule& module, const StreamAssignment& stream_assignment,
+    HloModule* module, const StreamAssignment& stream_assignment,
     int64 pointer_size) {
   std::unique_ptr<GpuHloSchedule> schedule(new GpuHloSchedule);
 
   // Initialize thunk_launch_order_, the total order of thunk launches.
-  HloComputation* entry_computation = module.entry_computation();
+  HloComputation* entry_computation = module->entry_computation();
   if (stream_assignment.StreamCount() == 1) {
-    // All kernels are launched on a single stream, so there's no loss of
-    // concurrency by optimizing for minimal memory usage.
     TF_ASSIGN_OR_RETURN(
-        HloInstructionSequence sequence,
-        ScheduleComputation(
-            entry_computation, [pointer_size](const BufferValue& buffer) {
-              return ShapeUtil::ByteSizeOf(buffer.shape(), pointer_size);
-            }));
-    schedule->thunk_launch_order_ = sequence.instructions();
+        HloSchedule sequences,
+        ScheduleModule(module, [pointer_size](const BufferValue& buffer) {
+          return ShapeUtil::ByteSizeOf(buffer.shape(), pointer_size);
+        }));
+    schedule->thunk_launch_order_ =
+        sequences.sequence(entry_computation).instructions();
+    schedule->hlo_ordering_ =
+        absl::make_unique<SequentialHloOrdering>(sequences);
   } else {
     // BFS tends to increase concurrency, but also increases memory usage.
     BFSLaunchOrder(entry_computation, &schedule->thunk_launch_order_);
+    schedule->hlo_ordering_ = absl::make_unique<GpuHloOrdering>(
+        module, stream_assignment, schedule->thunk_launch_order_);
   }
 
-  schedule->hlo_ordering_ = absl::make_unique<GpuHloOrdering>(
-      &module, stream_assignment, schedule->thunk_launch_order_);
-
   return std::move(schedule);
 }
 
",0,train
9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules.

Also simplify tests in gpu_hlo_schedule_test.

PiperOrigin-RevId: 357989058
Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule.h,"@@ -41,7 +41,7 @@ class GpuHloSchedule {
   // Constructs an GpuHloSchedule for the given module, based on the given
   // stream assignment.
   static StatusOr<std::unique_ptr<GpuHloSchedule>> Build(
-      const HloModule& module, const StreamAssignment& stream_assignment,
+      HloModule* module, const StreamAssignment& stream_assignment,
       int64 pointer_size);
 
   // Returns the total order of thunk launches, represented in terms of HLO
",0,train
9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules.

Also simplify tests in gpu_hlo_schedule_test.

PiperOrigin-RevId: 357989058
Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",gpu_hlo_schedule_test.cc,"@@ -39,7 +39,7 @@ class GpuHloScheduleTest : public HloTestBase {
   Shape f32_2x2_ = ShapeUtil::MakeShape(F32, {2, 2});
 
   static std::unique_ptr<GpuHloSchedule> BuildGpuHloSchedule(
-      const HloModule& module, const StreamAssignment& streams) {
+      HloModule* module, const StreamAssignment& streams) {
     return GpuHloSchedule::Build(module, streams, /*pointer_size=*/8)
         .ConsumeValueOrDie();
   }
@@ -86,7 +86,7 @@ TEST_F(GpuHloScheduleTest, SequentialMatMul) {
   EXPECT_EQ(streams->StreamNumberForHlo(*dot1),
             streams->StreamNumberForHlo(*dot2));
 
-  auto schedule = BuildGpuHloSchedule(*module, *streams);
+  auto schedule = BuildGpuHloSchedule(module.get(), *streams);
   // Remove parameters, which are unordered.
   EXPECT_EQ(RemoveHlo(schedule->ThunkLaunchOrder(), {x, y, z}),
             HloVec({dot1, dot2}));
@@ -94,32 +94,10 @@ TEST_F(GpuHloScheduleTest, SequentialMatMul) {
   // Parameters x,y,z are mutually unordered, while dot1 and dot2 are
   // transitively ordered by operands.
   auto order = schedule->ConsumeHloOrdering();
+  EXPECT_TRUE(order->ExecutesBefore(z, y));
+  EXPECT_TRUE(order->ExecutesBefore(y, x));
   EXPECT_TRUE(order->ExecutesBefore(x, dot1));
-  EXPECT_TRUE(order->ExecutesBefore(x, dot2));
-  EXPECT_TRUE(order->ExecutesBefore(y, dot1));
-  EXPECT_TRUE(order->ExecutesBefore(y, dot2));
-  EXPECT_TRUE(order->ExecutesBefore(z, dot2));
   EXPECT_TRUE(order->ExecutesBefore(dot1, dot2));
-
-  EXPECT_FALSE(order->ExecutesBefore(x, x));
-  EXPECT_FALSE(order->ExecutesBefore(x, y));
-  EXPECT_FALSE(order->ExecutesBefore(x, z));
-  EXPECT_FALSE(order->ExecutesBefore(y, x));
-  EXPECT_FALSE(order->ExecutesBefore(y, y));
-  EXPECT_FALSE(order->ExecutesBefore(y, z));
-  EXPECT_FALSE(order->ExecutesBefore(z, x));
-  EXPECT_FALSE(order->ExecutesBefore(z, y));
-  EXPECT_FALSE(order->ExecutesBefore(z, z));
-  EXPECT_FALSE(order->ExecutesBefore(z, dot1));
-  EXPECT_FALSE(order->ExecutesBefore(dot1, x));
-  EXPECT_FALSE(order->ExecutesBefore(dot1, y));
-  EXPECT_FALSE(order->ExecutesBefore(dot1, z));
-  EXPECT_FALSE(order->ExecutesBefore(dot1, dot1));
-  EXPECT_FALSE(order->ExecutesBefore(dot2, x));
-  EXPECT_FALSE(order->ExecutesBefore(dot2, y));
-  EXPECT_FALSE(order->ExecutesBefore(dot2, z));
-  EXPECT_FALSE(order->ExecutesBefore(dot2, dot1));
-  EXPECT_FALSE(order->ExecutesBefore(dot2, dot2));
 }
 
 // Test of a single stream, where data dependencies do not fully determine the
@@ -148,7 +126,7 @@ TEST_F(GpuHloScheduleTest, SequentialAdd) {
   EXPECT_EQ(streams->StreamNumberForHlo(*add1),
             streams->StreamNumberForHlo(*add3));
 
-  auto schedule = BuildGpuHloSchedule(*module, *streams);
+  auto schedule = BuildGpuHloSchedule(module.get(), *streams);
   // Remove parameters, which are unordered.
   EXPECT_EQ(RemoveHlo(schedule->ThunkLaunchOrder(), {x, y, z}),
             HloVec({add1, add2, add3}));
@@ -156,43 +134,11 @@ TEST_F(GpuHloScheduleTest, SequentialAdd) {
   // Parameters x,y,z are mutually unordered, while add1, add2 and add3 are
   // transitively ordered by operands.
   auto order = schedule->ConsumeHloOrdering();
+  EXPECT_TRUE(order->ExecutesBefore(y, z));
+  EXPECT_TRUE(order->ExecutesBefore(z, x));
   EXPECT_TRUE(order->ExecutesBefore(x, add1));
-  EXPECT_TRUE(order->ExecutesBefore(x, add2));
-  EXPECT_TRUE(order->ExecutesBefore(x, add3));
-  EXPECT_TRUE(order->ExecutesBefore(y, add1));
-  EXPECT_TRUE(order->ExecutesBefore(y, add2));
-  EXPECT_TRUE(order->ExecutesBefore(y, add3));
-  EXPECT_TRUE(order->ExecutesBefore(z, add2));
-  EXPECT_TRUE(order->ExecutesBefore(z, add3));
-  EXPECT_TRUE(order->ExecutesBefore(add1, add3));
+  EXPECT_TRUE(order->ExecutesBefore(add1, add2));
   EXPECT_TRUE(order->ExecutesBefore(add2, add3));
-  // The HLO graph does not define an ordering for add1 and add2, but their
-  // assignment onto the same stream does define an ordering.
-  if (order->ExecutesBefore(add1, add2)) {
-    EXPECT_FALSE(order->ExecutesBefore(add2, add1));
-  } else {
-    EXPECT_TRUE(order->ExecutesBefore(add2, add1));
-    EXPECT_FALSE(order->ExecutesBefore(add1, add2));
-  }
-
-  EXPECT_FALSE(order->ExecutesBefore(x, x));
-  EXPECT_FALSE(order->ExecutesBefore(x, y));
-  EXPECT_FALSE(order->ExecutesBefore(x, z));
-  EXPECT_FALSE(order->ExecutesBefore(y, x));
-  EXPECT_FALSE(order->ExecutesBefore(y, y));
-  EXPECT_FALSE(order->ExecutesBefore(y, z));
-  EXPECT_FALSE(order->ExecutesBefore(z, x));
-  EXPECT_FALSE(order->ExecutesBefore(z, y));
-  EXPECT_FALSE(order->ExecutesBefore(z, z));
-  EXPECT_FALSE(order->ExecutesBefore(z, add1));
-  EXPECT_FALSE(order->ExecutesBefore(add1, x));
-  EXPECT_FALSE(order->ExecutesBefore(add1, y));
-  EXPECT_FALSE(order->ExecutesBefore(add1, z));
-  EXPECT_FALSE(order->ExecutesBefore(add1, add1));
-  EXPECT_FALSE(order->ExecutesBefore(add2, x));
-  EXPECT_FALSE(order->ExecutesBefore(add2, y));
-  EXPECT_FALSE(order->ExecutesBefore(add2, z));
-  EXPECT_FALSE(order->ExecutesBefore(add2, add2));
 }
 
 // Test of two streams.
@@ -216,7 +162,7 @@ TEST_F(GpuHloScheduleTest, ConcurrentMatMul) {
   EXPECT_NE(streams->StreamNumberForHlo(*dot1),
             streams->StreamNumberForHlo(*dot2));
 
-  auto schedule = BuildGpuHloSchedule(*module, *streams);
+  auto schedule = BuildGpuHloSchedule(module.get(), *streams);
   // Remove parameters, which are unordered.
   HloVec thunk_launch_order = RemoveHlo(schedule->ThunkLaunchOrder(), {x, y});
   EXPECT_TRUE(thunk_launch_order == HloVec({dot1, dot2, add}) ||
@@ -308,7 +254,7 @@ TEST_F(GpuHloScheduleTest, LatticeMatMul) {
 
   // We don't check the thunk launch order, since there are many valid total
   // orders, and it's annoying to express.
-  auto schedule = BuildGpuHloSchedule(*module, *streams);
+  auto schedule = BuildGpuHloSchedule(module.get(), *streams);
 
   auto order = schedule->ConsumeHloOrdering();
   const HloVec all_params(
",0,train
9db4d46857b4d313425a83bdf86b24927e05423d,tensorflow/tensorflow,"[XLA/GPU] Use SequentialHloOrdering for single stream modules.

Also simplify tests in gpu_hlo_schedule_test.

PiperOrigin-RevId: 357989058
Change-Id: Iab8a2857ce41a30c278c1f202be3162ca16b6428",ir_emitter_unnested.cc,"@@ -3829,6 +3829,20 @@ Status CheckConditionalBuffersShareAllocation(
   return Status::OK();
 }
 
+Status AcceptMaybeOrdered(HloComputation* computation,
+                          IrEmitterUnnested* emitter,
+                          const BufferAssignment& buffer_assignment) {
+  const auto& debug_options = computation->parent()->config().debug_options();
+  if (debug_options.xla_gpu_disable_multi_streaming()) {
+    const HloInstructionSequence* sequence =
+        buffer_assignment.hlo_ordering().SequentialOrder(*computation);
+    // Always expect a sequential ordering for single-stream programs.
+    TF_RET_CHECK(sequence);
+    return computation->AcceptOrdered(emitter, sequence->instructions());
+  }
+  return computation->Accept(emitter);
+}
+
 }  // namespace
 
 StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk(
@@ -3842,14 +3856,18 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildWhileThunk(
   TF_ASSIGN_OR_RETURN(auto ir_emitter_condition,
                       IrEmitterUnnested::Create(hlo_module_config_, condition,
                                                 ir_emitter_context_));
-  TF_RETURN_IF_ERROR(condition->Accept(ir_emitter_condition.get()));
+
+  TF_RETURN_IF_ERROR(
+      AcceptMaybeOrdered(condition, ir_emitter_condition.get(),
+                         ir_emitter_context_->buffer_assignment()));
 
   // Generate thunk sequence for while 'body'.
   HloComputation* body = hlo->while_body();
   TF_ASSIGN_OR_RETURN(
       auto ir_emitter_body,
       IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_));
-  TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get()));
+  TF_RETURN_IF_ERROR(AcceptMaybeOrdered(
+      body, ir_emitter_body.get(), ir_emitter_context_->buffer_assignment()));
 
   const auto* index_map = ir_emitter_context_->profile_index_map();
   absl::optional<size_t> condition_profile_index, body_profile_index;
@@ -3877,7 +3895,8 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildForThunk(
   TF_ASSIGN_OR_RETURN(
       auto ir_emitter_body,
       IrEmitterUnnested::Create(hlo_module_config_, body, ir_emitter_context_));
-  TF_RETURN_IF_ERROR(body->Accept(ir_emitter_body.get()));
+  TF_RETURN_IF_ERROR(AcceptMaybeOrdered(
+      body, ir_emitter_body.get(), ir_emitter_context_->buffer_assignment()));
 
   const auto* index_map = ir_emitter_context_->profile_index_map();
   absl::optional<size_t> body_profile_index;
@@ -3914,7 +3933,8 @@ StatusOr<std::unique_ptr<Thunk>> IrEmitterUnnested::BuildConditionalThunk(
         auto ir_emitter,
         IrEmitterUnnested::Create(hlo_module_config_, branch_computation,
                                   ir_emitter_context_));
-    TF_CHECK_OK(branch_computation->Accept(ir_emitter.get()));
+    TF_CHECK_OK(AcceptMaybeOrdered(branch_computation, ir_emitter.get(),
+                                   ir_emitter_context_->buffer_assignment()));
     branch_thunks.push_back(std::move(*ir_emitter->ConsumeThunkSequence()));
 
     absl::optional<size_t> profile_index;
",0,train
f04c6f08a3756a7d5fd7ae94fb8199831e22cebd,tensorflow/tensorflow,Addressed more review comments,mkl_layout_pass.cc,"@@ -1210,16 +1210,16 @@ class MklLayoutRewritePass : public GraphOptimizationPass {
     string mode_string;
     GetNodeAttr(n->def(), ""mode"", &mode_string);
     if (mode_string != ""SCALED"") {
-      VLOG(1) << ""DequantizeRewrite: Mode is not SCALED.""
+      VLOG(1) << ""DequantizeRewrite: Mode is not SCALED. ""
               << ""This case is not optimized by Intel MKL kernel, thus using ""
-                 ""Eigen op for Dequantize op "";
+                 ""Eigen op for Dequantize op."";
       return false;
     }
     if (input->IsConstant()) {
       VLOG(1) << ""DequantizeRewrite: Trying to dequantize a Const node which ""
               << ""could possibly be a filter. ""
               << ""This case is not supported by Intel MKL kernel, thus using ""
-                 ""Eigen op for Dequantize op "";
+                 ""Eigen op for Dequantize op."";
       return false;
     }
     return true;
",0,train
ceb4b27be1947e9232304ad81c2d6e02d542e7ed,tensorflow/tensorflow,"Upstreaming the changes from https://github.com/tensorflow/tflite-micro/pull/369/

PiperOrigin-RevId: 389148121
Change-Id: I36a86a8122632f8ebc010be7d429e11b642b0aea",cppmath.h,"@@ -19,9 +19,8 @@ limitations under the License.
 
 namespace tflite {
 
-#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) ||                           \
-    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(ARDUINO) || \
-    defined(__ZEPHYR__)
+#if defined(TF_LITE_USE_GLOBAL_CMATH_FUNCTIONS) || \
+    (defined(__ANDROID__) && !defined(__NDK_MAJOR__)) || defined(__ZEPHYR__)
 #define TF_LITE_GLOBAL_STD_PREFIX
 #else
 #define TF_LITE_GLOBAL_STD_PREFIX std
",0,train
b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct

This is a follow-up to cl/439389601, which only tested and fixed the path
from _create_c_op. Operation() has one more level of nesting.

This could have been done by using stacklevel=4 in extract_traceback_for_op.
Instead, we mutate the traceback in place to avoid changing the API of
_create_c_op to add a stacklevel argument.

PiperOrigin-RevId: 439624965",ops.py,"@@ -2114,6 +2114,10 @@ class Operation(object):
     # Post process for control flows.
     self._control_flow_post_processing(input_tensors=inputs)
 
+    # Removes this frame from the Python traceback.
+    # We adjust stacklevel directly to avoid triggering serialization.
+    self.traceback._stacklevel += 1  # pylint: disable=protected-access
+
   @classmethod
   def _from_c_op(cls, c_op, g):
     """"""Create an Operation from a TF_Operation.
",0,train
b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct

This is a follow-up to cl/439389601, which only tested and fixed the path
from _create_c_op. Operation() has one more level of nesting.

This could have been done by using stacklevel=4 in extract_traceback_for_op.
Instead, we mutate the traceback in place to avoid changing the API of
_create_c_op to add a stacklevel argument.

PiperOrigin-RevId: 439624965",ops_test.py,"@@ -663,6 +663,13 @@ def _apply_op(g, *args, **kwargs):
 
 class OperationTest(test_util.TensorFlowTestCase):
 
+  def testTraceback(self):
+    g = ops.Graph()
+    op1 = ops.Operation(
+        ops._NodeDef(""None"", ""op1""), g, [],
+        [dtypes.float32_ref, dtypes.float32])
+    self.assertIn(""testTraceback"", op1.traceback[-1])
+
   @test_util.run_deprecated_v1
   def testNoInputs(self):
     op = test_ops.float_output_string_output(name=""myop"").a.op
",0,train
b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct

This is a follow-up to cl/439389601, which only tested and fixed the path
from _create_c_op. Operation() has one more level of nesting.

This could have been done by using stacklevel=4 in extract_traceback_for_op.
Instead, we mutate the traceback in place to avoid changing the API of
_create_c_op to add a stacklevel argument.

PiperOrigin-RevId: 439624965",tf_stack.cc,"@@ -138,7 +138,7 @@ std::string StackFrameToString(
 
 class StackTraceWrapper : public AbstractStackTrace {
  public:
-  explicit StackTraceWrapper(absl::Span<StackFrame const> stack_frames)
+  explicit StackTraceWrapper(absl::Span<const StackFrame> stack_frames)
       : stack_frames_cache_(std::vector<StackFrame>(stack_frames.begin(),
                                                     stack_frames.end())) {}
 
@@ -149,7 +149,7 @@ class StackTraceWrapper : public AbstractStackTrace {
                              stacklevel};
   }
 
-  absl::Span<StackFrame const> ToFrames() const override {
+  absl::Span<const StackFrame> ToFrames() const override {
     if (stack_frames_cache_) {
       return *stack_frames_cache_;
     }
@@ -172,6 +172,10 @@ class StackTraceWrapper : public AbstractStackTrace {
     return *stack_frames_cache_;
   }
 
+  int get_stacklevel() const { return stacklevel_; }
+
+  void set_stacklevel(int stacklevel) { stacklevel_ = stacklevel; }
+
   std::vector<StackFrame> GetUserFrames(int limit = -1) const {
     PyGILState_STATE state = PyGILState_Ensure();
     std::vector<StackFrame> user_frames = captured_.ToStackFrames(
@@ -262,7 +266,7 @@ class StackTraceWrapper : public AbstractStackTrace {
         filter_(filter),
         stacklevel_(stacklevel) {}
 
-  static std::string ToStringHelper(absl::Span<StackFrame const> stack_frames,
+  static std::string ToStringHelper(absl::Span<const StackFrame> stack_frames,
                                     const TracePrintingOptions& opts,
                                     int shared_prefix_size) {
     return absl::StrJoin(
@@ -369,8 +373,8 @@ PYBIND11_MODULE(_tf_stack, m) {
       // TODO(slebedev): upstream negative indexing support into pybind11.
       .def(
           ""__getitem__"",
-          [](const StackTraceWrapper& self, ssize_t index) {
-            absl::Span<StackFrame const> frames = self.ToFrames();
+          [](const StackTraceWrapper& self, py::ssize_t index) {
+            absl::Span<const StackFrame> frames = self.ToFrames();
             const size_t eff_index =
                 index < 0 ? frames.size() + index : static_cast<size_t>(index);
             if (eff_index >= frames.size()) {
@@ -382,7 +386,7 @@ PYBIND11_MODULE(_tf_stack, m) {
       .def(
           ""__getitem__"",
           [](const StackTraceWrapper& self, py::slice slice) {
-            absl::Span<StackFrame const> frames = self.ToFrames();
+            absl::Span<const StackFrame> frames = self.ToFrames();
             py::ssize_t start, stop, step, slicelength;
             if (!slice.compute(frames.size(), &start, &stop, &step,
                                &slicelength)) {
@@ -402,9 +406,19 @@ PYBIND11_MODULE(_tf_stack, m) {
             return StackTraceWrapper{out};
           },
           py::return_value_policy::reference_internal)
+      .def(""__delitem__"",
+           [](StackTraceWrapper& self, py::ssize_t index) {
+             absl::Span<const StackFrame> frames = self.ToFrames();
+             const size_t eff_index =
+                 index < 0 ? frames.size() + index : static_cast<size_t>(index);
+             if (eff_index >= frames.size()) {
+               throw py::index_error();
+             }
+             self.Erase(eff_index, eff_index + 1);
+           })
       .def(""__delitem__"",
            [](StackTraceWrapper& self, py::slice slice) {
-             absl::Span<StackFrame const> frames = self.ToFrames();
+             absl::Span<const StackFrame> frames = self.ToFrames();
              py::ssize_t start, stop, step, slicelength;
              if (!slice.compute(frames.size(), &start, &stop, &step,
                                 &slicelength)) {
@@ -433,6 +447,10 @@ PYBIND11_MODULE(_tf_stack, m) {
            [](const StackTraceWrapper& self) {
              return py::str(self.ToString({}));
            })
+      .def_property(
+          ""_stacklevel"", &StackTraceWrapper::get_stacklevel,
+          &StackTraceWrapper::set_stacklevel,
+          ""Adjusts stacklevel; no effects after ToFrames() is called."")
       .def(
           ""get_user_frames"",
           [](const StackTraceWrapper& self) {
",0,train
b46aa421ece747f31a47c3f600fac03bb6e4247f,tensorflow/tensorflow,"Ensure traceback from Operation() is correct

This is a follow-up to cl/439389601, which only tested and fixed the path
from _create_c_op. Operation() has one more level of nesting.

This could have been done by using stacklevel=4 in extract_traceback_for_op.
Instead, we mutate the traceback in place to avoid changing the API of
_create_c_op to add a stacklevel argument.

PiperOrigin-RevId: 439624965",tf_stack_test.py,"@@ -64,6 +64,23 @@ class TFStackTest(test.TestCase):
     self.assertRegex(frames[-1].line, ""# COMMENT"")
     self.assertRegex(frames[-2].line, ""# CALLSITE"")
 
+  def testGetItem(self):
+
+    def func(n):
+      if n == 0:
+        return tf_stack.extract_stack()  # COMMENT
+      else:
+        return func(n - 1)
+
+    trace = func(5)
+    self.assertIn(""COMMENT"", trace[-1].line)
+
+    with self.assertRaises(IndexError):
+      _ = trace[-len(trace) - 1]
+
+    with self.assertRaises(IndexError):
+      _ = trace[len(trace)]
+
   def testDelItem(self):
 
     def func(n):
@@ -72,6 +89,7 @@ class TFStackTest(test.TestCase):
       else:
         return func(n - 1)
 
+    # Test deleting a slice.
     trace = func(5)
     self.assertGreater(len(trace), 5)
 
@@ -82,6 +100,22 @@ class TFStackTest(test.TestCase):
     self.assertLen(head_list, len(full_list) - 5)
     self.assertEqual(head_list, full_list[:-5])
 
+    # Test deleting an item.
+    trace = func(1)
+    self.assertGreater(len(trace), 1)
+    full_list = list(trace)
+    del trace[-1]
+    head_list = list(trace)
+    self.assertLen(head_list, len(full_list) - 1)
+    self.assertEqual(head_list, full_list[:-1])
+
+    # Errors
+    trace = func(5)
+    with self.assertRaises(IndexError):
+      del trace[-len(trace) - 1]
+
+    with self.assertRaises(IndexError):
+      del trace[len(trace)]
 
 if __name__ == ""__main__"":
   test.main()
",0,train
19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id.

Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context.

PiperOrigin-RevId: 276631303
Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",context.cc,"@@ -215,21 +215,27 @@ bool EagerContext::MirrorTensors() const {
 #if !defined(IS_MOBILE_PLATFORM)
 void EagerContext::CloseAndClearAllRemoteContexts() {
   uint64 context_id;
+  uint64 context_view_id;
   {
     mutex_lock l(remote_state_mu_);
     if (!is_master_) return;
     context_id = context_id_;
+    context_view_id = context_view_id_;
     context_id_ = kInvalidContextId;
+    // Forget the current view id and reset to the starting value 0.
+    context_view_id_ = 0;
   }
-  CloseRemoteContexts(remote_contexts_, context_id);
+  CloseRemoteContexts(remote_contexts_, context_id, context_view_id);
   remote_contexts_.clear();
 }
 
 void EagerContext::CloseRemoteContexts(
-    const std::vector<string>& remote_contexts, uint64 context_id) {
+    const std::vector<string>& remote_contexts, uint64 context_id,
+    uint64 context_view_id) {
   // Close all remote contexts.
   eager::CloseContextRequest request;
   request.set_context_id(context_id);
+  request.set_context_view_id(context_view_id);
   // Setting context_id to a new value can avoid us issuing DestroyTensorHandle
   // request to closed remote workers.
   std::vector<eager::CloseContextResponse> responses(remote_contexts.size());
@@ -762,13 +768,12 @@ Status EagerContext::UpdateRemoteMaster(
   }
 
   if (!remove_remote_contexts.empty()) {
-    // N.B. remove_remote_contexts include both removed and replaced workers. It
-    // is safe to send CloseContextRequest to them using the old copy of eager
-    // client cache (i.e., `remote_eager_workers_`) because the replaced workers
-    // will be resolved to the old eager clients. Thus, it correctly closes
-    // contexts on workers that are replaced by new ones. It must be called
-    // before overwriting `remote_eager_workers_` in current master context.
-    CloseRemoteContexts(remove_remote_contexts, context_id);
+    // N.B. remove_remote_contexts include both removed and replaced workers.
+    // In the case where a worker is replaced by one that resolves to the same
+    // `hostname:port`, it is safe to close context with the current view id,
+    // since the newly created context on the remote worker will be holding
+    // a larger view id and ignores this request.
+    CloseRemoteContexts(remove_remote_contexts, context_id, GetContextViewId());
     for (const string& remote_context : remove_remote_contexts) {
       remote_contexts_.erase(
           std::remove(remote_contexts_.begin(), remote_contexts_.end(),
",0,train
19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id.

Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context.

PiperOrigin-RevId: 276631303
Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",context.h,"@@ -460,7 +460,7 @@ class EagerContext : public core::RefCounted {
 #if !defined(IS_MOBILE_PLATFORM)
   void CloseAndClearAllRemoteContexts();
   void CloseRemoteContexts(const std::vector<string>& remote_contexts,
-                           uint64 context_id);
+                           uint64 context_id, uint64 context_view_id);
 
   Status SetMasterContextState(
       std::unique_ptr<ServerInterface> server, WorkerEnv* worker_env,
",0,train
19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id.

Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context.

PiperOrigin-RevId: 276631303
Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",eager_service_impl.cc,"@@ -459,9 +459,17 @@ Status EagerServiceImpl::CloseContext(const CloseContextRequest* request,
     // Swallow the error here.
     return Status::OK();
   }
-
   core::ScopedUnref context_unref(context);
 
+  if (request->context_view_id() < context->Context()->GetContextViewId()) {
+    // Swallow the error here.
+    LOG(INFO) << ""Ignoring CloseContext request with a stale context_view_id ""
+              << request->context_view_id() << ""  for context_id ""
+              << request->context_id() << "". The current context_view_id is ""
+              << context->Context()->GetContextViewId() << ""."";
+    return Status::OK();
+  }
+
   mutex_lock l(contexts_mu_);
   contexts_.erase(request->context_id());
 
",0,train
19f428204217b7fc2820a340c43ce79b0e958a58,tensorflow/tensorflow,"Close remote contexts with a context_view_id.

Adding this field to the CloseContextRequest avoids the scenario where a stale request closes a newly created context.

PiperOrigin-RevId: 276631303
Change-Id: If9ff9191e3d1f2c0245265cdd9dccfd8bab00cd3",eager_service_impl_test.cc,"@@ -312,6 +312,7 @@ TEST_F(EagerServiceImplTest, BasicTest) {
 
   CloseContextRequest close_context_request;
   close_context_request.set_context_id(context_id);
+  close_context_request.set_context_view_id(0);
   CloseContextResponse close_context_response;
   TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request,
                                                &close_context_response));
@@ -379,6 +380,7 @@ TEST_F(EagerServiceImplTest, BasicFunctionTest) {
 
   CloseContextRequest close_context_request;
   close_context_request.set_context_id(context_id);
+  close_context_request.set_context_view_id(0);
   CloseContextResponse close_context_response;
   TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request,
                                                &close_context_response));
@@ -473,6 +475,7 @@ class FunctionWithRemoteInputsTest : public EagerServiceImplTest {
 
     CloseContextRequest close_context_request;
     close_context_request.set_context_id(context_id_);
+    close_context_request.set_context_view_id(0);
     CloseContextResponse close_context_response;
     TF_ASSERT_OK(eager_service_impl_.CloseContext(&close_context_request,
                                                   &close_context_response));
@@ -640,6 +643,7 @@ TEST_F(EagerServiceImplTest, SendTensorTest) {
 
   CloseContextRequest close_context_request;
   close_context_request.set_context_id(context_id);
+  close_context_request.set_context_view_id(0);
   CloseContextResponse close_context_response;
   TF_ASSERT_OK(eager_service_impl.CloseContext(&close_context_request,
                                                &close_context_response));
",0,train
4d568d5967cbb2f46b763800ea63390868368a24,tensorflow/tensorflow,"NFC: Move AffineOps dialect to the Dialect sub-directory.
PiperOrigin-RevId: 264482571",AffineOps.h,"@@ -20,8 +20,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef MLIR_AFFINEOPS_AFFINEOPS_H
-#define MLIR_AFFINEOPS_AFFINEOPS_H
+#ifndef MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H
+#define MLIR_DIALECT_AFFINEOPS_AFFINEOPS_H
 
 #include ""mlir/IR/AffineMap.h""
 #include ""mlir/IR/Builders.h""
@@ -540,7 +540,7 @@ void fullyComposeAffineMapAndOperands(AffineMap *map,
                                       llvm::SmallVectorImpl<Value *> *operands);
 
 #define GET_OP_CLASSES
-#include ""mlir/AffineOps/AffineOps.h.inc""
+#include ""mlir/Dialect/AffineOps/AffineOps.h.inc""
 
 /// Returns if the provided value is the induction variable of a AffineForOp.
 bool isForInductionVar(Value *val);
",0,train
4d568d5967cbb2f46b763800ea63390868368a24,tensorflow/tensorflow,"NFC: Move AffineOps dialect to the Dialect sub-directory.
PiperOrigin-RevId: 264482571",Builders.h,"@@ -23,7 +23,7 @@
 #ifndef MLIR_EDSC_BUILDERS_H_
 #define MLIR_EDSC_BUILDERS_H_
 
-#include ""mlir/AffineOps/AffineOps.h""
+#include ""mlir/Dialect/AffineOps/AffineOps.h""
 #include ""mlir/Dialect/StandardOps/Ops.h""
 #include ""mlir/Dialect/VectorOps/VectorOps.h""
 #include ""mlir/IR/Builders.h""
",0,train
3c08b43159e4bd1e587170a707f44fbea77239d1,tensorflow/tensorflow,"Docstring example and formatting updates
Change: 138936680",tensor_signature.py,"@@ -35,6 +35,17 @@ class TensorSignature(collections.namedtuple(
 
   Useful to check compatibility of tensors.
 
+  Example:
+
+  ```python
+  examples = tf.placeholder(...)
+  inputs = {'a': var_a, 'b': var_b}
+  signatures = tensor_signature.create_signatures(inputs)
+  result = tensor_signature.create_example_parser_from_signatures(
+      signatures, examples)
+  self.assertTrue(tensor_signature.tensors_compatible(result, signatures))
+  ```
+
   Attributes:
     dtype: `DType` object.
     shape: `TensorShape` object.
",0,train
028725d42f687243b47caa689909ae3e91221a1f,tensorflow/tensorflow,"Release GIL for PyLocalBuffer::copy_to_host_async

PiperOrigin-RevId: 273971322",xla.cc,"@@ -458,7 +458,8 @@ PYBIND11_MODULE(xla_extension, m) {
              py::gil_scoped_release gil_release;
              return buffer->BlockHostUntilReady();
            })
-      .def(""copy_to_host_async"", &PyLocalBuffer::CopyToHostAsync)
+      .def(""copy_to_host_async"", &PyLocalBuffer::CopyToHostAsync,
+           py::call_guard<py::gil_scoped_release>())
       .def(""to_py"",
            [](PyLocalBuffer* buffer) -> StatusOr<py::object> {
              GlobalPyRefManager()->CollectGarbage();
",0,test
e6ab6e648673041de33ba16d250367157a7cb2ec,tensorflow/tensorflow,"tfdbg: Google internal-oriented changes with no external effect
Change: 135694400",local_cli.py,"@@ -23,6 +23,7 @@ import shutil
 import sys
 import tempfile
 
+# Google-internal import(s).
 from tensorflow.python.debug import debug_data
 from tensorflow.python.debug import framework
 from tensorflow.python.debug.cli import analyzer_cli
@@ -37,7 +38,7 @@ _DUMP_ROOT_PREFIX = ""tfdbg_""
 class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession):
   """"""Concrete subclass of BaseDebugWrapperSession implementing a local CLI.""""""
 
-  def __init__(self, sess, dump_root=None):
+  def __init__(self, sess, dump_root=None, log_usage=True):
     """"""Constructor of LocalCLIDebugWrapperSession.
 
     Args:
@@ -46,12 +47,16 @@ class LocalCLIDebugWrapperSession(framework.BaseDebugWrapperSession):
         a directory that does not exist or an empty directory. If the directory
         does not exist, it will be created by the debugger core during debug
         run() calls and removed afterwards.
+      log_usage: (bool) Whether the usage of this class is to be logged.
 
     Raises:
       ValueError: If dump_root is an existing and non-empty directory or if
         dump_root is a file.
     """"""
 
+    if log_usage:
+      pass  # No logging for open-source.
+
     framework.BaseDebugWrapperSession.__init__(self, sess)
 
     if dump_root is None:
",0,test
e6ab6e648673041de33ba16d250367157a7cb2ec,tensorflow/tensorflow,"tfdbg: Google internal-oriented changes with no external effect
Change: 135694400",local_cli_test.py,"@@ -37,14 +37,14 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
       shutil.rmtree(self._tmp_dir)
 
   def testConstructWrapper(self):
-    local_cli.LocalCLIDebugWrapperSession(session.Session())
+    local_cli.LocalCLIDebugWrapperSession(session.Session(), log_usage=False)
 
   def testConstructWrapperWithExistingEmptyDumpRoot(self):
     os.mkdir(self._tmp_dir)
     self.assertTrue(os.path.isdir(self._tmp_dir))
 
     local_cli.LocalCLIDebugWrapperSession(
-        session.Session(), dump_root=self._tmp_dir)
+        session.Session(), dump_root=self._tmp_dir, log_usage=False)
 
   def testConstructWrapperWithExistingNonEmptyDumpRoot(self):
     os.mkdir(self._tmp_dir)
@@ -55,7 +55,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(
         ValueError, ""dump_root path points to a non-empty directory""):
       local_cli.LocalCLIDebugWrapperSession(
-          session.Session(), dump_root=self._tmp_dir)
+          session.Session(), dump_root=self._tmp_dir, log_usage=False)
 
   def testConstructWrapperWithExistingFileDumpRoot(self):
     os.mkdir(self._tmp_dir)
@@ -65,7 +65,7 @@ class LocalCLIDebugWrapperSessionTest(test_util.TensorFlowTestCase):
     with self.assertRaisesRegexp(
         ValueError, ""dump_root path points to a file""):
       local_cli.LocalCLIDebugWrapperSession(
-          session.Session(), dump_root=file_path)
+          session.Session(), dump_root=file_path, log_usage=False)
 
 
 if __name__ == ""__main__"":
",0,test
00ee6689bb838f45a393d4fbca11ad10018a382a,tensorflow/tensorflow,"Improvements to function._FuncGraph.

* Adds 'inputs', 'outputs', and 'name' field to _FuncGraph. This
  allows _FuncGraph to encapsulate all the information needed to
  convert it to a FunctionDef.
* Refactor logic for converting a Python callable to a _FuncGraph into
  a new method, func_graph_from_py_func().

These changes are in preparation for converting tf.cond to emit an If
op. By exposing _FuncGraph functionality outside of _DefinedFunction,
_FuncGraphs can be used to represent functions that are manipulated
(e.g. to output intermediate tensors) before being converted to
FunctionDef protos.

PiperOrigin-RevId: 197003496",function.py,"@@ -258,12 +258,10 @@ class _DefinedFunction(object):
     # another reference to _definition.signature
     self._op_def = None
 
-    self._args = []
     assert isinstance(input_types, (list, tuple))
-    for i in range(len(input_types)):
-      argname = argnames[i] if i < len(argnames) else (""arg%d"" % i)
-      argtype = input_types[i]
-      self._args.append((argname, argtype))
+    self._arg_types = input_types
+    self._arg_names = [argnames[i] if i < len(argnames) else (""arg%d"" % i)
+                       for i in range(len(input_types))]
 
   @property
   def name(self):
@@ -336,42 +334,11 @@ class _DefinedFunction(object):
     if self._definition is not None or self._c_func is not None:
       return
 
-    # Create the func_def object.
-    temp_graph = _FuncGraph(capture_by_value=self._capture_by_value)
-    with temp_graph.as_default(), ops.device(self._caller_device):
-      # List of placeholders for the function_def.
-      inputs = []
-      for (argname, argtype) in self._args:
-        argholder = array_ops.placeholder(argtype, name=argname)
-        inputs.append(argholder)
-      # Call func and gather the output tensors.
-      with vs.variable_scope("""", custom_getter=temp_graph.getvar):
-        outputs = self._func(*inputs)
-
-      # There is no way of distinguishing between a function not returning
-      # anything and a function returning None in Python.
-      # We need to allow the former and ideally want to forbid the latter as
-      # it is most likely user error.
-      # TODO(iga): Consider adding a @NoOutput decorator on top of @Defun to
-      # allow users to explicitly mark the function as not returning anything.
-      # For now, we allow a single None return and interpret it as a function
-      # with no output.
-      if outputs is None:
-        outputs = []
-      else:
-        # If func only returned one value, make it a tuple.
-        if not isinstance(outputs, (list, tuple)):
-          outputs = (outputs,)
-        if any([_ is None for _ in outputs]):
-          raise ValueError(""Function can not return None."")
-      # Ensures each output is a Tensor in the function graph.
-      outputs = [ops.convert_to_tensor(t) for t in outputs]
-      outputs = [
-          temp_graph.capture(t) if t.graph is not temp_graph else t
-          for t in outputs
-      ]
+    temp_graph = func_graph_from_py_func(
+        self._func, self._arg_names, self._arg_types, self._func_name,
+        self._capture_by_value, self._caller_device)
+
     self._extra_inputs = temp_graph.extra_inputs
-    inputs.extend(temp_graph.extra_args)
     # pylint: disable=protected-access
     self._sub_functions = temp_graph._functions
     # pylint: enable=protected-access
@@ -390,8 +357,8 @@ class _DefinedFunction(object):
       self._definition = graph_to_function_def.graph_to_function_def(
           temp_graph,
           temp_graph.get_operations(),
-          inputs,
-          outputs,
+          temp_graph.inputs,
+          temp_graph.outputs,
           out_names=self._out_names)
 
       for k in kwargs_attr:
@@ -421,8 +388,8 @@ class _DefinedFunction(object):
           base_func_name,
           self._func_name is None,  # append_hash_to_fn_name
           None,  # opers
-          [t._as_tf_output() for t in inputs],
-          [t._as_tf_output() for t in outputs],
+          [t._as_tf_output() for t in temp_graph.inputs],
+          [t._as_tf_output() for t in temp_graph.outputs],
           output_names,
           None,  # opts
           description)
@@ -653,16 +620,33 @@ class _FuncGraph(ops.Graph):
   function argument and the caller passes in the captured tensor.
   """"""
 
-  def __init__(self, capture_by_value, *args, **kwargs):
+  def __init__(self, name, capture_by_value, *args, **kwargs):
     super(_FuncGraph, self).__init__(*args, **kwargs)
     self._capture_by_value = capture_by_value
     self._building_function = True
     self._outer_graph = ops.get_default_graph()
     self._vscope = vs.get_variable_scope()
     self._old_custom_getter = self._vscope.custom_getter
+
+    # The name of the function.
+    self.name = name
+    # Placeholder tensors representing the inputs to this function. The tensors
+    # are in this _FuncGraph.
+    self.inputs = []
+    # Tensors that will be returned this function. The tensors are in this
+    # _FuncGraph.
+    self.outputs = []
+    # Maps external tensor -> internal tensor (e.g. input placeholder).
     self._captured = {}
+    # The external tensors that have been captured as inputs and must be passed
+    # to this function (empty if capturing by value, otherwise these are the
+    # keys of _captured).
     self.extra_inputs = []
+    # Input placeholders that been added for captured values (empty if capturing
+    # by value).
     self.extra_args = []
+    # Captured variables.
+    # TODO(skyewm): is this needed?
     self.extra_vars = []
 
   def getvar(
@@ -742,6 +726,7 @@ class _FuncGraph(ops.Graph):
     else:
       ph._handle_data = tensor._handle_data
     # pylint: enable=protected-access
+    self.inputs.append(ph)
     self._captured[tensor] = ph
     self.extra_args.append(ph)
     if _is_guaranteed_const(tensor):
@@ -780,6 +765,63 @@ class _FuncGraph(ops.Graph):
     return captured_op
 
 
+def func_graph_from_py_func(func, arg_names, arg_types, name=None,
+                            capture_by_value=False, device=None):
+  """"""Returns a _FuncGraph generated from `func`.
+
+  Args:
+    func: A Python callable which constructs a TF function body. The arguments
+      must correspond to `arg_types`. Returns a value or list/tuple of values.
+      No returned value can be None.
+    arg_names: A sequence of strings for the function argument names.
+    arg_types: A sequence of the function's argument types.
+    name: The function name. If None, the name is derived from `func`.
+    capture_by_value: boolean. If True, captured values will be copied into the
+      function body.
+    device: device name or function.
+
+  Returns:
+    A _FuncGraph.
+
+  Raises:
+    ValueError: if func returns None.
+  """"""
+  if not name:
+    name = _get_func_name(func)
+  func_graph = _FuncGraph(name, capture_by_value)
+  with func_graph.as_default(), ops.device(device):
+    # Create placeholders for the function arguments.
+    for (argname, argtype) in zip(arg_names, arg_types):
+      argholder = array_ops.placeholder(argtype, name=argname)
+      func_graph.inputs.append(argholder)
+    # Call func and gather the output tensors.
+    with vs.variable_scope("""", custom_getter=func_graph.getvar):
+      outputs = func(*func_graph.inputs)
+
+    # There is no way of distinguishing between a function not returning
+    # anything and a function returning None in Python.
+    # We need to allow the former and ideally want to forbid the latter as
+    # it is most likely user error.
+    # TODO(iga): Consider adding a @NoOutput decorator on top of @Defun to
+    # allow users to explicitly mark the function as not returning anything.
+    # For now, we allow a single None return and interpret it as a function
+    # with no output.
+    if outputs is None:
+      outputs = []
+    else:
+      # If func only returned one value, make it a tuple.
+      if not isinstance(outputs, (list, tuple)):
+        outputs = (outputs,)
+      if any([_ is None for _ in outputs]):
+        raise ValueError(""Function can not return None."")
+    # Ensures each output is a Tensor in the function graph.
+    outputs = [ops.convert_to_tensor(t) for t in outputs]
+    outputs = [func_graph.capture(t) if t.graph is not func_graph else t
+               for t in outputs]
+    func_graph.outputs = outputs
+  return func_graph
+
+
 def _is_guaranteed_const(tensor):
   """"""Determines whether `tensor` is guaranteed to be a constant.
 
",0,train
8fde5290d6f9acea81482a9f300178a07873322c,tensorflow/tensorflow,"Canonicalize MatrixSetDiag and MatrixSetDiagV2 ops to MatrixSetDiagV3

Lower canonical MatrixSetDiagV3 op in TFLite Converter instead of MatrixSetDiag

Now, MatrixSetDiag and MatrixSetDiagV2 are canonicalized to MatrixSetDiagV3 so this removes the need to downgrade MatrixSetDiagV2 and MatrixSetDiagV3 ops to MatrixSetDiag.

PiperOrigin-RevId: 343133748
Change-Id: Ia60046e17358de72af3b2ac9144fc80d437e930b",tf_ops_a_m.cc,"@@ -2428,6 +2428,24 @@ static LogicalResult Verify(MatrixBandPartOp op) {
   return success();
 }
 
+//===----------------------------------------------------------------------===//
+// MatrixSetDiagOp
+//===----------------------------------------------------------------------===//
+//
+void MatrixSetDiagOp::getCanonicalizationPatterns(
+    OwningRewritePatternList &results, MLIRContext *context) {
+  results.insert<MatrixSetDiagToV3>(context);
+}
+
+//===----------------------------------------------------------------------===//
+// MatrixSetDiagV2Op
+//===----------------------------------------------------------------------===//
+
+void MatrixSetDiagV2Op::getCanonicalizationPatterns(
+    OwningRewritePatternList &results, MLIRContext *context) {
+  results.insert<MatrixSetDiagV2ToV3>(context);
+}
+
 //===----------------------------------------------------------------------===//
 // MaxOp
 //===----------------------------------------------------------------------===//
",0,train
bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation.

PiperOrigin-RevId: 283450563
Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",directives.py,"@@ -98,9 +98,9 @@ class DirectivesTransformer(converter.Base):
       raise ValueError(
           '""%s"" must be used inside a statement' % directive.__name__)
     target = self.get_local(ENCLOSING_LOOP)
-    node_anno = anno.getanno(target, converter.AgAnno.DIRECTIVES, {})
+    node_anno = anno.getanno(target, anno.Basic.DIRECTIVES, {})
     node_anno[directive] = _map_args(call_node, directive)
-    anno.setanno(target, converter.AgAnno.DIRECTIVES, node_anno)
+    anno.setanno(target, anno.Basic.DIRECTIVES, node_anno)
     return call_node
 
   def visit_Name(self, node):
",0,train
bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation.

PiperOrigin-RevId: 283450563
Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",directives_test.py,"@@ -20,7 +20,6 @@ from __future__ import print_function
 
 from tensorflow.python.autograph.converters import directives as directives_converter
 from tensorflow.python.autograph.core import converter_testing
-from tensorflow.python.autograph.core.converter import AgAnno
 from tensorflow.python.autograph.lang import directives
 from tensorflow.python.autograph.pyct import anno
 from tensorflow.python.autograph.pyct import parser
@@ -68,7 +67,7 @@ class DirectivesTest(converter_testing.TestCase):
     node, ctx = self.prepare(test_fn, {'directives': directives})
     node = directives_converter.transform(node, ctx)
 
-    d = anno.getanno(node.body[1], AgAnno.DIRECTIVES)
+    d = anno.getanno(node.body[1], anno.Basic.DIRECTIVES)
     d = d[directives.set_loop_options]
     self.assertEqual(d['parallel_iterations'].n, 10)
     self.assertEqual(d['back_prop'].id, 'a')
",0,train
bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation.

PiperOrigin-RevId: 283450563
Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",converter.py,"@@ -354,15 +354,6 @@ class AnnotatedDef(reaching_definitions.Definition):
     self.directives = {}
 
 
-class AgAnno(enum.Enum):
-  """"""Annotation labels specific to AutoGraph. See anno.py.""""""
-
-  DIRECTIVES = 'User directives associated with the annotated statement.'
-
-  def __repr__(self):
-    return self.name
-
-
 def standard_analysis(node, context, is_initial=False):
   """"""Performs a complete static analysis of the given code.
 
",0,train
bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation.

PiperOrigin-RevId: 283450563
Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",anno.py,"@@ -55,6 +55,8 @@ class Basic(NoValue):
       ' `name_map` allows renaming symbols.')
   ORIGIN = ('Information about the source code that converted code originated'
             ' from. See origin_information.py.')
+  DIRECTIVES = ('User directives associated with a statement or a variable.'
+                ' Typically, they affect the immediately-enclosing statement.')
 
 
 class Static(NoValue):
",0,train
bfe235d23fd60d251a0cfa4325bb1b92bbf47f49,tensorflow/tensorflow,"Cleanup: Use a standard name for the directives annotation.

PiperOrigin-RevId: 283450563
Change-Id: I7db4fde547fe9d0d66c7a2e161e481f8add0b5ff",templates.py,"@@ -120,6 +120,7 @@ class ReplaceTransformer(gast.NodeTransformer):
     self.preserved_annos = {
         anno.Basic.ORIGIN,
         anno.Basic.SKIP_PROCESSING,
+        anno.Basic.DIRECTIVES,
         anno.Static.ORIG_DEFINITIONS,
         'extra_test',
         'function_context_name',
",0,train
f9a44a69c35dcf7f1c0f42e1ae9971bae0148099,tensorflow/tensorflow,Update the docs and api_def.,gcs_config_ops.cc,"@@ -21,50 +21,12 @@ namespace tensorflow {
 
 REGISTER_OP(""GcsConfigureCredentials"")
     .Input(""json: string"")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R""doc(
-Configures the credentials used by the GCS client of the local TF runtime.
-
-The json input can be of the format:
-
-1. Refresh Token:
-{
-  ""client_id"": ""<redacted>"",
-  ""client_secret"": ""<redacted>"",
-  ""refresh_token: ""<redacted>"",
-  ""type"": ""authorized_user"",
-}
-
-2. Service Account:
-{
-  ""type"": ""service_account"",
-  ""project_id"": ""<redacted>"",
-  ""private_key_id"": ""<redacted>"",
-  ""private_key"": ""------BEGIN PRIVATE KEY-----\n<REDACTED>\n-----END PRIVATE KEY------\n"",
-  ""client_email"": ""<REDACTED>@<REDACTED>.iam.gserviceaccount.com"",
-  ""client_id"": ""<REDACTED>"",
-  # Some additional fields elided
-}
-
-Note the credentials established through this method are shared across all
-sessions run on this runtime.
-
-Note be sure to feed the inputs to this op to ensure the credentials are not
-stored in a constant op within the graph that might accidentally be checkpointed
-or in other ways be persisted or exfiltrated.
-)doc"");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 REGISTER_OP(""GcsConfigureBlockCache"")
     .Input(""max_cache_size: uint64"")
     .Input(""block_size: uint64"")
     .Input(""max_staleness: uint64"")
-    .SetShapeFn(shape_inference::NoOutputs)
-    .Doc(R""doc(
-Re-configures the GCS block cache with the new configuration values.
-
-If the values are the same as already configured values, this op is a no-op. If
-they are different, the current contents of the block cache is dropped, and a
-new block cache is created fresh.
-)doc"");
+    .SetShapeFn(shape_inference::NoOutputs);
 
 }  // namespace tensorflow
",0,test
fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of
decoding failures. Instead of crashing the TF pipeline it will now return an
empty tensor. Note that pipelines that want to take advantage of this will have
to be modified to handle empty tensors.
Change: 128747076",decode_audio_op.cc,"@@ -25,6 +25,7 @@
 #include ""tensorflow/core/lib/strings/str_util.h""
 #include ""tensorflow/core/lib/strings/strcat.h""
 #include ""tensorflow/core/platform/env.h""
+#include ""tensorflow/core/platform/logging.h""
 
 namespace tensorflow {
 namespace ffmpeg {
@@ -112,6 +113,13 @@ class DecodeAudioOp : public OpKernel {
           context, result.ok(),
           errors::Unavailable(""FFmpeg must be installed to run this op. FFmpeg ""
                               ""can be found at http://www.ffmpeg.org.""));
+    } else if (result.code() == error::UNKNOWN) {
+      LOG(ERROR) << ""Ffmpeg failed with error '"" << result.error_message()
+                 << ""'. Returning empty tensor."";
+      Tensor* output = nullptr;
+      OP_REQUIRES_OK(
+          context, context->allocate_output(0, TensorShape({0, 0}), &output));
+      return;
     } else {
       OP_REQUIRES_OK(context, result);
     }
@@ -162,7 +170,8 @@ different from the contents of the file, channels will be merged or created.
 
 contents: The binary audio file contents.
 sampled_audio: A rank 2 tensor containing all tracks of the audio. Dimension 0
-    is time and dimension 1 is the channel.
+    is time and dimension 1 is the channel. If ffmpeg fails to decode the audio
+    then an empty tensor will be returned.
 file_format: A string describing the audio file format. This can be ""wav"" or
     ""mp3"".
 samples_per_second: The number of samples per second that the audio should have.
",0,test
fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of
decoding failures. Instead of crashing the TF pipeline it will now return an
empty tensor. Note that pipelines that want to take advantage of this will have
to be modified to handle empty tensors.
Change: 128747076",decode_audio_op_test.py,"@@ -72,6 +72,14 @@ class DecodeAudioOpTest(tf.test.TestCase):
   def testOgg(self):
     self._loadFileAndTest('mono_10khz.ogg', 'ogg', 0.57, 10000, 1)
 
+  def testInvalidFile(self):
+    with self.test_session():
+      contents = 'invalid file'
+      audio_op = ffmpeg.decode_audio(contents, file_format='wav',
+                                     samples_per_second=10000, channel_count=2)
+      audio = audio_op.eval()
+      self.assertEqual(audio.shape, (0, 0))
+
 
 if __name__ == '__main__':
   tf.test.main()
",0,test
fcc9a6ed272d6599d38ae59ae215cff786ad1bea,tensorflow/tensorflow,"Making the third_party ffmpeg decode_audio op resilient to small numbers of
decoding failures. Instead of crashing the TF pipeline it will now return an
empty tensor. Note that pipelines that want to take advantage of this will have
to be modified to handle empty tensors.
Change: 128747076",ffmpeg_ops.py,"@@ -67,7 +67,8 @@ def decode_audio(contents, file_format=None, samples_per_second=None,
   Returns:
     A rank 2 tensor that has time along dimension 0 and channels along
     dimension 1. Dimension 0 will be `samples_per_second * length` wide, and
-    dimension 1 will be `channel_count` wide.
+    dimension 1 will be `channel_count` wide. If ffmpeg fails to decode the
+    audio then an empty tensor will be returned.
   """"""
   return gen_decode_audio_op_py.decode_audio(
       contents, file_format=file_format, samples_per_second=samples_per_second,
",0,test
e0ec3437cfe4bf6ed3ab14d6601f3b7110fc5285,tensorflow/tensorflow,"LinearOperator (base class), prefer statically defined shape if available.
Change: 143529651",linear_operator.py,"@@ -21,6 +21,7 @@ from __future__ import print_function
 import contextlib
 
 from tensorflow.contrib import framework as contrib_framework
+from tensorflow.contrib.linalg.python.ops import linear_operator_util
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import linalg_ops
@@ -258,7 +259,12 @@ class LinearOperator(object):
     with self._name_scope(name):
       # Be clean by avoiding adding shape Ops to the graph too many times.
       if self._cached_shape_dynamic is None:
-        self._cached_shape_dynamic = self._shape_dynamic()
+        # Prefer to use statically defined shape if available.
+        if self.shape.is_fully_defined():
+          self._cached_shape_dynamic = linear_operator_util.shape_tensor(
+              self.shape.as_list())
+        else:
+          self._cached_shape_dynamic = self._shape_dynamic()
       return self._cached_shape_dynamic
 
   @property
@@ -291,8 +297,12 @@ class LinearOperator(object):
     # Derived classes get this ""for free"" once .shape() is implemented.
     with self._name_scope(name):
       if self._cached_batch_shape_dynamic is None:
-        self._cached_batch_shape_dynamic = array_ops.slice(
-            self.shape_dynamic(), [0], [self.tensor_rank_dynamic() - 2])
+        # Prefer to use statically defined shape if available.
+        if self.batch_shape.is_fully_defined():
+          self._cached_batch_shape_dynamic = linear_operator_util.shape_tensor(
+              self.batch_shape.as_list(), name=""batch_shape"")
+        else:
+          self._cached_batch_shape_dynamic = self.shape_dynamic()[:-2]
       return self._cached_batch_shape_dynamic
 
   @property
@@ -327,7 +337,13 @@ class LinearOperator(object):
     # Derived classes get this ""for free"" once .shape() is implemented.
     with self._name_scope(name):
       if self._cached_tensor_rank_dynamic is None:
-        self._cached_tensor_rank_dynamic = array_ops.size(self.shape_dynamic())
+        # Prefer to use statically defined shape if available.
+        if self.tensor_rank is not None:
+          self._cached_tensor_rank_dynamic = ops.convert_to_tensor(
+              self.tensor_rank)
+        else:
+          self._cached_tensor_rank_dynamic = array_ops.size(
+              self.shape_dynamic())
       return self._cached_tensor_rank_dynamic
 
   @property
@@ -360,8 +376,12 @@ class LinearOperator(object):
     # Derived classes get this ""for free"" once .shape() is implemented.
     with self._name_scope(name):
       if self._cached_domain_dimension_dynamic is None:
-        self._cached_domain_dimension_dynamic = array_ops.gather(
-            self.shape_dynamic(), self.tensor_rank_dynamic() - 1)
+        # Prefer to use statically defined shape if available.
+        if self.domain_dimension.value is not None:
+          self._cached_domain_dimension_dynamic = ops.convert_to_tensor(
+              self.domain_dimension.value)
+        else:
+          self._cached_domain_dimension_dynamic = self.shape_dynamic()[-1]
       return self._cached_domain_dimension_dynamic
 
   @property
@@ -394,8 +414,12 @@ class LinearOperator(object):
     # Derived classes get this ""for free"" once .shape() is implemented.
     with self._name_scope(name):
       if self._cached_range_dimension_dynamic is None:
-        self._cached_range_dimension_dynamic = array_ops.gather(
-            self.shape_dynamic(), self.tensor_rank_dynamic() - 2)
+        # Prefer to use statically defined shape if available.
+        if self.range_dimension.value is not None:
+          self._cached_range_dimension_dynamic = ops.convert_to_tensor(
+              self.range_dimension.value)
+        else:
+          self._cached_range_dimension_dynamic = self.shape_dynamic()[-2]
       return self._cached_range_dimension_dynamic
 
   def _assert_non_singular(self):
",0,train
e0ec3437cfe4bf6ed3ab14d6601f3b7110fc5285,tensorflow/tensorflow,"LinearOperator (base class), prefer statically defined shape if available.
Change: 143529651",linear_operator_util.py,"@@ -18,6 +18,7 @@ from __future__ import absolute_import
 from __future__ import division
 from __future__ import print_function
 
+from tensorflow.python.framework import dtypes
 from tensorflow.python.framework import ops
 from tensorflow.python.ops import check_ops
 from tensorflow.python.ops import control_flow_ops
@@ -64,3 +65,13 @@ def assert_zero_imag_part(x, message=None, name=""assert_zero_imag_part""):
 
     zero = ops.convert_to_tensor(0, dtype=dtype.real_dtype)
     return check_ops.assert_equal(zero, math_ops.imag(x), message=message)
+
+
+def shape_tensor(shape, name=None):
+  """"""Convert Tensor using default type, unless empty list or tuple.""""""
+  # Works just like random_ops._ShapeTensor.
+  if isinstance(shape, (tuple, list)) and not shape:
+    dtype = dtypes.int32
+  else:
+    dtype = None
+  return ops.convert_to_tensor(shape, dtype=dtype, name=name)
",0,train
428a2813f2aa5703f8ecdee0f175a13d59707ed5,tensorflow/tensorflow,"Fix broken tensorboard/backend:server_test in OSS.

It failed because:
- assertSameElements doesn't exist in OSS
- json_format.MessageToDict doesn't exist in the OSS.

Replaced with the correct ones.
Change: 137602799",server_test.py,"@@ -243,7 +243,7 @@ class TensorboardServerTest(tf.test.TestCase):
       return
 
     info_json = self._getJson('/data/plugin/projector/info?run=run1')
-    self.assertSameElements(info_json['embeddings'], [
+    self.assertItemsEqual(info_json['embeddings'], [
         {
             'tensorShape': [1, 2],
             'tensorName': 'var1'
",0,train
428a2813f2aa5703f8ecdee0f175a13d59707ed5,tensorflow/tensorflow,"Fix broken tensorboard/backend:server_test in OSS.

It failed because:
- assertSameElements doesn't exist in OSS
- json_format.MessageToDict doesn't exist in the OSS.

Replaced with the correct ones.
Change: 137602799",plugin.py,"@@ -170,7 +170,7 @@ class ProjectorPlugin(TBPlugin):
       if not info.tensor_shape:
         info.tensor_shape.extend(tensor_shape)
 
-    self.handler.respond(json_format.MessageToDict(config), 'application/json')
+    self.handler.respond(json_format.MessageToJson(config), 'application/json')
 
   def _serve_metadata(self, query_params):
     run = query_params.get('run')
",0,train
c955f9804bd9e56c712934c6f4c2b24cfc3a2310,tensorflow/tensorflow,Make *args in sv.loop example an iterable,supervisor.py,"@@ -152,7 +152,7 @@ class Supervisor(object):
     ...
     sv = Supervisor(logdir='/tmp/mydir')
     with sv.managed_session(FLAGS.master) as sess:
-      sv.loop(60, print_loss, (sess))
+      sv.loop(60, print_loss, (sess, ))
       while not sv.should_stop():
         sess.run(my_train_op)
     ```
",0,test
60a9676ea1b7645e4d268a09df21147b3381a140,tensorflow/tensorflow,"Convert unicode strings to (byte-)strings in py_func (Python3 compatibility)

PiperOrigin-RevId: 170524684",py_func_test.py,"@@ -133,12 +133,34 @@ class PyOpTest(test.TestCase):
       z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
       self.assertListEqual(list(z.eval()), [b""hello there"", b""hi there""])
 
+  def testStringsAreConvertedToBytes(self):
+
+    def read_fixed_length_numpy_strings():
+      return np.array(["" there""])
+
+    def read_and_return_strings(x, y):
+      return x + y
+
+    with self.test_session():
+      x = constant_op.constant([""hello"", ""hi""], dtypes.string)
+      y, = script_ops.py_func(read_fixed_length_numpy_strings, [],
+                              [dtypes.string])
+      z, = script_ops.py_func(read_and_return_strings, [x, y], [dtypes.string])
+      self.assertListEqual(list(z.eval()), [b""hello there"", b""hi there""])
+
   def testStringPadding(self):
     correct = [b""this"", b""is"", b""a"", b""test""]
     with self.test_session():
       s, = script_ops.py_func(lambda: [correct], [], [dtypes.string])
       self.assertAllEqual(s.eval(), correct)
 
+  def testStringPaddingAreConvertedToBytes(self):
+    inp = [""this"", ""is"", ""a"", ""test""]
+    correct = [b""this"", b""is"", b""a"", b""test""]
+    with self.test_session():
+      s, = script_ops.py_func(lambda: [inp], [], [dtypes.string])
+      self.assertAllEqual(s.eval(), correct)
+
   def testLarge(self):
     with self.test_session() as sess:
       x = array_ops.zeros([1000000], dtype=np.float32)
",0,train
60a9676ea1b7645e4d268a09df21147b3381a140,tensorflow/tensorflow,"Convert unicode strings to (byte-)strings in py_func (Python3 compatibility)

PiperOrigin-RevId: 170524684",script_ops.py,"@@ -64,6 +64,8 @@ class FuncRegistry(object):
     components of a tensor have different lengths.  This is bad: ignoring the
     padding is wrong for text data, and removing the padding is wrong for binary
     data.  To avoid this bug, we redo the conversion using an object dtype.
+    Additionally, we convert unicode strings to (byte-)strings for Python3
+    compatibility.
 
     Args:
       value: Value to convert to a numpy array.
@@ -72,9 +74,15 @@ class FuncRegistry(object):
       A numpy array.
     """"""
     result = np.asarray(value, order=""C"")
-    if result.dtype.char in ""SU"" and result is not value:
+    if result.dtype.char == ""S"" and result is not value:
       return np.asarray(value, order=""C"", dtype=object)
-    return result
+    elif result.dtype.char == ""U"" and result is not value:
+      value = np.vectorize(lambda x: x.encode())(value)
+      return np.asarray(value, order=""C"", dtype=object)
+    elif result.dtype.char == ""U"":
+      return result.astype(np.bytes_)
+    else:
+      return result
 
   def __call__(self, token, args):
     """"""Calls the registered function for `token` with args.""""""
",0,train
4f5b9455c3fae482e1f0477ff664777778e9da02,tensorflow/tensorflow,"Add test case for GitHub issue 33383

where ignore_erorrs combined with tf.data.Dataset.zip could
cause out-of-sync for remaining components.

Signed-off-by: Yong Tang <yong.tang.github@outlook.com>",ignore_errors_test.py,"@@ -126,6 +126,19 @@ class IgnoreErrorsTest(test_base.DatasetTestBase):
     with self.assertRaises(errors.OutOfRangeError):
       self.evaluate(get_next())
 
+  def testZipIgnoreError(self):
+    a = dataset_ops.Dataset.from_tensor_slices([1., 2., 0., 4.])
+    b = a.map(lambda x: array_ops.check_numerics(1. / x, ""error""))
+
+    dataset = dataset_ops.Dataset.zip(
+        (b, a)).apply(error_ops.ignore_errors())
+    get_next = self.getNext(dataset)
+
+    for x in [1., 2., 4.]:
+      self.assertEqual((1. / x, x), self.evaluate(get_next()))
+    with self.assertRaises(errors.OutOfRangeError):
+      self.evaluate(get_next())
+
 
 if __name__ == ""__main__"":
   test.main()
",0,train
7bbbbe8a86ad35c3ca9a864f4df8722508f68b97,tensorflow/tensorflow,"Automatic precision selection.

PiperOrigin-RevId: 296910710
Change-Id: I64d9a23f21225bacbb21ed7bf2d51fcb68f7d8e2",performance_profiling.cc,"@@ -22,6 +22,7 @@ limitations under the License.
 #include ""tensorflow/lite/delegates/gpu/cl/cl_command_queue.h""
 #include ""tensorflow/lite/delegates/gpu/cl/environment.h""
 #include ""tensorflow/lite/delegates/gpu/cl/inference_context.h""
+#include ""tensorflow/lite/delegates/gpu/cl/model_hints.h""
 #include ""tensorflow/lite/delegates/gpu/cl/opencl_wrapper.h""
 #include ""tensorflow/lite/delegates/gpu/cl/precision.h""
 #include ""tensorflow/lite/delegates/gpu/cl/tensor_type.h""
@@ -122,7 +123,9 @@ Status RunModelSample(const std::string& model_name) {
   RETURN_IF_ERROR(CreateEnvironment(&env));
 
   InferenceContext::CreateInferenceInfo create_info;
-  create_info.precision = CalculationsPrecision::F16;
+  create_info.precision = env.IsSupported(CalculationsPrecision::F16)
+                              ? CalculationsPrecision::F16
+                              : CalculationsPrecision::F32;
   create_info.storage_type = GetFastestStorageType(env.device());
   std::cout << ""Precision: "" << ToString(create_info.precision) << std::endl;
   std::cout << ""Storage type: "" << ToString(create_info.storage_type)
",0,test
3c4e684b81810bde0bd72fabd149a6083aeff02e,tensorflow/tensorflow,"Turn on VariablePolicy for TPUStrategy.

PiperOrigin-RevId: 337271222
Change-Id: Iacebd894d496b01ebf5bc78d07bc2639d3896da5",tpu_strategy.py,"@@ -740,7 +740,7 @@ class TPUExtended(distribute_lib.StrategyExtendedV1):
       atexit.register(async_wait)
 
     # Flag to turn on VariablePolicy
-    self._use_var_policy = False
+    self._use_var_policy = True
 
   def _validate_colocate_with_variable(self, colocate_with_variable):
     distribute_utils. validate_colocate(colocate_with_variable, self)
",0,train
eb0f97061cf9f13ac611f5807870873360348ae6,tensorflow/tensorflow,"Add migration block for disable_v2_behavior

PiperOrigin-RevId: 387851624
Change-Id: I3f37d5e4aee06ecd5d294574b59d2c0c6d6a8949",v2_compat.py,"@@ -93,6 +93,16 @@ def disable_v2_behavior():
   TensorFlow 1.x and 2.x to behave as intended for 1.x.
 
   User can call this function to disable 2.x behavior during complex migrations.
+
+  @compatibility(TF2)
+  Using this function indicates that your software is not compatible
+  with eager execution and `tf.function` in TF2.
+
+  To migrate to TF2, rewrite your code to be compatible with eager execution.
+  Please refer to the [migration guide]
+  (https://www.tensorflow.org/guide/migrate) for additional resource on the
+  topic.
+  @end_compatibility
   """"""
   _v2_behavior_usage_gauge.get_cell(""disable"").set(True)
   tf2.disable()
",0,train
49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files.

PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""estimator python module.
-
-Importing from tensorflow.python.estimator is unsupported
-and will soon break!
-""""""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python import estimator
-
-# Include attrs that start with single underscore.
-_HAS_DYNAMIC_ATTRIBUTES = True
-estimator.__all__ = [s for s in dir(estimator) if not s.startswith('__')]
-
-from tensorflow_estimator.python.estimator import *
",0,train
49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files.

PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""canned python module.
-
-Importing from tensorflow.python.estimator is unsupported
-and will soon break!
-""""""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import canned
-
-# Include attrs that start with single underscore.
-_HAS_DYNAMIC_ATTRIBUTES = True
-canned.__all__ = [s for s in dir(canned) if not s.startswith('__')]
-
-from tensorflow_estimator.python.estimator.canned import *
",0,train
49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files.

PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""export python module.
-
-Importing from tensorflow.python.estimator is unsupported
-and will soon break!
-""""""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import export
-
-# Include attrs that start with single underscore.
-_HAS_DYNAMIC_ATTRIBUTES = True
-export.__all__ = [s for s in dir(export) if not s.startswith('__')]
-
-from tensorflow_estimator.python.estimator.export import *
",0,train
49002f2e95446e4aa262080839226eb9f47ad43b,tensorflow/tensorflow,"Remove unnecessary __init__ files.

PiperOrigin-RevId: 260967386",__init__.py,"@@ -1,32 +0,0 @@
-# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the ""License"");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an ""AS IS"" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-# ==============================================================================
-""""""inputs python module.
-
-Importing from tensorflow.python.estimator is unsupported
-and will soon break!
-""""""
-# pylint: disable=unused-import,g-bad-import-order,g-import-not-at-top,wildcard-import
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from tensorflow_estimator.python.estimator import inputs
-
-# Include attrs that start with single underscore.
-_HAS_DYNAMIC_ATTRIBUTES = True
-inputs.__all__ = [s for s in dir(inputs) if not s.startswith('__')]
-
-from tensorflow_estimator.python.estimator.inputs import *
",0,train
955f41c5f2240495a086b503e54eac6928876aca,tensorflow/tensorflow,"Cleanup `astor` output to match `codegen` output.

The default `astor` output messes up the function signature docs for many docs without a bit of cleanup.

With this change the only differences I see are parens around lambdas and math expressions in default arguments.",parser.py,"@@ -650,6 +650,9 @@ def _remove_first_line_indent(string):
   return '\n'.join([line[indent:] for line in string.split('\n')])
 
 
+PAREN_NUMBER_RE = re.compile(""^\(([0-9.e-]+)\)"")
+
+
 def _generate_signature(func, reverse_index):
   """"""Given a function, returns a list of strings representing its args.
 
@@ -705,7 +708,11 @@ def _generate_signature(func, reverse_index):
       if id(default) in reverse_index:
         default_text = reverse_index[id(default)]
       elif ast_default is not None:
-        default_text = astor.to_source(ast_default)
+        default_text = (
+            astor.to_source(ast_default).rstrip('\n').replace('\t','\\t')
+                 .replace('\n','\\n').replace('""""""',""'""))
+        default_text = PAREN_NUMBER_RE.sub('\\1',default_text)
+
         if default_text != repr(default):
           # This may be an internal name. If so, handle the ones we know about.
           # TODO(wicke): This should be replaced with a lookup in the index.
",0,train
59292f548ccb7454c4e4bf3bb7e3f51eab50251f,tensorflow/tensorflow,Addressing @penpornk's comments,direct_session_with_tracking_alloc_test.cc,"@@ -108,25 +108,9 @@ TEST(DirectSessionWithTrackingAllocTest, CostModelTest) {
         EXPECT_EQ(2, shape.dim(0).size());
         EXPECT_EQ(1, shape.dim(1).size());
         if (node->name() == y->name()) {
-#if defined(INTEL_MKL) && defined(ENABLE_MKL)
-          // if MKL is used, it goes through various additional
-          // graph rewrite pass. In TF, everytime a graph pass
-          // happens, ""constant"" nodes are allocated
-          // and deallocated. Each allocation calls the
-          // (FindChunkPtr of BFCAllocator),
-          // which increments the value of AllocationId.
-          // Thus AllocationId becomes more than TF if MKL
-          // is used. Now IDs for MKL are 8 more than TF.
           EXPECT_EQ(13, cm->AllocationId(node, 0));
-#else
-          EXPECT_EQ(13, cm->AllocationId(node, 0));
-#endif  // INTEL_MKL && ENABLE_MKL
         } else {
-#if defined(INTEL_MKL) && defined(ENABLE_MKL)
-          EXPECT_EQ(14, cm->AllocationId(node, 0));
-#else
           EXPECT_EQ(14, cm->AllocationId(node, 0));
-#endif  // INTEL_MKL && ENABLE_MKL
         }
       }
       EXPECT_LE(0, cm->MaxExecutionTime(node));
",0,train
0349fe6a146c8dbb0d27a3d38729436c8164fffa,tensorflow/tensorflow,"Cosmetic fix to AUC class docstring.

PiperOrigin-RevId: 377530540
Change-Id: I6eeb45ece631ef4ce3b1fec354ffb04c01ddf919",metrics.py,"@@ -1971,7 +1971,7 @@ class AUC(Metric):
   of binary classifiers. Unlike the accuracy, and like cross-entropy
   losses, ROC-AUC and PR-AUC evaluate all the operational points of a model.
 
-  This classes approximates AUCs using a Riemann sum: During the metric
+  This class approximates AUCs using a Riemann sum. During the metric
   accumulation phrase, predictions are accumulated within predefined buckets
   by value. The AUC is then computed by interpolating per-bucket averages. These
   buckets define the evaluated operational points.
",0,test
552580beb1b5488128053506a03730e3d1ba02ad,tensorflow/tensorflow,Divide by non zero data,math_grad.py,"@@ -447,7 +447,7 @@ def _SegmentProdGrad(op, grad):
   non_zero_prod = gen_math_ops.segment_prod(non_zero_data, segment_ids)
   gathered_prod = array_ops.gather(op.outputs[0], segment_ids)
   gathered_non_zero_prod = array_ops.gather(non_zero_prod, segment_ids)
-  prod_divided_by_el = gathered_prod / data  # May contain nan/inf.
+  prod_divided_by_el = gathered_prod / non_zero_data
   # Now fetch the individual results for segments containing 0 and those that
   # don't.
   partial_derivative = array_ops.where_v2(is_zero, gathered_non_zero_prod,
",0,train
4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane.

This won't catch all overflows, but will do the right thing for the ""normal"" flow.

Also fix layout validation to reject padded sparse layouts.

PiperOrigin-RevId: 202151215",layout_util.cc,"@@ -248,6 +248,12 @@ Layout CreateDefaultLayoutForRank(int64 rank) {
     }
   }
 
+  if (layout.format() == SPARSE) {
+    if (!layout.padded_dimensions().empty()) {
+      return InvalidArgument(""Sparse layout has padded dimensions"");
+    }
+  }
+
   return Status::OK();
 }
 
",0,train
4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane.

This won't catch all overflows, but will do the right thing for the ""normal"" flow.

Also fix layout validation to reject padded sparse layouts.

PiperOrigin-RevId: 202151215",overflow_util.h,"@@ -0,0 +1,50 @@
+/* Copyright 2015 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the ""License"");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an ""AS IS"" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_
+#define TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_
+
+#include ""tensorflow/core/platform/logging.h""
+#include ""tensorflow/core/platform/macros.h""
+#include ""tensorflow/core/platform/types.h""
+
+namespace xla {
+
+// Multiply two nonnegative int64's, returning negative for overflow
+inline int64 MultiplyWithoutOverflow(const int64 x, const int64 y) {
+  // Multiply in uint64 rather than int64 since signed overflow is undefined.
+  // Negative values will wrap around to large unsigned values in the casts
+  // (see section 4.7 [conv.integral] of the C++14 standard).
+  const uint64 ux = x;
+  const uint64 uy = y;
+  const uint64 uxy = ux * uy;
+
+  // Check if we overflow uint64, using a cheap check if both inputs are small
+  if (TF_PREDICT_FALSE((ux | uy) >> 32 != 0)) {
+    // Ensure nonnegativity.  Note that negative numbers will appear ""large""
+    // to the unsigned comparisons above.
+    CHECK(x >= 0 && y >= 0);
+
+    // Otherwise, detect overflow using a division
+    if (ux != 0 && uxy / ux != uy) return -1;
+  }
+
+  // Cast back to signed.  Any negative value will signal an error.
+  return static_cast<int64>(uxy);
+}
+
+}  // namespace xla
+
+#endif  // TENSORFLOW_COMPILER_XLA_OVERFLOW_UTIL_H_
",0,train
4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane.

This won't catch all overflows, but will do the right thing for the ""normal"" flow.

Also fix layout validation to reject padded sparse layouts.

PiperOrigin-RevId: 202151215",shape_util.cc,"@@ -24,6 +24,7 @@ limitations under the License.
 
 #include ""tensorflow/compiler/xla/index_util.h""
 #include ""tensorflow/compiler/xla/layout_util.h""
+#include ""tensorflow/compiler/xla/overflow_util.h""
 #include ""tensorflow/compiler/xla/primitive_util.h""
 #include ""tensorflow/compiler/xla/status_macros.h""
 #include ""tensorflow/compiler/xla/types.h""
@@ -885,6 +886,50 @@ StatusOr<Shape> ParseShapeStringInternal(tensorflow::StringPiece* s) {
     }
   }
 
+  TF_RETURN_IF_ERROR(ValidateShapeSize(shape));
+  return Status::OK();
+}
+
+/* static */ Status ShapeUtil::ValidateShapeSize(const Shape& shape) {
+  VLOG(3) << ""Validating shape size: "" << ShapeUtil::HumanString(shape);
+  auto invalid_argument =
+      InvalidArgument(""Shape %s size may overflow int64."",
+                      ShapeUtil::HumanString(shape).c_str());
+  if (!IsArray(shape)) {
+    return Status::OK();
+  }
+  int64 shape_size;
+  if (LayoutUtil::IsSparseArray(shape)) {
+    shape_size = LayoutUtil::MaxSparseElements(shape.layout());
+    shape_size = MultiplyWithoutOverflow(shape_size, ShapeUtil::Rank(shape));
+    if (shape_size < 0) {
+      return invalid_argument;
+    }
+    shape_size = MultiplyWithoutOverflow(shape_size, sizeof(int64));
+    if (shape_size < 0) {
+      return invalid_argument;
+    }
+  }
+
+  // This is intentionally unconditional: even if the shape is sparse, we want
+  // to verify the densified version has a reasonable size.
+  if (shape.dimensions().empty()) {
+    return Status::OK();
+  }
+  shape_size = 1;
+  for (int64 dim : shape.dimensions()) {
+    shape_size = MultiplyWithoutOverflow(shape_size, dim);
+    if (shape_size < 0) {
+      return invalid_argument;
+    }
+  }
+  shape_size = MultiplyWithoutOverflow(
+      shape_size, ByteSizeOfPrimitiveType(shape.element_type()));
+  if (shape_size < 0) {
+    return invalid_argument;
+  }
+
+  VLOG(3) << ""Shape size is valid: "" << shape_size;
   return Status::OK();
 }
 
",0,train
4dc57fb74b7885a5ef468bc5fced373724d4ac59,tensorflow/tensorflow,"[XLA] Try to validate that shape sizes are sane.

This won't catch all overflows, but will do the right thing for the ""normal"" flow.

Also fix layout validation to reject padded sparse layouts.

PiperOrigin-RevId: 202151215",shape_util.h,"@@ -702,6 +702,10 @@ class ShapeUtil {
   static size_t Hash(const Shape& shape);
 
  private:
+  // Validates the shape size is sane. This makes sure it's safe to do
+  // calculations in int64 without overflowing.
+  static Status ValidateShapeSize(const Shape& shape);
+
   // Validates all of the non-layout properties of the shape -- this is a helper
   // used by both the layout-optional and layout-required public method.
   static Status ValidateShapeWithOptionalLayoutInternal(const Shape& shape);
",0,train
604ff7509dcfe452a3baa06b1b500980063d8262,tensorflow/tensorflow,"Always return a list for Graph.collection (including in Python3)

PiperOrigin-RevId: 156870745",ops.py,"@@ -2789,7 +2789,7 @@ class Graph(object):
   @property
   def collections(self):
     """"""Returns the names of the collections known to this graph.""""""
-    return self._collections.keys()
+    return list(self._collections)
 
   def add_to_collection(self, name, value):
     """"""Stores `value` in the collection with the given `name`.
",0,train
fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads.

PiperOrigin-RevId: 193266515
(cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",tpu_estimator.py,"@@ -2054,6 +2054,14 @@ class TPUEstimator(estimator_lib.Estimator):
                   },
                   every_n_secs=30)
           ] + input_hooks
+          chief_hooks = [
+              training.CheckpointSaverHook(
+                  self.model_dir,
+                  save_secs=self._config.save_checkpoints_secs,
+                  save_steps=self._config.save_checkpoints_steps,
+                  steps_per_run=self._config.tpu_config.iterations_per_loop,
+                  scaffold=scaffold)
+          ]
           summary.scalar(model_fn_lib.LOSS_METRIC_KEY, loss)
           with ops.control_dependencies([loss]):
             update_ops = _sync_variables_ops()
@@ -2067,6 +2075,7 @@ class TPUEstimator(estimator_lib.Estimator):
           return model_fn_lib.EstimatorSpec(
               mode,
               loss=loss,
+              training_chief_hooks=chief_hooks,
               training_hooks=hooks,
               train_op=train_op,
               scaffold=scaffold)
",0,train
fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads.

PiperOrigin-RevId: 193266515
(cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",basic_session_run_hooks.py,"@@ -391,7 +391,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
                saver=None,
                checkpoint_basename=""model.ckpt"",
                scaffold=None,
-               listeners=None):
+               listeners=None,
+               steps_per_run=1):
     """"""Initializes a `CheckpointSaverHook`.
 
     Args:
@@ -404,6 +405,9 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
       listeners: List of `CheckpointSaverListener` subclass instances.
         Used for callbacks that run immediately before or after this hook saves
         the checkpoint.
+      steps_per_run: `int`, number of steps that occur between each invocation
+        of the hook. Primarily used for TPU workloads which run multiple steps
+        in a while loop in a single Session.run.
 
     Raises:
       ValueError: One of `save_steps` or `save_secs` should be set.
@@ -419,6 +423,7 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
     self._timer = SecondOrStepTimer(every_secs=save_secs,
                                     every_steps=save_steps)
     self._listeners = listeners or []
+    self._steps_per_run = steps_per_run
 
   def begin(self):
     self._summary_writer = SummaryWriterCache.get(self._checkpoint_dir)
@@ -450,7 +455,8 @@ class CheckpointSaverHook(session_run_hook.SessionRunHook):
 
   def after_run(self, run_context, run_values):
     stale_global_step = run_values.results
-    if self._timer.should_trigger_for_step(stale_global_step+1):
+    if self._timer.should_trigger_for_step(
+        stale_global_step + self._steps_per_run):
       # get the real value after train op.
       global_step = run_context.session.run(self._global_step_tensor)
       if self._timer.should_trigger_for_step(global_step):
",0,train
fc6510b506731bf2ffc2520e30fba73b79e5b687,tensorflow/tensorflow,"Fix CheckpointSaverHook to properly save every save_checkpoints_steps for TPU workloads.

PiperOrigin-RevId: 193266515
(cherry picked from commit 5aba07dce5b9e924183efcd05cd82f2fbb70edc8)",basic_session_run_hooks_test.py,"@@ -719,6 +719,99 @@ class CheckpointSaverHookTest(test.TestCase):
     fake_summary_writer.FakeSummaryWriter.uninstall()
 
 
+class CheckpointSaverHookMultiStepTest(test.TestCase):
+
+  def setUp(self):
+    self.model_dir = tempfile.mkdtemp()
+    self.graph = ops.Graph()
+    self.steps_per_run = 5
+    with self.graph.as_default():
+      self.scaffold = monitored_session.Scaffold()
+      self.global_step = variables.get_or_create_global_step()
+      self.train_op = training_util._increment_global_step(self.steps_per_run)
+
+  def tearDown(self):
+    shutil.rmtree(self.model_dir, ignore_errors=True)
+
+  def test_save_steps_saves_in_first_step(self):
+    with self.graph.as_default():
+      hook = basic_session_run_hooks.CheckpointSaverHook(
+          self.model_dir,
+          save_steps=2*self.steps_per_run,
+          scaffold=self.scaffold,
+          steps_per_run=self.steps_per_run)
+      hook.begin()
+      self.scaffold.finalize()
+      with session_lib.Session() as sess:
+        sess.run(self.scaffold.init_op)
+        mon_sess = monitored_session._HookedSession(sess, [hook])
+        mon_sess.run(self.train_op)
+        self.assertEqual(5,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+  def test_save_steps_saves_periodically(self):
+    with self.graph.as_default():
+      hook = basic_session_run_hooks.CheckpointSaverHook(
+          self.model_dir,
+          save_steps=2*self.steps_per_run,
+          scaffold=self.scaffold,
+          steps_per_run=self.steps_per_run)
+      hook.begin()
+      self.scaffold.finalize()
+      with session_lib.Session() as sess:
+        sess.run(self.scaffold.init_op)
+        mon_sess = monitored_session._HookedSession(sess, [hook])
+        mon_sess.run(self.train_op)
+        # Saved (step=5)
+        self.assertEqual(5,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+        mon_sess.run(self.train_op)
+        # Not saved (step=10)
+        self.assertEqual(5,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+        mon_sess.run(self.train_op)
+        # Saved (step=15)
+        self.assertEqual(15,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+        mon_sess.run(self.train_op)
+        # Not saved (step=20)
+        self.assertEqual(15,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+        mon_sess.run(self.train_op)
+        # Saved (step=25)
+        self.assertEqual(25,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+  def test_save_steps_saves_at_end(self):
+    with self.graph.as_default():
+      hook = basic_session_run_hooks.CheckpointSaverHook(
+          self.model_dir,
+          save_steps=2*self.steps_per_run,
+          scaffold=self.scaffold,
+          steps_per_run=self.steps_per_run)
+      hook.begin()
+      self.scaffold.finalize()
+      with session_lib.Session() as sess:
+        sess.run(self.scaffold.init_op)
+        mon_sess = monitored_session._HookedSession(sess, [hook])
+        mon_sess.run(self.train_op)
+        mon_sess.run(self.train_op)
+        hook.end(sess)
+        self.assertEqual(10,
+                         checkpoint_utils.load_variable(self.model_dir,
+                                                        self.global_step.name))
+
+
 class ResourceCheckpointSaverHookTest(test.TestCase):
 
   def setUp(self):
",0,train
d84acd6e45d5c33743d032885e4f5ee727f57db8,tensorflow/tensorflow,"Remove unused symbols in vars_test.

PiperOrigin-RevId: 324686959
Change-Id: If5a8d2ccf6d4baa4e1f19d83a2d54d359c6e6514",vars_test.py,"@@ -26,7 +26,6 @@ from tensorflow.python.distribute import combinations
 from tensorflow.python.distribute import distribution_strategy_context
 from tensorflow.python.distribute import strategy_combinations
 from tensorflow.python.distribute import tpu_strategy
-from tensorflow.python.distribute import tpu_values
 from tensorflow.python.distribute import values
 from tensorflow.python.distribute.cluster_resolver import tpu_cluster_resolver
 from tensorflow.python.eager import context
@@ -664,26 +663,6 @@ class OnWriteVariableSyncScatterTests(test.TestCase, parameterized.TestCase):
       self.assertAllEqual([1, 1, 1], self.evaluate(v2.read_value()))
 
 
-def _make_replica_local(method, strategy=None):
-  if strategy is None:
-    devices = (""/device:GPU:0"", ""/device:CPU:0"")
-  else:
-    devices = strategy.extended.worker_devices
-
-  v = []
-  for d, n, init in zip(devices, [""v"", ""v/replica""], [1., 2.]):
-    with ops.device(d):
-      v.append(variable_scope.get_variable(
-          name=n, initializer=init, use_resource=True))
-
-  if (strategy is not None) and isinstance(strategy, _TPU_STRATEGIES):
-    var_cls = tpu_values.TPUSyncOnReadVariable
-  else:
-    var_cls = values.SyncOnReadVariable
-  replica_local = var_cls(strategy, v, method)
-  return v, replica_local
-
-
 class OnReadVariableSyncTest(test.TestCase, parameterized.TestCase):
 
   @combinations.generate(strategy_and_run_tf_function_combinations())
@@ -1258,12 +1237,5 @@ class SyncOnReadScatterReplicaTest(test.TestCase, parameterized.TestCase):
       self.evaluate(distribution.run(v.scatter_min, args=(delta,)))
 
 
-def _make_index_slices(vals, indices, dense_shape=None):
-  if dense_shape:
-    dense_shape = array_ops.identity(dense_shape)
-  return indexed_slices.IndexedSlices(
-      array_ops.identity(vals), array_ops.identity(indices), dense_shape)
-
-
 if __name__ == ""__main__"":
   test.main()
",0,train
b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch.

PiperOrigin-RevId: 335143411
Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",bitcast_op_test.py,"@@ -82,6 +82,7 @@ class BitcastTest(test.TestCase):
       datatype = dtypes.int8
       array_ops.bitcast(x, datatype, None)
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testQuantizedType(self):
     shape = [3, 4]
     x = np.zeros(shape, np.uint16)
",0,train
b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch.

PiperOrigin-RevId: 335143411
Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",constant_op_test.py,"@@ -456,6 +456,7 @@ class ZerosTest(test.TestCase):
         self.assertFalse(np.any(z_value))
         self.assertEqual((2, 3), z_value.shape)
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testQint8Dtype(self):
     dtype = dtypes_lib.qint8
     z = array_ops.zeros([2, 3], dtype=dtype)
@@ -466,6 +467,7 @@ class ZerosTest(test.TestCase):
     z_value = self.evaluate(math_ops.cast(z, dtypes_lib.int32))
     self.assertFalse(np.any(z_value))
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testQint16Dtype(self):
     dtype = dtypes_lib.qint16
     z = array_ops.zeros([2, 3], dtype=dtype)
@@ -650,6 +652,7 @@ class OnesTest(test.TestCase):
         self.assertEqual([2, 3], z.get_shape())
         self.assertAllEqual(z, np.ones([2, 3]))
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testQintDtype(self):
 
     @def_function.function(autograph=False)
",0,train
b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch.

PiperOrigin-RevId: 335143411
Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",cwise_ops_binary_test.py,"@@ -991,6 +991,7 @@ class ComparisonOpTest(test.TestCase):
           [[True, True, True, True, True], [False, False, False, False, False]],
           values)
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testEqualQuantizeDType(self):
     dtypes = [
         dtypes_lib.qint8,
",0,train
b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch.

PiperOrigin-RevId: 335143411
Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",dynamic_stitch_op_test.py,"@@ -62,6 +62,7 @@ class DynamicStitchTestBase(object):
         # length.
         self.assertEqual([None], stitched_t.get_shape().as_list())
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testSimpleOneDimensional(self):
     # Test various datatypes in the simple case to ensure that the op was
     # registered under those types.
",0,train
b1109ff54544be100bdb88a6661e8938937cac7b,tensorflow/tensorflow,"Enable a few test targets for tfrt. Disable quantization test since we don't plan to have quantization support in the initial launch.

PiperOrigin-RevId: 335143411
Change-Id: I606bacf12bd9b349da304cd97a8acc081dc758f0",spacetodepth_op_test.py,"@@ -309,6 +309,7 @@ class SpaceToDepthTest(test.TestCase):
       actual_vals, expected_vals = self.evaluate([actual, expected])
       self.assertTrue(np.array_equal(actual_vals, expected_vals))
 
+  @test_util.disable_tfrt(""b/169901260"")
   def testAgainstTranspose(self):
     self.compareToTranspose(3, 2, 3, 1, 2, ""NHWC"", dtypes.float32, False)
     self.compareToTranspose(1, 2, 3, 2, 2, ""NHWC"", dtypes.float32, False)
",0,train
4d4794806b565656e3c6a5844be159e84867cd4c,tensorflow/tensorflow,"Update tensorflow/python/keras/callbacks.py

Co-Authored-By: aweers <32593524+aweers@users.noreply.github.com>",callbacks.py,"@@ -1025,7 +1025,7 @@ class EarlyStopping(Callback):
   # Firstly, let's create the callback
   callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)
   # This callback will stop training when there is no improvement in
-  # validation loss for three epochs
+  # the validation loss for three consecutive epochs.
   # then simply train the model with the callback
   model.fit(data, labels, epochs=100, callbacks=[callback], 
       validation_data=(val_data, val_labels))
",0,train
62df725269a89a0a5d877eae18d0c83155f2ea9d,tensorflow/tensorflow,"Increase the input dimension size limit from 4 to 6 to support the RetinaNet model

PiperOrigin-RevId: 206235660",import_tensorflow.cc,"@@ -215,7 +215,7 @@ tensorflow::Status ImportFloatArray(const TensorProto& input_tensor,
                                     Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_FLOAT);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
@@ -253,7 +253,7 @@ tensorflow::Status ImportQuint8Array(const TensorProto& input_tensor,
                                      Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_QUINT8);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
@@ -290,7 +290,7 @@ tensorflow::Status ImportInt32Array(const TensorProto& input_tensor,
                                     Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_INT32);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
@@ -326,7 +326,7 @@ tensorflow::Status ImportInt64Array(const TensorProto& input_tensor,
                                     Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_INT64);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
@@ -363,7 +363,7 @@ tensorflow::Status ImportBoolArray(const TensorProto& input_tensor,
                                    Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_BOOL);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
@@ -409,7 +409,7 @@ tensorflow::Status ImportStringArray(const TensorProto& input_tensor,
                                      Array* output_array) {
   CHECK_EQ(input_tensor.dtype(), DT_STRING);
   const auto& input_shape = input_tensor.tensor_shape();
-  CHECK_LE(input_shape.dim_size(), 4);
+  CHECK_LE(input_shape.dim_size(), 6);
   int input_flat_size;
   auto status = ImportShape(input_shape.dim(), &input_flat_size,
                             output_array->mutable_shape());
",0,train
7322a44ff82a5a44e690b59fec5557ffc8f4ab34,tensorflow/tensorflow,"Add IsEmpty() utility for telling if XSpace is empty.

PiperOrigin-RevId: 336768938
Change-Id: Idce1f4dae25bb9a4248ffc50aa2c50d1c90d661f",xplane_utils.cc,"@@ -240,5 +240,16 @@ uint64 GetStartTimestampNs(const XPlane& plane) {
   return plane_timestamp;
 }
 
+bool IsEmpty(const XSpace& space) {
+  for (const auto& plane : space.planes()) {
+    for (const auto& line : plane.lines()) {
+      if (!line.events().empty()) {
+        return false;
+      }
+    }
+  }
+  return true;
+}
+
 }  // namespace profiler
 }  // namespace tensorflow
",0,train
7322a44ff82a5a44e690b59fec5557ffc8f4ab34,tensorflow/tensorflow,"Add IsEmpty() utility for telling if XSpace is empty.

PiperOrigin-RevId: 336768938
Change-Id: Idce1f4dae25bb9a4248ffc50aa2c50d1c90d661f",xplane_utils.h,"@@ -110,6 +110,9 @@ void MergePlanes(const XPlane& src_plane, XPlane* dst_plane);
 // timestamps. If zero line exists, return 0;
 uint64 GetStartTimestampNs(const XPlane& plane);
 
+// Returns true if there are no XEvents.
+bool IsEmpty(const XSpace& space);
+
 }  // namespace profiler
 }  // namespace tensorflow
 
",0,train
7530e33cee8fe6555d98a7951006faf51c8b3809,tensorflow/tensorflow,Iterate on a copy of the structure we modify.,fusion_bitcast_lift.cc,"@@ -193,7 +193,9 @@ StatusOr<bool> FusionBitcastLift::Run(HloModule* module) {
                 i->CloneWithNewOperands(dtyped_new_shape, new_operands));
             // Replace the old bitcasts with the new instruction to
             // remove it.
-            for (HloInstruction* user: i->users()) {
+            // Copy the vector as it will be modified while we iterate on it.
+            const std::vector<HloInstruction*> users = i->users();
+            for (HloInstruction* user: users) {
               TF_RETURN_IF_ERROR(i->parent()->ReplaceInstructionWithDifferentShape(
                   user, cloned_i));
             }
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",distribute_strategy_test.py,"@@ -579,7 +579,7 @@ class TestDistributionStrategyWithNumpyArrays(test.TestCase,
         return self.v2 + inp
 
     with self.cached_session(), distribution.scope():
-      layer = MyLayer(dtype=policy.Policy(policy_name))
+      layer = MyLayer(dtype=policy_name)
       def run_fn():
         x = np.array([1.])
         with backprop.GradientTape() as tape:
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",base_layer.py,"@@ -2348,6 +2348,11 @@ class Layer(module.Module, version_utils.LayerVersionSelector):
       self._dtype_policy = dtype
     elif isinstance(dtype, dict):
       self._dtype_policy = policy.deserialize(dtype)
+    elif isinstance(dtype, str) and dtype in ('mixed_float16',
+                                              'mixed_bfloat16'):
+      # The isinstance check is required since np.dtype raises an error if
+      # compared to a non-dtype string.
+      self._dtype_policy = policy.Policy(dtype)
     elif dtype:
       self._dtype_policy = policy.Policy(dtypes.as_dtype(dtype).name)
     else:
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",base_layer_v1.py,"@@ -1750,6 +1750,11 @@ class Layer(base_layer.Layer):
       self._dtype_policy = dtype
     elif isinstance(dtype, dict):
       self._dtype_policy = policy.deserialize(dtype)
+    elif isinstance(dtype, str) and dtype in ('mixed_float16',
+                                              'mixed_bfloat16'):
+      # The isinstance check is required since np.dtype raises an error if
+      # compared to a non-dtype string.
+      self._dtype_policy = policy.Policy(dtype)
     elif dtype:
       self._dtype_policy = policy.Policy(dtypes.as_dtype(dtype).name)
     else:
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",normalization_test.py,"@@ -31,7 +31,6 @@ from tensorflow.python.keras import keras_parameterized
 from tensorflow.python.keras import testing_utils
 from tensorflow.python.keras.layers import normalization
 from tensorflow.python.keras.layers import normalization_v2
-from tensorflow.python.keras.mixed_precision import policy
 from tensorflow.python.ops import array_ops
 from tensorflow.python.ops import gradient_checker_v2
 from tensorflow.python.ops import math_ops
@@ -166,7 +165,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase):
         axis=-1,
         input_shape=(4, 4, 3),
         momentum=0.8,
-        dtype=policy.Policy('mixed_float16'))
+        dtype='mixed_float16')
     x = np.random.normal(size=(10, 4, 4, 3))
     y = norm(x)
     self.assertEqual(y.dtype, 'float16')
@@ -181,7 +180,7 @@ class BatchNormalizationTest(keras_parameterized.TestCase):
         axis=-1,
         input_shape=(1, 1, 1),
         fused=fused,
-        dtype=policy.Policy('mixed_float16'))
+        dtype='mixed_float16')
     x = np.array([-1000., 1000.]).reshape((2, 1, 1, 1))
     y = norm(x, training=True)
     expected_y = np.array([-1.0, 1.0]).reshape((2, 1, 1, 1))
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",keras_test.py,"@@ -164,7 +164,7 @@ class KerasLayerTest(keras_parameterized.TestCase):
         return math_ops.cast(inputs, 'int32') + self.v
 
     x = constant_op.constant([1.])
-    layer = LayerWithIntVar(dtype=policy.Policy('mixed_float16'))
+    layer = LayerWithIntVar(dtype='mixed_float16')
     self.assertEqual(layer(x).dtype, 'int32')
 
   @parameterized.named_parameters(*TESTCASES)
@@ -239,14 +239,6 @@ class KerasLayerTest(keras_parameterized.TestCase):
         self.assertEqual(layer(x).dtype, dtypes.float64)
         self.assertEqual(layer.v.dtype, dtypes.float64)
 
-  def test_error_passing_policy_string_to_layer(self):
-    with self.assertRaisesRegex(
-        TypeError, ""Cannot convert value 'mixed_float16' to a ""
-        'TensorFlow DType'):
-      # This is not allowed, as otherwise a ""mixed_float16"" policy could be
-      # created without an API call that has the name ""experimental"" in it.
-      mp_test_util.MultiplyLayer(dtype='mixed_float16')
-
   @parameterized.named_parameters(*TESTCASES)
   def test_gradient(self, strategy_fn):
     x = constant_op.constant([1.])
@@ -344,7 +336,7 @@ class KerasLayerTest(keras_parameterized.TestCase):
         self.assertEqual(layer(x).dtype, dtype)
         self.assertEqual(layer.v.dtype, dtype)
 
-      layer = mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16'))
+      layer = mp_test_util.MultiplyLayer(dtype='mixed_float16')
       config = layer.get_config()
       self.assertEqual(config['dtype'],
                        {'class_name': 'Policy',
@@ -430,7 +422,7 @@ class KerasLayerTest(keras_parameterized.TestCase):
       self.assertEqual(config['dtype'], 'float16')
 
   def test_delete_variable(self):
-    layer = base_layer.Layer(dtype=policy.Policy('mixed_float16'))
+    layer = base_layer.Layer(dtype='mixed_float16')
     layer.x = layer.add_weight('x')
     self.assertEqual(layer.trainable_weights, [layer.x])
     del layer.x
@@ -455,7 +447,7 @@ class KerasLayerTest(keras_parameterized.TestCase):
         'stop using mixed precision by removing the use of the '
         '""mixed_float16"" policy or use a different Strategy, e.g. '
         'a MirroredStrategy.'):
-      mp_test_util.MultiplyLayer(dtype=policy.Policy('mixed_float16'))
+      mp_test_util.MultiplyLayer(dtype='mixed_float16')
     # Non-mixed policies are fine
     mp_test_util.MultiplyLayer(dtype=policy.Policy('float64'))
 
",0,train
b4a149edf6185bf65a4dde7cc16fb88995056af9,tensorflow/tensorflow,"Allow passing dtype='mixed_float16' to a layer.

Before, only dtypes or anything convertible to dtypes could be passed. Now, policy names which are not dtypes can also be passed: 'mixed_float16' and 'mixed_bfloat16'.

PiperOrigin-RevId: 360970929
Change-Id: I6b83931d430a6b2dec9dfba7af1792e4a7d9ae8f",policy.py,"@@ -70,8 +70,9 @@ class Policy(object):
 
   In the example above, passing `dtype='float32'` to the layer is equivalent to
   passing `dtype=tf.keras.mixed_precision.Policy('float32')`. In general,
-  passing a dtype to a layer is equivalent to passing the corresponding policy,
-  so it is never necessary to explicitly construct a `Policy` object.
+  passing a dtype policy name to a layer is equivalent to passing the
+  corresponding policy, so it is never necessary to explicitly construct a
+  `Policy` object.
 
   Note: `Model.compile` will automatically wrap an optimizer with a
   `tf.keras.mixed_precision.LossScaleOptimizer` if you use the `'mixed_float16'`
@@ -145,8 +146,7 @@ class Policy(object):
   ...     # With mixed precision, self.kernel will be casted to float16
   ...     return tf.linalg.matmul(inputs, self.kernel)
   ...
-  >>> dtype_policy = tf.keras.mixed_precision.Policy('mixed_float16')
-  >>> layer = SimpleDense(dtype=dtype_policy)
+  >>> layer = SimpleDense(dtype='mixed_float16')
   >>> y = layer(tf.ones((10, 10)))
   >>> y.dtype
   tf.float16
@@ -178,9 +178,7 @@ class Policy(object):
   ...     # occur when adding `inputs` to `rand`.
   ...     rand = tf.random.normal(shape=inputs.shape, dtype=inputs.dtype)
   ...     return inputs + rand
-
-  >>> dtype_policy = tf.keras.mixed_precision.Policy('mixed_float16')
-  >>> layer = AddRandom(dtype=dtype_policy)
+  >>> layer = AddRandom(dtype='mixed_float16')
   >>> y = layer(x)
   >>> y.dtype
   tf.float16
",0,train
389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce.

PiperOrigin-RevId: 283928453
Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",hlo_legalize_to_lhlo.cc,"@@ -18,6 +18,7 @@ limitations under the License.
 #include ""absl/memory/memory.h""
 #include ""mlir/Dialect/StandardOps/Ops.h""  // TF:local_config_mlir
 #include ""mlir/IR/Attributes.h""  // TF:local_config_mlir
+#include ""mlir/IR/BlockAndValueMapping.h""  // TF:local_config_mlir
 #include ""mlir/IR/Builders.h""  // TF:local_config_mlir
 #include ""mlir/IR/Function.h""  // TF:local_config_mlir
 #include ""mlir/IR/Location.h""  // TF:local_config_mlir
@@ -38,13 +39,19 @@ namespace {
 
 constexpr StringRef kTempBufferAttr = ""temp"";
 
-Value* GetTensorStoreMemRef(Value* value) {
+Value* GetTensorStoreOrReturnMemRef(Value* value) {
   for (const auto& user : value->getUsers()) {
     if (auto tensor_store = dyn_cast<TensorStoreOp>(user)) {
       if (tensor_store.getOperand(0) == value) {
         return tensor_store.getOperand(1);
       }
     }
+    if (auto return_op = dyn_cast<xla_hlo::ReturnOp>(user)) {
+      if (return_op.getOperand(0) == value) {
+        auto block = return_op.getOperation()->getBlock();
+        return *block->args_rbegin();
+      }
+    }
   }
   return nullptr;
 }
@@ -88,8 +95,8 @@ Value* InsertAllocAndDealloc(Location loc, Value* result,
 /// function to store that values held in the tensor.
 Value* GetBufferForResultValue(Location loc, Value* result,
                                ConversionPatternRewriter* rewriter) {
-  if (auto tensor_store_memref = GetTensorStoreMemRef(result)) {
-    return tensor_store_memref;
+  if (auto existing_memref = GetTensorStoreOrReturnMemRef(result)) {
+    return existing_memref;
   }
   return InsertAllocAndDealloc(loc, result, rewriter);
 }
@@ -122,6 +129,62 @@ class HloToLhloOpConverter : public ConversionPattern {
   }
 };
 
+struct HloToLHloReduceConverter
+    : public OpConversionPattern<xla_hlo::ReduceOp> {
+ public:
+  using OpConversionPattern::OpConversionPattern;
+
+  PatternMatchResult matchAndRewrite(
+      xla_hlo::ReduceOp op, ArrayRef<Value*> operands,
+      ConversionPatternRewriter& rewriter) const final {
+    auto loc = op.getLoc();
+    // TODO(b/137624192) Implement variadic reduce.
+    if (op.getNumResults() != 1) return matchFailure();
+    if (op.getParentRegion()->getBlocks().size() != 1) {
+      emitError(loc,
+                ""tensor to buffer conversion expects a single block in the ""
+                ""region containing the operation"");
+    }
+    const auto& original_results = op.getResults();
+    SmallVector<Value*, 4> buffer_args(operands.begin(), operands.end());
+    for (auto result : original_results) {
+      buffer_args.push_back(GetBufferForResultValue(loc, result, &rewriter));
+    }
+    auto new_op = rewriter.create<xla_lhlo::ReduceOp>(
+        loc, llvm::None, buffer_args, op.getAttrs());
+
+    // Copy over the operations inside the region.
+    rewriter.inlineRegionBefore(op.body(), new_op.body(), new_op.body().end());
+
+    // Create new block arguments with correct type.
+    auto& entry_block = new_op.body().front();
+    int original_arg_count = entry_block.getNumArguments();
+    for (int i = 0; i < original_arg_count; ++i) {
+      auto old_arg = entry_block.getArgument(i);
+      auto old_type = old_arg->getType().cast<TensorType>();
+      auto new_type =
+          MemRefType::get(old_type.getShape(), old_type.getElementType());
+      auto new_arg = entry_block.addArgument(new_type);
+      rewriter.replaceUsesOfBlockArgument(old_arg, new_arg);
+    }
+    // Add an argument for the result.
+    entry_block.addArgument(
+        entry_block.getArgument(original_arg_count)->getType());
+    // Remove the old arguments.
+    for (int i = original_arg_count - 1; i >= 0; --i) {
+      entry_block.eraseArgument(i);
+    }
+    // Insert terminator at the end.
+    rewriter.setInsertionPointToEnd(&entry_block);
+    rewriter.create<xla_lhlo::TerminatorOp>(loc);
+
+    rewriter.replaceOp(op, ArrayRef<Value*>(buffer_args).slice(operands.size()),
+                       llvm::to_vector<4>(original_results));
+
+    return matchSuccess();
+  }
+};
+
 class HloToLhloTensorLoadConverter : public ConversionPattern {
  public:
   explicit HloToLhloTensorLoadConverter(MLIRContext* context)
@@ -135,6 +198,7 @@ class HloToLhloTensorLoadConverter : public ConversionPattern {
   }
 };
 
+// TODO(b/137624192): Rewrite into a copy and elide copy if possible.
 class HloToLhloTensorStoreConverter : public ConversionPattern {
  public:
   explicit HloToLhloTensorStoreConverter(MLIRContext* context)
@@ -148,6 +212,19 @@ class HloToLhloTensorStoreConverter : public ConversionPattern {
   }
 };
 
+// TODO(b/137624192): Rewrite into a copy and elide copy if possible.
+class HloToLhloReturnConverter : public OpConversionPattern<xla_hlo::ReturnOp> {
+ public:
+  using OpConversionPattern::OpConversionPattern;
+
+  PatternMatchResult matchAndRewrite(
+      xla_hlo::ReturnOp op, ArrayRef<Value*> operands,
+      ConversionPatternRewriter& rewriter) const final {
+    rewriter.eraseOp(op);
+    return matchSuccess();
+  }
+};
+
 // Lowers from HLO dialect to LHLO dialect allocating/deallocating temporary
 // buffers if necessary.
 //
@@ -215,6 +292,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context,
                            xla_lhlo::BroadcastInDimOp>,
       HloToLhloOpConverter<xla_hlo::CeilOp, xla_lhlo::CeilOp>,
       HloToLhloOpConverter<xla_hlo::CompareOp, xla_lhlo::CompareOp>,
+      HloToLhloOpConverter<xla_hlo::ConstOp, xla_lhlo::ConstOp>,
       HloToLhloOpConverter<xla_hlo::ConvertOp, xla_lhlo::ConvertOp>,
       HloToLhloOpConverter<xla_hlo::CosOp, xla_lhlo::CosOp>,
       HloToLhloOpConverter<xla_hlo::DivOp, xla_lhlo::DivOp>,
@@ -229,6 +307,7 @@ void populateHLOToLHLOConversionPattern(MLIRContext* context,
       HloToLhloOpConverter<xla_hlo::SignOp, xla_lhlo::SignOp>,
       HloToLhloOpConverter<xla_hlo::SubOp, xla_lhlo::SubOp>,
       HloToLhloOpConverter<xla_hlo::TanhOp, xla_lhlo::TanhOp>,
+      HloToLHloReduceConverter, HloToLhloReturnConverter,
       HloToLhloTensorLoadConverter, HloToLhloTensorStoreConverter
   >(context);
   // clang-format on
",0,train
389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce.

PiperOrigin-RevId: 283928453
Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",kernel_lowering.cc,"@@ -65,8 +65,8 @@ struct FusionToLhloConverter
     mlir::OwningRewritePatternList patterns;
     mlir::ConversionTarget target(ctx);
     target.addLegalDialect<::mlir::xla_lhlo::XlaLhloDialect>();
-
     ::mlir::xla_hlo::populateHLOToLHLOConversionPattern(&ctx, &patterns);
+
     getFunction().walk([&](FusionOp op) {
       if (failed(applyPartialConversion(op, target, patterns, nullptr))) {
         signalPassFailure();
",0,train
389fa0598c5a1d0ffdfefa8ce24aab7d5d0f8864,tensorflow/tensorflow,"Add legalization of HLO reduce to LHLO reduce.

PiperOrigin-RevId: 283928453
Change-Id: Ib4d878e41473fe41c1ef20f269542aa0f248b723",mlir_gpu_lhlo_gen_test.cc,"@@ -255,45 +255,44 @@ ENTRY %AddMultiply (x: f32[2,2], y: f32[2,2], z: f32[2,2]) -> f32[2,2] {
                      LoweringStage::GPU);
 }
 
-// TODO(herhut): Re-enable once we can lower hlo_reduce to proper lhlo_reduce.
-// TEST_F(LhloGenTest, FusedReduce) {
-//   CompileAndVerifyIr(R""(
-// HloModule FusedReduce
-//
-// %add (x: f32[], y: f32[]) -> f32[] {
-//   %x = f32[] parameter(0)
-//   %y = f32[] parameter(1)
-//   ROOT %add = f32[] add(f32[] %x, f32[] %y)
-// }
-//
-// %fused_computation (param: f32[100,10]) -> f32[10] {
-//   %param = f32[100,10] parameter(0)
-//   %constant = f32[] constant(0)
-//   ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant),
-//       dimensions={0}, to_apply=%add
-// }
-//
-// ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] {
-//   %x = f32[100,10] parameter(0)
-//   ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput,
-//       calls=%fused_computation
-// }
-// )"",
-//                      R""(
-// ;CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[RTYPE:.*]])
-// ;CHECK: ""xla_lhlo.fusion""() ( {
-// ;CHECK:   %[[REF0:.*]] = tensor_load %arg0 : [[TYPE]]
-// ;CHECK:   %[[CT0:.*]] = xla_hlo.constant dense<0.000000e+00>
-// ;CHECK:   %[[RED:.*]] = ""xla_hlo.reduce""(%0, %1) ( {
-// ;CHECK:     ^bb0(%[[BARG0:.*]]: [[ETYPE:.*]], %[[BARG1:.*]]: [[ETYPE]])
-// ;CHECK:       %[[ADD:.*]] = xla_hlo.add %[[BARG0]], %[[BARG1]] : [[ETYPE]]
-// ;CHECK:       ""xla_hlo.return""(%[[ADD]])
-// ;CHECK:     })
-// ;CHECK:   tensor_store %[[RED]], %[[RESULT]] : [[RTYPE]]
-// ;CHECK:   ""xla_lhlo.terminator""()
-// ;CHECK-NEXT: })
-//       )"");
-// }
+TEST_F(LhloGenTest, FusedReduce) {
+  CompileAndVerifyIr(R""(
+HloModule FusedReduce
+
+%add (x: f32[], y: f32[]) -> f32[] {
+  %x = f32[] parameter(0)
+  %y = f32[] parameter(1)
+  ROOT %add = f32[] add(f32[] %x, f32[] %y)
+}
+
+%fused_computation (param: f32[100,10]) -> f32[10] {
+  %param = f32[100,10] parameter(0)
+  %constant = f32[] constant(0)
+  ROOT %reduce = f32[10]{0} reduce(f32[100,10]{1,0} %param, f32[] %constant),
+      dimensions={0}, to_apply=%add
+}
+
+ENTRY %FusedReduce (x: f32[100,10]) -> f32[10] {
+  %x = f32[100,10] parameter(0)
+  ROOT %fusion = f32[10]{0} fusion(f32[100,10]{1,0} %x), kind=kInput,
+      calls=%fused_computation
+}
+)"",
+                     R""(
+;CHECK: func @fusion(%[[ARG0:.*]]: [[TYPE:.*]], %[[RESULT:.*]]: [[RTYPE:.*]])
+;CHECK: ""xla_lhlo.fusion""() ( {
+;CHECK:   %[[REF0:.*]] = tensor_load %arg0 : [[TYPE]]
+;CHECK:   %[[CT0:.*]] = xla_hlo.constant dense<0.000000e+00>
+;CHECK:   %[[RED:.*]] = ""xla_hlo.reduce""(%0, %1) ( {
+;CHECK:     ^bb0(%[[BARG0:.*]]: [[ETYPE:.*]], %[[BARG1:.*]]: [[ETYPE]])
+;CHECK:       %[[ADD:.*]] = xla_hlo.add %[[BARG0]], %[[BARG1]] : [[ETYPE]]
+;CHECK:       ""xla_hlo.return""(%[[ADD]])
+;CHECK:     })
+;CHECK:   tensor_store %[[RED]], %[[RESULT]] : [[RTYPE]]
+;CHECK:   ""xla_lhlo.terminator""()
+;CHECK-NEXT: })
+      )"");
+}
 
 TEST_F(LhloGenTest, Broadcast) {
   CompileAndVerifyIr(R""(
",0,train
4a265a6f3a8ea441e6135da03aafa773bbce5505,tensorflow/tensorflow,"Ensure `min_node_weight` is scalar in `BoostedTreesCalculateBestFeatureSplitV2`

PiperOrigin-RevId: 411085102
Change-Id: Ibd511f4b224452cbe235e3d2359f384c839ea558",stats_ops.cc,"@@ -736,6 +736,10 @@ class BoostedTreesCalculateBestFeatureSplitV2 : public OpKernel {
     const Tensor* min_node_weight_t;
     OP_REQUIRES_OK(context,
                    context->input(""min_node_weight"", &min_node_weight_t));
+    OP_REQUIRES(context, TensorShapeUtils::IsScalar(min_node_weight_t->shape()),
+                errors::InvalidArgument(
+                    ""min_node_weight must be a scalar, got a tensor of shape "",
+                    min_node_weight_t->shape().DebugString()));
     const auto min_node_weight = min_node_weight_t->scalar<float>()();
 
     std::vector<int32> output_node_ids;
",0,train
75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Roll back the change to interpreter_wrapper, which breaks TF Serving.

PiperOrigin-RevId: 410667131
Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper.cc,"@@ -258,12 +258,8 @@ InterpreterWrapper::~InterpreterWrapper() {}
 
 PyObject* InterpreterWrapper::AllocateTensors(int subgraph_index) {
   TFLITE_PY_ENSURE_VALID_INTERPRETER();
-  if (subgraph_index == InterpreterWrapper::kUndeterminedSubgraphIndex) {
-    TFLITE_PY_CHECK(interpreter_->AllocateTensors());
-  } else {
-    TFLITE_PY_SUBGRAPH_BOUNDS_CHECK(subgraph_index);
-    TFLITE_PY_CHECK(interpreter_->subgraph(subgraph_index)->AllocateTensors());
-  }
+  TFLITE_PY_SUBGRAPH_BOUNDS_CHECK(subgraph_index);
+  TFLITE_PY_CHECK(interpreter_->subgraph(subgraph_index)->AllocateTensors());
   Py_RETURN_NONE;
 }
 
",0,train
75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Roll back the change to interpreter_wrapper, which breaks TF Serving.

PiperOrigin-RevId: 410667131
Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper.h,"@@ -44,8 +44,6 @@ class InterpreterWrapper {
  public:
   using Model = FlatBufferModel;
 
-  static constexpr int kUndeterminedSubgraphIndex = -1;
-
   // SWIG caller takes ownership of pointer.
   static InterpreterWrapper* CreateWrapperCPPFromFile(
       const char* model_path, int op_resolver_id,
",0,train
75a5cb4e1f6b82c24e9c3ca011ea94ec4c459a48,tensorflow/tensorflow,"Roll back the change to interpreter_wrapper, which breaks TF Serving.

PiperOrigin-RevId: 410667131
Change-Id: I54dd3831ba9595d0af34ebc8160608002bf31c84",interpreter_wrapper_pybind11.cc,"@@ -94,8 +94,7 @@ PYBIND11_MODULE(_pywrap_tensorflow_interpreter_wrapper, m) {
           [](InterpreterWrapper& self, int subgraph_index) {
             return tensorflow::PyoOrThrow(self.AllocateTensors(subgraph_index));
           },
-          py::arg(""subgraph_index"") =
-              InterpreterWrapper::kUndeterminedSubgraphIndex)
+          py::arg(""subgraph_index"") = 0)
       .def(
           ""Invoke"",
           [](InterpreterWrapper& self, int subgraph_index) {
",0,train
3db21177223b70103644c0a87299cf194e8f2c6c,tensorflow/tensorflow,"Update GraphDef version to 840.

PiperOrigin-RevId: 387057910
Change-Id: I1fbfceb78e988f6a4ca133e86b0076878cdfe547",version.h,"@@ -108,7 +108,7 @@ limitations under the License.
 
 #define TF_GRAPH_DEF_VERSION_MIN_PRODUCER 0
 #define TF_GRAPH_DEF_VERSION_MIN_CONSUMER 0
-#define TF_GRAPH_DEF_VERSION 839  // Updated: 2021/7/26
+#define TF_GRAPH_DEF_VERSION 840  // Updated: 2021/7/27
 
 // Checkpoint compatibility versions (the versions field in SavedSliceMeta).
 //
",0,test
fab3f8548264590133d2f49c75ed9c0c0ab83f28,tensorflow/tensorflow,"compat: Update forward compatibility horizon to 2020-11-28

PiperOrigin-RevId: 344594007
Change-Id: I99aa6a0b381bff28ab595736a77169cd7b060724",compat.py,"@@ -33,7 +33,7 @@ from tensorflow.python.util.tf_export import tf_export
 # This value changes every day with an automatic CL. It can be modified in code
 # via `forward_compatibility_horizon()` or with the environment variable
 # TF_FORWARD_COMPATIBILITY_DELTA_DAYS, which is added to the compatibility date.
-_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 27)
+_FORWARD_COMPATIBILITY_HORIZON = datetime.date(2020, 11, 28)
 _FORWARD_COMPATIBILITY_DELTA_DAYS_VAR_NAME = ""TF_FORWARD_COMPATIBILITY_DELTA_DAYS""
 _FORWARD_COMPATIBILITY_DATE_NUMBER = None
 
",0,train